def test_get_hook_base_dir(self, provide_directory_mock): fileCache = FileCache(self.config) # Check missing parameter command = {'commandParams': {}} base = fileCache.get_hook_base_dir(command, "server_url_pref") self.assertEqual(base, None) self.assertFalse(provide_directory_mock.called) # Check existing dir case command = { 'commandParams': { 'hooks_folder': os.path.join('HDP', '2.1.1', 'hooks') } } provide_directory_mock.return_value = "dummy value" fileCache = FileCache(self.config) res = fileCache.get_hook_base_dir(command, "server_url_pref") self.assertEquals( pprint.pformat(provide_directory_mock.call_args_list[0][0]), "('/var/lib/ambari-agent/cache', " "{0}, " "'server_url_pref')".format( pprint.pformat(os.path.join('stacks', 'HDP', '2.1.1', 'hooks')))) self.assertEquals(res, "dummy value")
def cache_factory(mgr, kind):
    """Create a cache instance of the requested kind.

    Args:
        mgr: manager/config object forwarded to the cache constructor.
        kind: one of the values of the module-level ``cache_options`` list.

    Returns:
        A ``Cache`` or ``FileCache`` instance.

    Raises:
        ValueError: if ``kind`` is not a recognised cache type.
    """
    if kind == cache_options[0]:
        return Cache(mgr)
    elif kind == cache_options[1]:
        return FileCache(mgr)
    else:
        # BUG FIX: the original used "%s ...".format(kind), mixing %-style
        # placeholders with str.format(), so the offending kind was never
        # interpolated into the error message.
        raise ValueError("{0} is not a valid cache type!".format(kind))
def __init__(self, config, controller):
    """Set up command-execution state: file cache, status-command output
    paths, hadoop credential-shell settings, and in-progress bookkeeping.

    Args:
        config: agent configuration object (ConfigParser-like ``get``).
        controller: agent controller whose registration_listeners list is
            extended so the file cache is reset on every re-registration.
    """
    self.config = config
    # Agent working directory ('agent/prefix' in the agent config).
    self.tmp_dir = config.get('agent', 'prefix')
    self.force_https_protocol = config.get_force_https_protocol()
    self.exec_tmp_dir = Constants.AGENT_TMP_DIR
    # Local cache of stack/service definitions downloaded from the server.
    self.file_cache = FileCache(config)
    self.status_commands_stdout = os.path.join(
        self.tmp_dir, 'status_command_stdout.txt')
    self.status_commands_stderr = os.path.join(
        self.tmp_dir, 'status_command_stderr.txt')
    self.public_fqdn = hostname.public_hostname(config)
    # cache reset will be called on every agent registration
    controller.registration_listeners.append(self.file_cache.reset)
    # Construct the hadoop credential lib JARs path ('*' globs all JARs).
    self.credential_shell_lib_path = os.path.join(
        config.get('security', 'credential_lib_dir',
                   self.DEFAULT_CREDENTIAL_SHELL_LIB_PATH), '*')
    self.credential_conf_dir = config.get('security', 'credential_conf_dir',
                                          self.DEFAULT_CREDENTIAL_CONF_DIR)
    self.credential_shell_cmd = config.get(
        'security', 'credential_shell_cmd', self.DEFAULT_CREDENTIAL_SHELL_CMD)
    # Clean up old status command files if any
    try:
        os.unlink(self.status_commands_stdout)
        os.unlink(self.status_commands_stderr)
    except OSError:
        pass  # Ignore fail
    # Map of currently running commands, guarded by an RLock.
    self.commands_in_progress_lock = threading.RLock()
    self.commands_in_progress = {}
def __generate(self):
    """Drive full C-API generation: process the parsed API description,
    choose the exception-handling strategy, then emit all output files
    through the namespace generators and the CAPI generator, sharing one
    FileCache of generated files."""
    self.__process()
    for namespace in self.api_description.namespaces:
        Capi.__substitute_implementation_class_name(namespace)
    namespace_generators = create_namespace_generators(
        self.api_description, self.params_description)
    # Both traits objects are always built; the configured mode only
    # selects which one becomes the *main* traits.
    by_first_argument_exception_traits = ExceptionTraits.ByFirstArgument(
        self.params_description, namespace_generators)
    no_handling_exception_traits = ExceptionTraits.NoHandling()
    if self.params_description.exception_handling_mode == TExceptionHandlingMode.by_first_argument:
        main_exception_traits = by_first_argument_exception_traits
    else:
        main_exception_traits = no_handling_exception_traits
    capi_generator = CapiGenerator(main_exception_traits,
                                   no_handling_exception_traits,
                                   self.params_description,
                                   self.api_description)
    file_cache = FileCache(self.params_description)
    # Per-namespace code first, then cross-cutting CAPI output, then the
    # root header that ties the generated headers together.
    for namespace_generator in namespace_generators:
        namespace_generator.generate(file_cache, capi_generator)
    capi_generator.generate(file_cache)
    self.__generate_root_header(namespace_generators, file_cache)
    # Unit-test generation is optional.
    if self.unit_tests_generator:
        self.unit_tests_generator.generate(namespace_generators)
def test_unpack_archive(self):
    """Unpack a bundled zip fixture and verify the total byte size, file
    count and directory count; then verify OS errors surface as
    CachingException."""
    tmpdir = tempfile.mkdtemp()
    dummy_archive_name = os.path.join("ambari_agent", "dummy_files",
                                      "dummy_archive.zip")
    archive_file = open(dummy_archive_name, "rb")
    fileCache = FileCache(self.config)
    fileCache.unpack_archive(archive_file, tmpdir)
    # Count summary size of unpacked files:
    total_size = 0
    total_files = 0
    total_dirs = 0
    for dirpath, dirnames, filenames in os.walk(tmpdir):
        total_dirs += 1
        for f in filenames:
            fp = os.path.join(dirpath, f)
            total_size += os.path.getsize(fp)
            total_files += 1
    # Expected totals of the fixture archive (Python 2 long literal).
    self.assertEquals(total_size, 51258L)
    self.assertEquals(total_files, 28)
    self.assertEquals(total_dirs, 8)
    shutil.rmtree(tmpdir)
    # Test exception handling
    with patch("os.path.isdir") as isdir_mock:
        isdir_mock.side_effect = self.exc_side_effect
        try:
            fileCache.unpack_archive(archive_file, tmpdir)
            self.fail('CachingException not thrown')
        except CachingException:
            pass  # Expected
        except Exception, e:
            self.fail('Unexpected exception thrown:' + str(e))
def test_get_service_base_dir(self, isdir_mock):
    """The service base dir should point at the cached stack definition."""
    isdir_mock.return_value = True
    cache = FileCache(self.config)
    resolved = cache.get_service_base_dir("HDP", "2.0.7", "HBASE",
                                          "REGION_SERVER")
    expected = "/var/lib/ambari-agent/cache/stacks/HDP/2.0.7/services/HBASE"
    self.assertEqual(resolved, expected)
def test_get_custom_actions_base_dir(self, provide_directory_mock):
    """The custom-actions request is forwarded verbatim to provide_directory."""
    provide_directory_mock.return_value = "dummy value"
    cache = FileCache(self.config)
    result = cache.get_custom_actions_base_dir("server_url_pref")
    first_call_args = provide_directory_mock.call_args_list[0][0]
    self.assertEquals(
        pprint.pformat(first_call_args),
        "('/var/lib/ambari-agent/cache', 'custom_actions', 'server_url_pref')")
    self.assertEquals(result, "dummy value")
def test_invalidate_directory(self, makedirs_mock, rmtree_mock, unlink_mock, isdir_mock, isfile_mock, exists_mock): fileCache = FileCache(self.config) # Test execution flow if path points to file isfile_mock.return_value = True isdir_mock.return_value = False exists_mock.return_value = True fileCache.invalidate_directory("dummy-dir") self.assertTrue(unlink_mock.called) self.assertFalse(rmtree_mock.called) self.assertTrue(makedirs_mock.called) unlink_mock.reset_mock() rmtree_mock.reset_mock() makedirs_mock.reset_mock() # Test execution flow if path points to dir isfile_mock.return_value = False isdir_mock.return_value = True exists_mock.return_value = True fileCache.invalidate_directory("dummy-dir") self.assertFalse(unlink_mock.called) self.assertTrue(rmtree_mock.called) self.assertTrue(makedirs_mock.called) unlink_mock.reset_mock() rmtree_mock.reset_mock() makedirs_mock.reset_mock() # Test execution flow if path points nowhere isfile_mock.return_value = False isdir_mock.return_value = False exists_mock.return_value = False fileCache.invalidate_directory("dummy-dir") self.assertFalse(unlink_mock.called) self.assertFalse(rmtree_mock.called) self.assertTrue(makedirs_mock.called) unlink_mock.reset_mock() rmtree_mock.reset_mock() makedirs_mock.reset_mock() # Test exception handling makedirs_mock.side_effect = self.exc_side_effect try: fileCache.invalidate_directory("dummy-dir") self.fail('CachingException not thrown') except CachingException: pass # Expected except Exception, e: self.fail('Unexpected exception thrown:' + str(e))
def test_build_download_url(self):
    """The download URL is a plain slash-join of prefix, directory and file."""
    cache = FileCache(self.config)
    result = cache.build_download_url('http://localhost:8080/resources/',
                                      'stacks/HDP/2.1.1/hooks',
                                      'archive.zip')
    expected = ('http://localhost:8080/resources//'
                'stacks/HDP/2.1.1/hooks/archive.zip')
    self.assertEqual(result, expected)
def test_get_service_base_dir(self, provide_directory_mock):
    """Service base dir delegates to provide_directory with the cache root,
    the service package folder and the server URL prefix."""
    provide_directory_mock.return_value = "dummy value"
    fileCache = FileCache(self.config)
    command = {
        'commandParams': {
            'service_package_folder': 'HDP/2.1.1/services/ZOOKEEPER/package'
        }
    }
    res = fileCache.get_service_base_dir(command, "server_url_pref")
    # The expected string matches pprint's multi-line tuple rendering,
    # hence the embedded '\n' and leading-space quirks.
    self.assertEquals(
        pprint.pformat(provide_directory_mock.call_args_list[0][0]),
        "('/var/lib/ambari-agent/cache',\n "
        "'stacks/HDP/2.1.1/services/ZOOKEEPER/package',\n"
        " 'server_url_pref')")
    self.assertEquals(res, "dummy value")
def __init__(self, config, controller):
    """Initialise executor state: temp dir, file cache, python executor and
    status-command output paths; hook cache reset to agent registration."""
    self.config = config
    self.tmp_dir = config.get('agent', 'prefix')
    # Local cache of stack/service definitions downloaded from the server.
    self.file_cache = FileCache(config)
    self.python_executor = PythonExecutor(self.tmp_dir, config)
    self.status_commands_stdout = os.path.join(self.tmp_dir,
                                               'status_command_stdout.txt')
    self.status_commands_stderr = os.path.join(self.tmp_dir,
                                               'status_command_stderr.txt')
    # cache reset will be called on every agent registration
    controller.registration_listeners.append(self.file_cache.reset)
    # Clean up old status command files if any
    try:
        os.unlink(self.status_commands_stdout)
        os.unlink(self.status_commands_stderr)
    except OSError:
        pass  # Ignore fail
def process_external_namespaces(namespaces: [object], external_namespaces: [object]):
    """Recursively convert parsed namespaces into TExternalNamespace entries
    (include paths plus contained classes), appending to external_namespaces.

    NOTE(review): relies on the module-level ``new_params``; a FileCache is
    re-created per namespace even though it only depends on new_params —
    presumably cheap, confirm before hoisting it out of the loop.
    """
    for cur_namespace in namespaces:
        external_namespace = TExternalNamespace()
        external_namespace.name = cur_namespace.name
        external_namespace.detach_method_name = new_params.detach_method_name
        external_namespace.get_raw_pointer_method_name = new_params.get_raw_pointer_method_name
        file_cache = FileCache(new_params)
        # Header path for the namespace itself.
        external_namespace.include = file_cache.namespace_header(cur_namespace.full_name_array)
        # Recurse into nested namespaces before collecting classes.
        process_external_namespaces(cur_namespace.nested_namespaces,
                                    external_namespace.namespaces)
        for cur_class in cur_namespace.classes:
            external_class = TExternalClass()
            external_class.name = cur_class.name
            external_class.wrap_name = cur_class.wrap_name
            # Forward-declaration header vs. full definition header.
            external_class.include_declaration = file_cache.class_header_decl(cur_class.full_name_array)
            external_class.include_definition = file_cache.class_header(cur_class.full_name_array)
            external_namespace.classes.append(external_class)
        external_namespaces.append(external_namespace)
def test_provide_directory_no_update(self, build_download_url_mock):
    """With auto cache update disabled, provide_directory must return the
    local path without attempting any download."""
    try:
        self.config.set(AmbariConfig.AMBARI_PROPERTIES_CATEGORY,
                        FileCache.ENABLE_AUTO_AGENT_CACHE_UPDATE_KEY,
                        "false")
        fileCache = FileCache(self.config)
        # Test uptodate dirs after start
        path = os.path.join("cache_path", "subdirectory")
        res = fileCache.provide_directory("cache_path", "subdirectory",
                                          "server_url_prefix")
        self.assertEquals(res, path)
        # No download URL should ever have been built.
        self.assertFalse(build_download_url_mock.called)
    finally:
        # Restore the default so other tests see auto-update enabled.
        self.config.set(AmbariConfig.AMBARI_PROPERTIES_CATEGORY,
                        FileCache.ENABLE_AUTO_AGENT_CACHE_UPDATE_KEY,
                        "true")
        pass
def __init__(self, config, controller):
    """Set up command-execution state: temp dirs, file cache, status-command
    output paths and in-progress command bookkeeping."""
    self.config = config
    self.tmp_dir = config.get('agent', 'prefix')
    self.exec_tmp_dir = Constants.AGENT_TMP_DIR
    # Local cache of stack/service definitions downloaded from the server.
    self.file_cache = FileCache(config)
    self.status_commands_stdout = os.path.join(
        self.tmp_dir, 'status_command_stdout.txt')
    self.status_commands_stderr = os.path.join(
        self.tmp_dir, 'status_command_stderr.txt')
    self.public_fqdn = hostname.public_hostname(config)
    # cache reset will be called on every agent registration
    controller.registration_listeners.append(self.file_cache.reset)
    # Clean up old status command files if any
    try:
        os.unlink(self.status_commands_stdout)
        os.unlink(self.status_commands_stderr)
    except OSError:
        pass  # Ignore fail
    # Map of currently running commands, guarded by an RLock.
    self.commands_in_progress_lock = threading.RLock()
    self.commands_in_progress = {}
def test_read_write_hash_sum(self):
    """Round-trip a hash sum through write/read; a missing file reads as
    None; write failures surface as CachingException."""
    tmpdir = tempfile.mkdtemp()
    dummyhash = "DUMMY_HASH"
    fileCache = FileCache(self.config)
    fileCache.write_hash_sum(tmpdir, dummyhash)
    newhash = fileCache.read_hash_sum(tmpdir)
    self.assertEquals(newhash, dummyhash)
    shutil.rmtree(tmpdir)
    # Test read of not existing file
    newhash = fileCache.read_hash_sum(tmpdir)
    self.assertEquals(newhash, None)
    # Test write to not existing file (Python 2 builtin open is patched)
    with patch("__builtin__.open") as open_mock:
        open_mock.side_effect = self.exc_side_effect
        try:
            fileCache.write_hash_sum(tmpdir, dummyhash)
            self.fail('CachingException not thrown')
        except CachingException:
            pass  # Expected
        except Exception, e:
            self.fail('Unexpected exception thrown:' + str(e))
def test_fetch_url(self, urlopen_mock):
    """fetch_url streams the remote resource chunk-wise into an in-memory
    buffer; read errors must be wrapped in CachingException."""
    fileCache = FileCache(self.config)
    remote_url = "http://dummy-url/"
    # Test normal download
    test_str = 'abc' * 100000  # Very long string
    test_string_io = StringIO.StringIO(test_str)
    test_buffer = MagicMock()
    test_buffer.read.side_effect = test_string_io.read
    urlopen_mock.return_value = test_buffer
    memory_buffer = fileCache.fetch_url(remote_url)
    self.assertEquals(memory_buffer.getvalue(), test_str)
    # 300000 bytes / internal chunk size => 20 reads (incl. terminating one).
    self.assertEqual(test_buffer.read.call_count, 20)  # depends on buffer size
    # Test exception handling
    test_buffer.read.side_effect = self.exc_side_effect
    try:
        fileCache.fetch_url(remote_url)
        self.fail('CachingException not thrown')
    except CachingException:
        pass  # Expected
    except Exception, e:
        self.fail('Unexpected exception thrown:' + str(e))
def run(_K, _recommended_artists):
    """Hybrid recommender evaluation (CF + CB + PB) fused by Borda-style
    rank voting, evaluated with NF-fold cross validation per user.

    Returns a dict with avg_prec, avg_rec, f1_score and a 'recommended' flag.
    """
    avg_prec = 0
    avg_rec = 0
    no_users = UAM.shape[0]
    no_artists = UAM.shape[1]
    # Pre-computed per-method recommendation caches (read_for_hybrid below).
    cf_file = FileCache("CF", _K, _recommended_artists)
    cb_file = FileCache("CB_Wiki", _K, _recommended_artists)
    pb_file = FileCache("PB", 1, _recommended_artists)
    recommended_artists = {}  # NOTE(review): assigned but never used here
    for u in range(0, no_users):
        # Get seed user's artists listened to
        u_aidx = np.nonzero(UAM[u, :])[0]
        # Skip users with too few artists to split into NF folds, and the
        # last user.
        if NF >= len(u_aidx) or u == no_users - 1:
            continue
        # Split user's artists into train and test set for cross-fold (CV) validation
        fold = 0
        kf = cross_validation.KFold(
            len(u_aidx), n_folds=NF)  # create folds (splits) for 5-fold CV
        for train_aidx, test_aidx in kf:  # for all folds
            # Show progress
            if VERBOSE:
                # NOTE(review): the "******" below is a garbled/redacted
                # fragment (presumably str(u)) — restore before running.
                print "User: "******", Fold: " + str(fold) + ", Training items: " + str(
                    len(train_aidx)) + ", Test items: " + str(
                        len(test_aidx)),  # the comma at the end avoids line break
            # Call recommend function
            copy_UAM = UAM.copy(
            )  # we need to create a copy of the UAM, otherwise modifications within recommend function will effect the variable
            dict_rec_aidx_CB = cb_file.read_for_hybrid(
                u, fold)  #recommend_CB(AAM, u_aidx[train_aidx], _K)
            dict_rec_aidx_PB = pb_file.read_for_hybrid(
                u, fold
            )  #recommend_PB(copy_UAM, u_aidx[train_aidx], _recommended_artists)
            dict_rec_aidx_CF = cf_file.read_for_hybrid(
                u, fold
            )  #recommend_CF(copy_UAM, u_aidx[train_aidx], _recommended_artists)
            # @JPEER check in group if that solution is fair enough
            if len(dict_rec_aidx_CB) == 0 or len(dict_rec_aidx_PB) == 0 or len(
                    dict_rec_aidx_CF) == 0:
                continue
            # Fuse scores given by CB and by PB recommenders
            # First, create matrix to hold scores per recommendation method per artist
            scores = np.zeros(shape=(3, no_artists), dtype=np.float32)
            # Add scores from CB and CF recommenders to this matrix
            for aidx in dict_rec_aidx_CB.keys():
                scores[0, aidx] = dict_rec_aidx_CB[aidx]
            for aidx in dict_rec_aidx_PB.keys():
                scores[1, aidx] = dict_rec_aidx_PB[aidx]
            for aidx in dict_rec_aidx_CF.keys():
                scores[2, aidx] = dict_rec_aidx_CF[aidx]
            # Convert scores to ranks
            ranks = np.zeros(shape=(3, no_artists),
                             dtype=np.int16)  # init rank matrix
            for m in range(0, scores.shape[0]):  # for all methods to fuse
                aidx_nz = np.nonzero(
                    scores[m])[0]  # identify artists with positive scores
                scores_sorted_idx = np.argsort(
                    scores[m, aidx_nz]
                )  # sort artists with positive scores according to their score
                # Insert votes (i.e., inverse ranks) for each artist and current method
                for a in range(0, len(scores_sorted_idx)):
                    ranks[m, aidx_nz[scores_sorted_idx[a]]] = a + 1
            # Sum ranks over different approaches
            ranks_fused = np.sum(ranks, axis=0)
            # Sort and select top K_HR artists to recommend
            sorted_idx = np.argsort(ranks_fused)
            sorted_idx_top = sorted_idx[-_recommended_artists:]
            # Put (artist index, score) pairs of highest scoring artists in a dictionary
            dict_rec_aidx = {}
            for i in range(0, len(sorted_idx_top)):
                dict_rec_aidx[sorted_idx_top[i]] = ranks_fused[
                    sorted_idx_top[i]]
            # Distill recommended artist indices from dictionary returned by the recommendation functions
            rec_aidx = dict_rec_aidx.keys()
            if VERBOSE:
                print "Recommended items: ", len(rec_aidx)
            # Compute performance measures
            correct_aidx = np.intersect1d(
                u_aidx[test_aidx], rec_aidx)  # correctly predicted artists
            # True Positives is amount of overlap in recommended artists and test artists
            TP = len(correct_aidx)
            # False Positives is recommended artists minus correctly predicted ones
            FP = len(np.setdiff1d(rec_aidx, correct_aidx))
            # Precision is percentage of correctly predicted among predicted
            # Handle special case that not a single artist could be recommended -> by definition, precision = 100%
            if len(rec_aidx) == 0:
                prec = 100.0
            else:
                prec = 100.0 * TP / len(rec_aidx)
            # Recall is percentage of correctly predicted among all listened to
            # Handle special case that there is no single artist in the test set -> by definition, recall = 100%
            if len(test_aidx) == 0:
                rec = 100.0
            else:
                rec = 100.0 * TP / len(test_aidx)
            # add precision and recall for current user and fold to aggregate variables
            avg_prec += prec / (NF * no_users)
            avg_rec += rec / (NF * no_users)
            # Output precision and recall of current fold
            if VERBOSE:
                print("\tPrecision: %.2f, Recall: %.2f" % (prec, rec))
            # Increase fold counter
            fold += 1
    # Output mean average precision and recall
    if VERBOSE:
        print("\nMAP: %.2f, MAR %.2f" % (avg_prec, avg_rec))
        print("%.3f, %.3f" % (avg_prec, avg_rec))
    # NOTE(review): divides by (avg_prec + avg_rec); zero if nothing was
    # evaluated — confirm callers guarantee at least one fold runs.
    f1_score = 2 * ((avg_prec * avg_rec) / (avg_prec + avg_rec))
    data = {}
    data['avg_prec'] = avg_prec
    data['avg_rec'] = avg_rec
    data['f1_score'] = f1_score
    data['recommended'] = False
    return data
def test_reset(self):
    """reset() must clear the set of already-verified cache paths."""
    cache = FileCache(self.config)
    cache.uptodate_paths.append('dummy-path')
    cache.reset()
    self.assertFalse(cache.uptodate_paths)
class TestFileCache(TestCase):
    """Tests for ambari-agent FileCache: path-resolution helpers and the
    download/validate/unpack flow of provide_directory."""

    def setUp(self):
        # disable stdout
        out = StringIO.StringIO()
        sys.stdout = out
        # generate sample config
        tmpdir = tempfile.gettempdir()
        self.config = ConfigParser.RawConfigParser()
        self.config.add_section('agent')
        self.config.set('agent', 'prefix', tmpdir)
        self.config.set('agent', 'cache_dir', "/var/lib/ambari-agent/cache")
        self.config.set('agent', 'tolerate_download_failures', "true")

    def test_reset(self):
        # reset() forgets all paths previously verified as up to date.
        fileCache = FileCache(self.config)
        fileCache.uptodate_paths.append('dummy-path')
        fileCache.reset()
        self.assertFalse(fileCache.uptodate_paths)

    @patch.object(FileCache, "provide_directory")
    def test_get_service_base_dir(self, provide_directory_mock):
        """Service dir request is delegated to provide_directory unchanged."""
        provide_directory_mock.return_value = "dummy value"
        fileCache = FileCache(self.config)
        command = {
            'commandParams': {
                'service_package_folder':
                os.path.join('stacks', 'HDP', '2.1.1', 'services',
                             'ZOOKEEPER', 'package')
            }
        }
        res = fileCache.get_service_base_dir(command, "server_url_pref")
        # Expected string mirrors pprint's multi-line tuple rendering.
        self.assertEquals(
            pprint.pformat(provide_directory_mock.call_args_list[0][0]),
            "('/var/lib/ambari-agent/cache',\n "
            "{0},\n"
            " 'server_url_pref')".format(
                pprint.pformat(
                    os.path.join('stacks', 'HDP', '2.1.1', 'services',
                                 'ZOOKEEPER', 'package'))))
        self.assertEquals(res, "dummy value")

    @patch.object(FileCache, "provide_directory")
    def test_get_hook_base_dir(self, provide_directory_mock):
        """None when 'hooks_folder' is missing; otherwise delegated."""
        fileCache = FileCache(self.config)
        # Check missing parameter
        command = {'commandParams': {}}
        base = fileCache.get_hook_base_dir(command, "server_url_pref")
        self.assertEqual(base, None)
        self.assertFalse(provide_directory_mock.called)
        # Check existing dir case
        command = {
            'commandParams': {
                'hooks_folder': os.path.join('HDP', '2.1.1', 'hooks')
            }
        }
        provide_directory_mock.return_value = "dummy value"
        fileCache = FileCache(self.config)
        res = fileCache.get_hook_base_dir(command, "server_url_pref")
        self.assertEquals(
            pprint.pformat(provide_directory_mock.call_args_list[0][0]),
            "('/var/lib/ambari-agent/cache', "
            "{0}, "
            "'server_url_pref')".format(
                pprint.pformat(
                    os.path.join('stacks', 'HDP', '2.1.1', 'hooks'))))
        self.assertEquals(res, "dummy value")

    @patch.object(FileCache, "provide_directory")
    def test_get_custom_actions_base_dir(self, provide_directory_mock):
        """Custom-actions request is forwarded verbatim."""
        provide_directory_mock.return_value = "dummy value"
        fileCache = FileCache(self.config)
        res = fileCache.get_custom_actions_base_dir("server_url_pref")
        self.assertEquals(
            pprint.pformat(provide_directory_mock.call_args_list[0][0]),
            "('/var/lib/ambari-agent/cache', 'custom_actions', 'server_url_pref')"
        )
        self.assertEquals(res, "dummy value")

    @patch.object(FileCache, "build_download_url")
    @patch.object(FileCache, "fetch_url")
    @patch.object(FileCache, "read_hash_sum")
    @patch.object(FileCache, "invalidate_directory")
    @patch.object(FileCache, "unpack_archive")
    @patch.object(FileCache, "write_hash_sum")
    def test_provide_directory(self, write_hash_sum_mock, unpack_archive_mock,
                               invalidate_directory_mock, read_hash_sum_mock,
                               fetch_url_mock, build_download_url_mock):
        """Exercise the provide_directory state machine: fresh download,
        unchanged-hash shortcut, memoized up-to-date path, and the three
        failure-tolerance configurations."""
        build_download_url_mock.return_value = "http://dummy-url/"
        HASH1 = "hash1"
        membuffer = MagicMock()
        membuffer.getvalue.return_value.strip.return_value = HASH1
        fileCache = FileCache(self.config)
        # Test uptodate dirs after start
        self.assertFalse(fileCache.uptodate_paths)
        path = os.path.join("cache_path", "subdirectory")
        # Test initial downloading (when dir does not exist)
        fetch_url_mock.return_value = membuffer
        read_hash_sum_mock.return_value = "hash2"
        res = fileCache.provide_directory("cache_path", "subdirectory",
                                          "server_url_prefix")
        self.assertTrue(invalidate_directory_mock.called)
        self.assertTrue(write_hash_sum_mock.called)
        # One fetch for the hash, one for the archive.
        self.assertEquals(fetch_url_mock.call_count, 2)
        self.assertEquals(pprint.pformat(fileCache.uptodate_paths),
                          pprint.pformat([path]))
        self.assertEquals(res, path)
        fetch_url_mock.reset_mock()
        write_hash_sum_mock.reset_mock()
        invalidate_directory_mock.reset_mock()
        unpack_archive_mock.reset_mock()
        # Test cache invalidation when local hash does not differ
        fetch_url_mock.return_value = membuffer
        read_hash_sum_mock.return_value = HASH1
        fileCache.reset()
        res = fileCache.provide_directory("cache_path", "subdirectory",
                                          "server_url_prefix")
        self.assertFalse(invalidate_directory_mock.called)
        self.assertFalse(write_hash_sum_mock.called)
        self.assertEquals(fetch_url_mock.call_count, 1)
        self.assertEquals(pprint.pformat(fileCache.uptodate_paths),
                          pprint.pformat([path]))
        self.assertEquals(res, path)
        fetch_url_mock.reset_mock()
        write_hash_sum_mock.reset_mock()
        invalidate_directory_mock.reset_mock()
        unpack_archive_mock.reset_mock()
        # Test execution path when path is up-to date (already checked)
        res = fileCache.provide_directory("cache_path", "subdirectory",
                                          "server_url_prefix")
        self.assertFalse(invalidate_directory_mock.called)
        self.assertFalse(write_hash_sum_mock.called)
        self.assertEquals(fetch_url_mock.call_count, 0)
        self.assertEquals(pprint.pformat(fileCache.uptodate_paths),
                          pprint.pformat([path]))
        self.assertEquals(res, path)
        # Check exception handling when tolerance is disabled
        self.config.set('agent', 'tolerate_download_failures', "false")
        fetch_url_mock.side_effect = self.caching_exc_side_effect
        fileCache = FileCache(self.config)
        try:
            fileCache.provide_directory("cache_path", "subdirectory",
                                        "server_url_prefix")
            self.fail('CachingException not thrown')
        except CachingException:
            pass  # Expected
        except Exception, e:
            self.fail('Unexpected exception thrown:' + str(e))
        # Check that unexpected exceptions are still propagated when
        # tolerance is enabled
        self.config.set('agent', 'tolerate_download_failures', "false")
        fetch_url_mock.side_effect = self.exc_side_effect
        fileCache = FileCache(self.config)
        try:
            fileCache.provide_directory("cache_path", "subdirectory",
                                        "server_url_prefix")
            self.fail('Exception not thrown')
        except Exception:
            pass  # Expected
        # Check exception handling when tolerance is enabled
        self.config.set('agent', 'tolerate_download_failures', "true")
        fetch_url_mock.side_effect = self.caching_exc_side_effect
        fileCache = FileCache(self.config)
        res = fileCache.provide_directory("cache_path", "subdirectory",
                                          "server_url_prefix")
        self.assertEquals(res, path)
def __init__(self, config):
    """Minimal executor setup: keep the config, resolve the agent temp dir,
    and build the shared file cache and python executor."""
    self.config = config
    # Agent working directory ('agent/prefix' in the agent config).
    self.tmp_dir = config.get('agent', 'prefix')
    # Local cache of stack/service definitions downloaded from the server.
    self.file_cache = FileCache(config)
    self.python_executor = PythonExecutor(self.tmp_dir, config)
def run(_K, _recommended_artists):
    """Hybrid demographic-filtering evaluation (age + country + gender)
    fused by arithmetic mean of scores, NF-fold cross validated per user.

    Returns a dict with f1_score, avg_prec, avg_rec and a 'recommended' flag.
    """
    # Initialize variables to hold performance measures
    avg_prec = 0  # mean precision
    avg_rec = 0  # mean recall
    # Pre-computed per-method recommendation caches (read_for_hybrid below).
    df_a_file = FileCache("DF_age", _K, _recommended_artists)
    df_c_file = FileCache("DF_country", _K, _recommended_artists)
    df_g_file = FileCache("DF_gender", _K, _recommended_artists)
    # For all users in our data (UAM)
    no_users = UAM.shape[0]
    no_artists = UAM.shape[1]
    for u in range(0, no_users):
        # Get seed user's artists listened to
        u_aidx = np.nonzero(UAM[u, :])[0]
        # Skip users with too few artists for NF folds, and the last user.
        if NF >= len(u_aidx) or u == no_users - 1:
            continue
        # Split user's artists into train and test set for cross-fold (CV) validation
        fold = 0
        kf = cross_validation.KFold(
            len(u_aidx), n_folds=NF)  # create folds (splits) for 5-fold CV
        for train_aidx, test_aidx in kf:  # for all folds
            if VERBOSE:
                # NOTE(review): the "******" below is a garbled/redacted
                # fragment (presumably str(u)) — restore before running.
                print "User: "******", Fold: " + str(fold) + ", Training items: " + str(
                    len(train_aidx)) + ", Test items: " + str(
                        len(test_aidx)),  # the comma at the end avoids line break
            # Call recommend function
            copy_UAM = UAM.copy(
            )  # we need to create a copy of the UAM, otherwise modifications within recommend function will effect the variable
            ###############################################
            ## Combine CB and CF together so we get a HF ##
            ###############################################
            dict_rec_aidx_DF_A = df_a_file.read_for_hybrid(u, fold)
            dict_rec_aidx_DF_C = df_c_file.read_for_hybrid(u, fold)
            dict_rec_aidx_DF_G = df_g_file.read_for_hybrid(u, fold)
            # @JPEER check in group if that solution is fair enough
            if len(dict_rec_aidx_DF_A) == 0 or len(
                    dict_rec_aidx_DF_C) == 0 or len(dict_rec_aidx_DF_G) == 0:
                continue
            # Fuse scores given by CF and by CB recommenders
            # First, create matrix to hold scores per recommendation method per artist
            scores = np.zeros(shape=(3, no_artists), dtype=np.float32)
            # Add scores from CB and CF recommenders to this matrix
            for aidx in dict_rec_aidx_DF_A.keys():
                scores[0, aidx] = dict_rec_aidx_DF_A[aidx]
            for aidx in dict_rec_aidx_DF_C.keys():
                scores[1, aidx] = dict_rec_aidx_DF_C[aidx]
            for aidx in dict_rec_aidx_DF_G.keys():
                scores[2, aidx] = dict_rec_aidx_DF_G[aidx]
            # Apply aggregation function (here, just take arithmetic mean of scores)
            scores_fused = np.mean(scores, axis=0)
            # Sort and select top K_HR artists to recommend
            sorted_idx = np.argsort(scores_fused)
            sorted_idx_top = sorted_idx[-_recommended_artists:]
            # Put (artist index, score) pairs of highest scoring artists in a dictionary
            dict_rec_aidx = {}
            for i in range(0, len(sorted_idx_top)):
                dict_rec_aidx[sorted_idx_top[i]] = scores_fused[
                    sorted_idx_top[i]]
            # Distill recommended artist indices from dictionary returned by the recommendation functions
            rec_aidx = dict_rec_aidx.keys()
            if VERBOSE:
                print "Recommended items: ", len(rec_aidx)
            # Compute performance measures
            correct_aidx = np.intersect1d(
                u_aidx[test_aidx], rec_aidx)  # correctly predicted artists
            # TP - True Positives is amount of overlap in recommended artists and test artists
            # FP - False Positives is recommended artists minus correctly predicted ones
            TP = len(correct_aidx)
            FP = len(np.setdiff1d(rec_aidx, correct_aidx))
            # Precision is percentage of correctly predicted among predicted
            # Handle special case that not a single artist could be recommended -> by definition, precision = 100%
            if len(rec_aidx) == 0:
                prec = 100.0
            else:
                prec = 100.0 * TP / len(rec_aidx)
            # Recall is percentage of correctly predicted among all listened to
            # Handle special case that there is no single artist in the test set -> by definition, recall = 100%
            if len(test_aidx) == 0:
                rec = 100.0
            else:
                rec = 100.0 * TP / len(test_aidx)
            # add precision and recall for current user and fold to aggregate variables
            avg_prec += prec / (NF * no_users)
            avg_rec += rec / (NF * no_users)
            # Output precision and recall of current fold
            if VERBOSE:
                print("\tPrecision: %.2f, Recall: %.2f" % (prec, rec))
            # Increase fold counter
            fold += 1
    f1_score = 2 * ((avg_prec * avg_rec) / (avg_prec + avg_rec))
    # Output mean average precision and recall
    if VERBOSE:
        print("\nMAP: %.2f, MAR %.2f, F1 Scrore: %.2f" %
              (avg_prec, avg_rec, f1_score))
        print("%.3f, %.3f" % (avg_prec, avg_rec))
        # NOTE(review): 'K' below is not the parameter '_K'; unless a
        # module-level K exists this raises NameError — confirm.
        print("K neighbors " + str(K))
        print("Recommendation: " + str(_recommended_artists))
    data = {}
    data['f1_score'] = f1_score
    data['avg_prec'] = avg_prec
    data['avg_rec'] = avg_rec
    data['recommended'] = False
    return data
#!/usr/bin/env python3 # # proxy data cache test # import time from FileCache import FileCache def log(s): print(time.ctime() + " testFileCache: " + s) fn1 = "testfiles1.txt" log("creating fileCache1") c = FileCache(fn1) f1 = "size=8429&ed2k=C2FAD4A41C26FD8840A72350C9A10A47&fmask=7FF8FFF9FE&amask=0000FCC0" log("getting file1..") d1 = c.getFile(f1) if None == d1: log("getFile1: " + str(d1) + " as expected") else: log("getFile1: " + str(d1) + " - not expected") d1 = "test|data|not|important & confusing" log("adding file1..") c.addFile(f1, d1) log("getting file1 again..") d2 = c.getFile(f1) if d1 == d2:
def test_provide_directory(self, write_hash_sum_mock, unpack_archive_mock,
                           invalidate_directory_mock, read_hash_sum_mock,
                           fetch_url_mock, build_download_url_mock):
    """Exercise provide_directory: fresh download, unchanged-hash shortcut,
    memoized up-to-date path, and CachingException when failure tolerance
    is disabled."""
    build_download_url_mock.return_value = "http://dummy-url/"
    HASH1 = "hash1"
    membuffer = MagicMock()
    membuffer.getvalue.return_value.strip.return_value = HASH1
    fileCache = FileCache(self.config)
    # Test uptodate dirs after start
    self.assertFalse(fileCache.uptodate_paths)
    path = os.path.join("cache_path", "subdirectory")
    # Test initial downloading (when dir does not exist)
    fetch_url_mock.return_value = membuffer
    read_hash_sum_mock.return_value = "hash2"
    res = fileCache.provide_directory("cache_path", "subdirectory",
                                      "server_url_prefix")
    self.assertTrue(invalidate_directory_mock.called)
    self.assertTrue(write_hash_sum_mock.called)
    # One fetch for the hash, one for the archive itself.
    self.assertEquals(fetch_url_mock.call_count, 2)
    self.assertEquals(pprint.pformat(fileCache.uptodate_paths),
                      pprint.pformat([path]))
    self.assertEquals(res, path)
    fetch_url_mock.reset_mock()
    write_hash_sum_mock.reset_mock()
    invalidate_directory_mock.reset_mock()
    unpack_archive_mock.reset_mock()
    # Test cache invalidation when local hash does not differ
    fetch_url_mock.return_value = membuffer
    read_hash_sum_mock.return_value = HASH1
    fileCache.reset()
    res = fileCache.provide_directory("cache_path", "subdirectory",
                                      "server_url_prefix")
    self.assertFalse(invalidate_directory_mock.called)
    self.assertFalse(write_hash_sum_mock.called)
    self.assertEquals(fetch_url_mock.call_count, 1)
    self.assertEquals(pprint.pformat(fileCache.uptodate_paths),
                      pprint.pformat([path]))
    self.assertEquals(res, path)
    fetch_url_mock.reset_mock()
    write_hash_sum_mock.reset_mock()
    invalidate_directory_mock.reset_mock()
    unpack_archive_mock.reset_mock()
    # Test execution path when path is up-to date (already checked)
    res = fileCache.provide_directory("cache_path", "subdirectory",
                                      "server_url_prefix")
    self.assertFalse(invalidate_directory_mock.called)
    self.assertFalse(write_hash_sum_mock.called)
    self.assertEquals(fetch_url_mock.call_count, 0)
    self.assertEquals(pprint.pformat(fileCache.uptodate_paths),
                      pprint.pformat([path]))
    self.assertEquals(res, path)
    # Check exception handling when tolerance is disabled
    self.config.set('agent', 'tolerate_download_failures', "false")
    fetch_url_mock.side_effect = self.caching_exc_side_effect
    fileCache = FileCache(self.config)
    try:
        fileCache.provide_directory("cache_path", "subdirectory",
                                    "server_url_prefix")
        self.fail('CachingException not thrown')
    except CachingException:
        pass  # Expected
    except Exception, e:
        self.fail('Unexpected exception thrown:' + str(e))
def run(_K, _recommended_artists):
    """ Function to run an evaluation experiment """
    # Hybrid of CB and CF: recommend only artists proposed by BOTH methods
    # (set intersection), evaluated with NF-fold cross validation per user.
    # Returns a dict with avg_prec, avg_rec, f1_score and 'recommended'.
    # Initialize variables to hold performance measures
    avg_prec = 0  # mean precision
    avg_rec = 0  # mean recall
    # Pre-computed per-method recommendation caches (read_for_hybrid below).
    cb_file = FileCache("CB_Wiki", _K, _recommended_artists)
    cf_file = FileCache("CF", _K, _recommended_artists)
    # For all users in our data (UAM)
    no_users = UAM.shape[0]
    for u in range(0, no_users):
        # Get seed user's artists listened to
        # u_aidx = np.nonzero(UAM[u, :])[0]
        u_aidx = np.nonzero(UAM[u, :])[0]
        # Skip users with too few artists for NF folds, and the last user.
        if NF >= len(u_aidx) or u == no_users - 1:
            continue
        # Split user's artists into train and test set for cross-fold (CV) validation
        fold = 0
        # create folds (splits) for 10-fold CV
        kf = cross_validation.KFold(len(u_aidx), n_folds=NF)
        # For all folds
        for train_aidx, test_aidx in kf:
            if VERBOSE:
                # NOTE(review): the "******" below is a garbled/redacted
                # fragment (presumably str(u)) — restore before running.
                print "User: "******", Fold: " + str(fold) + ", Training items: " + str(
                    len(train_aidx)) + ", Test items: " + str(len(test_aidx)),
            # Create a copy of the UAM, otherwise modifications within recommend function will effect the variable
            copy_UAM = UAM.copy()
            # Call recommend function
            rec_aidx_CF = cf_file.read_for_hybrid(
                u, fold)  # recommend_CF(copy_UAM, u, u_aidx[train_aidx])
            rec_aidx_CB = cb_file.read_for_hybrid(
                u, fold)  # recommend_CB(AAM, u_aidx[train_aidx], _K)
            # @JPEER check in group if that solution is fair enough
            if len(rec_aidx_CF) == 0 or len(rec_aidx_CB) == 0:
                continue
            # Return the sorted, unique values that are in both of the input arrays.
            rec_aidx = np.intersect1d(rec_aidx_CB, rec_aidx_CF)
            if VERBOSE:
                print "Items CB: " + str(len(rec_aidx_CB))
                print "Items CF: " + str(len(rec_aidx_CF))
                print "Recommended items: " + str(len(rec_aidx))
                print "Predicted to be: " + str(_recommended_artists)
            ################################
            # Compute performance measures #
            ################################
            # Correctly predicted artists
            correct_aidx = np.intersect1d(u_aidx[test_aidx], rec_aidx)
            # TP - True Positives is amount of overlap in recommended artists and test artists
            # FP - False Positives is recommended artists minus correctly predicted ones
            TP = len(correct_aidx)
            FP = len(np.setdiff1d(rec_aidx, correct_aidx))
            # Precision is percentage of correctly predicted among predicted
            # Handle special case that not a single artist could be recommended -> by definition, precision = 100%
            if len(rec_aidx) == 0:
                prec = 100.0
            else:
                prec = 100.0 * TP / len(rec_aidx)
            # Recall is percentage of correctly predicted among all listened to
            # Handle special case that there is no single artist in the test set -> by definition, recall = 100%
            if len(test_aidx) == 0:
                rec = 100.0
            else:
                rec = 100.0 * TP / len(test_aidx)
            # add precision and recall for current user and fold to aggregate variables
            avg_prec += prec / (NF * no_users)
            avg_rec += rec / (NF * no_users)
            # Output precision and recall of current fold
            if VERBOSE:
                print("\tPrecision: %.2f, Recall: %.2f" % (prec, rec))
            # Increase fold counter
            fold += 1
    f1_score = 2 * ((avg_prec * avg_rec) / (avg_prec + avg_rec))
    # Output mean average precision and recall
    if VERBOSE:
        print("MAP: %.3f, MAR: %.3f, F1 Score: %.3f" %
              (avg_prec, avg_rec, f1_score))
        print("K neighbors: " + str(_K))
        print("Recommendations: " + str(_recommended_artists))
    data = {}
    data['avg_prec'] = avg_prec
    data['avg_rec'] = avg_rec
    data['f1_score'] = f1_score
    data['recommended'] = False
    return data