Example #1
    def test_get_hook_base_dir(self, provide_directory_mock):
        fileCache = FileCache(self.config)
        # Check missing parameter
        command = {'commandParams': {}}
        base = fileCache.get_hook_base_dir(command, "server_url_pref")
        self.assertEqual(base, None)
        self.assertFalse(provide_directory_mock.called)

        # Check existing dir case
        command = {
            'commandParams': {
                'hooks_folder': os.path.join('HDP', '2.1.1', 'hooks')
            }
        }
        provide_directory_mock.return_value = "dummy value"
        fileCache = FileCache(self.config)
        res = fileCache.get_hook_base_dir(command, "server_url_pref")
        self.assertEquals(
            pprint.pformat(provide_directory_mock.call_args_list[0][0]),
            "('/var/lib/ambari-agent/cache', "
            "{0}, "
            "'server_url_pref')".format(
                pprint.pformat(os.path.join('stacks', 'HDP', '2.1.1',
                                            'hooks'))))
        self.assertEquals(res, "dummy value")
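
Both assertions together describe get_hook_base_dir: a command without a
'hooks_folder' parameter yields None without ever touching provide_directory;
otherwise the folder is re-rooted under 'stacks' and delegated to
provide_directory. A sketch of that logic, with self.cache_dir standing in for
however the real class stores the configured cache directory:

import os

def get_hook_base_dir(self, command, server_url_prefix):
    try:
        hooks_subfolder = command['commandParams']['hooks_folder']
    except KeyError:
        return None  # missing parameter: no directory, no download
    subdirectory = os.path.join('stacks', hooks_subfolder)
    return self.provide_directory(self.cache_dir, subdirectory, server_url_prefix)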
Example #2
def cache_factory(mgr, kind):
    if kind == cache_options[0]:
        return Cache(mgr)
    elif kind == cache_options[1]:
        return FileCache(mgr)
    else:
        raise ValueError("{0} is not a valid cache type!".format(kind))
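
A minimal driver for the factory, assuming cache_options is a two-element
sequence of kind names; the values below and the manager stand-in are
illustrative, since neither appears in the snippet:

cache_options = ('plain', 'file')  # assumed values; the real sequence is not shown

class DummyManager(object):  # stand-in for whatever manager object the caches expect
    pass

plain_cache = cache_factory(DummyManager(), 'plain')  # -> Cache instance
file_cache = cache_factory(DummyManager(), 'file')    # -> FileCache instance
# cache_factory(DummyManager(), 'bogus')              # -> raises ValueError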
Example #3
    def __init__(self, config, controller):
        self.config = config
        self.tmp_dir = config.get('agent', 'prefix')
        self.force_https_protocol = config.get_force_https_protocol()
        self.exec_tmp_dir = Constants.AGENT_TMP_DIR
        self.file_cache = FileCache(config)
        self.status_commands_stdout = os.path.join(
            self.tmp_dir, 'status_command_stdout.txt')
        self.status_commands_stderr = os.path.join(
            self.tmp_dir, 'status_command_stderr.txt')
        self.public_fqdn = hostname.public_hostname(config)
        # cache reset will be called on every agent registration
        controller.registration_listeners.append(self.file_cache.reset)

        # Construct the hadoop credential lib JARs path
        self.credential_shell_lib_path = os.path.join(
            config.get('security', 'credential_lib_dir',
                       self.DEFAULT_CREDENTIAL_SHELL_LIB_PATH), '*')

        self.credential_conf_dir = config.get('security',
                                              'credential_conf_dir',
                                              self.DEFAULT_CREDENTIAL_CONF_DIR)

        self.credential_shell_cmd = config.get(
            'security', 'credential_shell_cmd',
            self.DEFAULT_CREDENTIAL_SHELL_CMD)

        # Clean up old status command files if any
        try:
            os.unlink(self.status_commands_stdout)
            os.unlink(self.status_commands_stderr)
        except OSError:
            pass  # Ignore fail
        self.commands_in_progress_lock = threading.RLock()
        self.commands_in_progress = {}
Example #4
    def __generate(self):
        self.__process()
        for namespace in self.api_description.namespaces:
            Capi.__substitute_implementation_class_name(namespace)
        namespace_generators = create_namespace_generators(
            self.api_description, self.params_description)
        by_first_argument_exception_traits = ExceptionTraits.ByFirstArgument(
            self.params_description, namespace_generators)
        no_handling_exception_traits = ExceptionTraits.NoHandling()
        if self.params_description.exception_handling_mode == TExceptionHandlingMode.by_first_argument:
            main_exception_traits = by_first_argument_exception_traits
        else:
            main_exception_traits = no_handling_exception_traits
        capi_generator = CapiGenerator(main_exception_traits,
                                       no_handling_exception_traits,
                                       self.params_description,
                                       self.api_description)
        file_cache = FileCache(self.params_description)
        for namespace_generator in namespace_generators:
            namespace_generator.generate(file_cache, capi_generator)
        capi_generator.generate(file_cache)
        self.__generate_root_header(namespace_generators, file_cache)

        if self.unit_tests_generator:
            self.unit_tests_generator.generate(namespace_generators)
Example #5
    def test_unpack_archive(self):
        tmpdir = tempfile.mkdtemp()
        dummy_archive_name = os.path.join("ambari_agent", "dummy_files",
                                          "dummy_archive.zip")
        archive_file = open(dummy_archive_name, "rb")
        fileCache = FileCache(self.config)
        fileCache.unpack_archive(archive_file, tmpdir)
        # Count summary size of unpacked files:
        total_size = 0
        total_files = 0
        total_dirs = 0
        for dirpath, dirnames, filenames in os.walk(tmpdir):
            total_dirs += 1
            for f in filenames:
                fp = os.path.join(dirpath, f)
                total_size += os.path.getsize(fp)
                total_files += 1
        self.assertEquals(total_size, 51258L)
        self.assertEquals(total_files, 28)
        self.assertEquals(total_dirs, 8)
        shutil.rmtree(tmpdir)

        # Test exception handling
        with patch("os.path.isdir") as isdir_mock:
            isdir_mock.side_effect = self.exc_side_effect
            try:
                fileCache.unpack_archive(archive_file, tmpdir)
                self.fail('CachingException not thrown')
            except CachingException:
                pass  # Expected
            except Exception, e:
                self.fail('Unexpected exception thrown:' + str(e))
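
The behavior exercised here amounts to extracting a zip archive from a
file-like object into a target directory; the second half of the test also
shows that the real FileCache.unpack_archive wraps failures in
CachingException. A bare-bones stand-in, not the Ambari source:

import os
import zipfile

def unpack_archive(mem_buffer, target_directory):
    zfile = zipfile.ZipFile(mem_buffer)
    try:
        for name in zfile.namelist():
            target = os.path.join(target_directory, name)
            if name.endswith('/'):  # directory entry
                if not os.path.isdir(target):
                    os.makedirs(target)
            else:  # file entry: ensure the parent directory exists, then write
                parent = os.path.dirname(target)
                if parent and not os.path.isdir(parent):
                    os.makedirs(parent)
                with open(target, 'wb') as out:
                    out.write(zfile.read(name))
    finally:
        zfile.close()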
Example #6
 def test_get_service_base_dir(self, isdir_mock):
     fileCache = FileCache(self.config)
     isdir_mock.return_value = True
     base = fileCache.get_service_base_dir("HDP", "2.0.7", "HBASE",
                                           "REGION_SERVER")
     self.assertEqual(
         base,
         "/var/lib/ambari-agent/cache/stacks/HDP/2.0.7/services/HBASE")
Example #7
 def test_get_custom_actions_base_dir(self, provide_directory_mock):
   provide_directory_mock.return_value = "dummy value"
   fileCache = FileCache(self.config)
   res = fileCache.get_custom_actions_base_dir("server_url_pref")
   self.assertEquals(
     pprint.pformat(provide_directory_mock.call_args_list[0][0]),
     "('/var/lib/ambari-agent/cache', 'custom_actions', 'server_url_pref')")
   self.assertEquals(res, "dummy value")
Example #8
    def test_invalidate_directory(self, makedirs_mock, rmtree_mock,
                                  unlink_mock, isdir_mock, isfile_mock,
                                  exists_mock):
        fileCache = FileCache(self.config)
        # Test execution flow if path points to file
        isfile_mock.return_value = True
        isdir_mock.return_value = False
        exists_mock.return_value = True

        fileCache.invalidate_directory("dummy-dir")

        self.assertTrue(unlink_mock.called)
        self.assertFalse(rmtree_mock.called)
        self.assertTrue(makedirs_mock.called)

        unlink_mock.reset_mock()
        rmtree_mock.reset_mock()
        makedirs_mock.reset_mock()

        # Test execution flow if path points to dir
        isfile_mock.return_value = False
        isdir_mock.return_value = True
        exists_mock.return_value = True

        fileCache.invalidate_directory("dummy-dir")

        self.assertFalse(unlink_mock.called)
        self.assertTrue(rmtree_mock.called)
        self.assertTrue(makedirs_mock.called)

        unlink_mock.reset_mock()
        rmtree_mock.reset_mock()
        makedirs_mock.reset_mock()

        # Test execution flow if path points nowhere
        isfile_mock.return_value = False
        isdir_mock.return_value = False
        exists_mock.return_value = False

        fileCache.invalidate_directory("dummy-dir")

        self.assertFalse(unlink_mock.called)
        self.assertFalse(rmtree_mock.called)
        self.assertTrue(makedirs_mock.called)

        unlink_mock.reset_mock()
        rmtree_mock.reset_mock()
        makedirs_mock.reset_mock()

        # Test exception handling
        makedirs_mock.side_effect = self.exc_side_effect
        try:
            fileCache.invalidate_directory("dummy-dir")
            self.fail('CachingException not thrown')
        except CachingException:
            pass  # Expected
        except Exception, e:
            self.fail('Unexpected exception thrown:' + str(e))
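
The three blocks above fully determine the happy path: a file is removed with
os.unlink, a directory tree with shutil.rmtree, and the directory is always
recreated with os.makedirs afterwards; the final block shows errors surfacing
as CachingException. A reconstruction along those lines, not the Ambari source:

import os
import shutil

def invalidate_directory(self, directory):
    try:
        if os.path.exists(directory):
            if os.path.isfile(directory):  # path points to a file
                os.unlink(directory)
            elif os.path.isdir(directory):  # path points to a directory
                shutil.rmtree(directory)
        os.makedirs(directory)  # always recreated, whatever was there before
    except Exception, err:
        raise CachingException("Can not invalidate directory {0}: {1}".format(directory, err))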
Example #9
 def test_build_download_url(self):
     fileCache = FileCache(self.config)
     url = fileCache.build_download_url('http://localhost:8080/resources/',
                                        'stacks/HDP/2.1.1/hooks',
                                        'archive.zip')
     self.assertEqual(
         url,
         'http://localhost:8080/resources//stacks/HDP/2.1.1/hooks/archive.zip'
     )
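
Note the doubled slash in the expected URL: judging by this assertion,
build_download_url joins its three arguments with '/' and does not normalize a
trailing slash on the prefix. A minimal function with the same observable
behavior, not the Ambari source:

def build_download_url(server_url_prefix, directory, filename):
    # No slash normalization: a prefix already ending in '/' yields '//' in the result.
    return "{0}/{1}/{2}".format(server_url_prefix, directory, filename)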
Example #10
 def test_get_service_base_dir(self, provide_directory_mock):
   provide_directory_mock.return_value = "dummy value"
   fileCache = FileCache(self.config)
   command = {
     'commandParams' : {
       'service_package_folder' : 'HDP/2.1.1/services/ZOOKEEPER/package'
     }
   }
   res = fileCache.get_service_base_dir(command, "server_url_pref")
   self.assertEquals(
     pprint.pformat(provide_directory_mock.call_args_list[0][0]),
     "('/var/lib/ambari-agent/cache',\n "
     "'stacks/HDP/2.1.1/services/ZOOKEEPER/package',\n"
     " 'server_url_pref')")
   self.assertEquals(res, "dummy value")
Example #11
 def __init__(self, config, controller):
   self.config = config
   self.tmp_dir = config.get('agent', 'prefix')
   self.file_cache = FileCache(config)
   self.python_executor = PythonExecutor(self.tmp_dir, config)
   self.status_commands_stdout = os.path.join(self.tmp_dir,
                                              'status_command_stdout.txt')
   self.status_commands_stderr = os.path.join(self.tmp_dir,
                                              'status_command_stderr.txt')
   # cache reset will be called on every agent registration
   controller.registration_listeners.append(self.file_cache.reset)
   # Clean up old status command files if any
   try:
     os.unlink(self.status_commands_stdout)
     os.unlink(self.status_commands_stderr)
   except OSError:
     pass # Ignore fail
Example #12
 def process_external_namespaces(namespaces: [object], external_namespaces: [object]):
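     # NOTE: new_params is not defined in this snippet; in the enclosing module it
     # presumably plays the same role as params_description in the other examples.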
     for cur_namespace in namespaces:
         external_namespace = TExternalNamespace()
         external_namespace.name = cur_namespace.name
         external_namespace.detach_method_name = new_params.detach_method_name
         external_namespace.get_raw_pointer_method_name = new_params.get_raw_pointer_method_name
         file_cache = FileCache(new_params)
         external_namespace.include = file_cache.namespace_header(cur_namespace.full_name_array)
         process_external_namespaces(cur_namespace.nested_namespaces, external_namespace.namespaces)
         for cur_class in cur_namespace.classes:
             external_class = TExternalClass()
             external_class.name = cur_class.name
             external_class.wrap_name = cur_class.wrap_name
             external_class.include_declaration = file_cache.class_header_decl(cur_class.full_name_array)
             external_class.include_definition = file_cache.class_header(cur_class.full_name_array)
             external_namespace.classes.append(external_class)
         external_namespaces.append(external_namespace)
Example #13
    def test_provide_directory_no_update(self, build_download_url_mock):
        try:
            self.config.set(AmbariConfig.AMBARI_PROPERTIES_CATEGORY,
                            FileCache.ENABLE_AUTO_AGENT_CACHE_UPDATE_KEY,
                            "false")
            fileCache = FileCache(self.config)

            # Test uptodate dirs after start
            path = os.path.join("cache_path", "subdirectory")
            res = fileCache.provide_directory("cache_path", "subdirectory",
                                              "server_url_prefix")
            self.assertEquals(res, path)
            self.assertFalse(build_download_url_mock.called)
        finally:
            self.config.set(AmbariConfig.AMBARI_PROPERTIES_CATEGORY,
                            FileCache.ENABLE_AUTO_AGENT_CACHE_UPDATE_KEY,
                            "true")
Example #14
    def __init__(self, config, controller):
        self.config = config
        self.tmp_dir = config.get('agent', 'prefix')
        self.exec_tmp_dir = Constants.AGENT_TMP_DIR
        self.file_cache = FileCache(config)
        self.status_commands_stdout = os.path.join(
            self.tmp_dir, 'status_command_stdout.txt')
        self.status_commands_stderr = os.path.join(
            self.tmp_dir, 'status_command_stderr.txt')
        self.public_fqdn = hostname.public_hostname(config)
        # cache reset will be called on every agent registration
        controller.registration_listeners.append(self.file_cache.reset)

        # Clean up old status command files if any
        try:
            os.unlink(self.status_commands_stdout)
            os.unlink(self.status_commands_stderr)
        except OSError:
            pass  # Ignore fail
        self.commands_in_progress_lock = threading.RLock()
        self.commands_in_progress = {}
Example #15
 def test_read_write_hash_sum(self):
     tmpdir = tempfile.mkdtemp()
     dummyhash = "DUMMY_HASH"
     fileCache = FileCache(self.config)
     fileCache.write_hash_sum(tmpdir, dummyhash)
     newhash = fileCache.read_hash_sum(tmpdir)
     self.assertEquals(newhash, dummyhash)
     shutil.rmtree(tmpdir)
     # Test read of not existing file
     newhash = fileCache.read_hash_sum(tmpdir)
     self.assertEquals(newhash, None)
     # Test write to not existing file
     with patch("__builtin__.open") as open_mock:
         open_mock.side_effect = self.exc_side_effect
         try:
             fileCache.write_hash_sum(tmpdir, dummyhash)
             self.fail('CachingException not thrown')
         except CachingException:
             pass  # Expected
         except Exception, e:
             self.fail('Unexpected exception thrown:' + str(e))
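
The test pins down the contract rather than the on-disk format: write_hash_sum
persists the hash under the given directory, read_hash_sum returns it, and a
missing file yields None. A sketch with an assumed file name, since the real
constant is not visible here:

import os

HASH_SUM_FILE = "hash.sum"  # assumed name, for illustration only

def write_hash_sum(directory, sha):
    with open(os.path.join(directory, HASH_SUM_FILE), "w") as fout:
        fout.write(sha)

def read_hash_sum(directory):
    try:
        with open(os.path.join(directory, HASH_SUM_FILE)) as fin:
            return fin.read().strip()
    except (IOError, OSError):
        return None  # missing file maps to None, matching the assertion above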
Example #16
  def test_fetch_url(self, urlopen_mock):
    fileCache = FileCache(self.config)
    remote_url = "http://dummy-url/"
    # Test normal download
    test_str = 'abc' * 100000 # Very long string
    test_string_io = StringIO.StringIO(test_str)
    test_buffer = MagicMock()
    test_buffer.read.side_effect = test_string_io.read
    urlopen_mock.return_value = test_buffer

    memory_buffer = fileCache.fetch_url(remote_url)

    self.assertEquals(memory_buffer.getvalue(), test_str)
    self.assertEqual(test_buffer.read.call_count, 20) # depends on buffer size
    # Test exception handling
    test_buffer.read.side_effect = self.exc_side_effect
    try:
      fileCache.fetch_url(remote_url)
      self.fail('CachingException not thrown')
    except CachingException:
      pass # Expected
    except Exception, e:
      self.fail('Unexpected exception thrown:' + str(e))
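
From the two assertions one can sketch what fetch_url must do: stream the
response into an in-memory buffer in fixed-size chunks and convert any failure
into CachingException. The 16 KB chunk size is an assumption that fits the
read-count check (300,000 bytes give 19 data reads plus one empty read). A
reconstruction, not the Ambari source:

import StringIO
import urllib2

def fetch_url(self, url):
    try:
        conn = urllib2.urlopen(url)
        buff = StringIO.StringIO()
        while True:
            chunk = conn.read(16 * 1024)
            if not chunk:  # empty read signals the end of the stream
                break
            buff.write(chunk)
        return buff
    except Exception, err:
        raise CachingException("Can not download from {0}: {1}".format(url, err))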
Example #17
def run(_K, _recommended_artists):
    avg_prec = 0
    avg_rec = 0
    no_users = UAM.shape[0]
    no_artists = UAM.shape[1]

    cf_file = FileCache("CF", _K, _recommended_artists)
    cb_file = FileCache("CB_Wiki", _K, _recommended_artists)
    pb_file = FileCache("PB", 1, _recommended_artists)

    recommended_artists = {}

    for u in range(0, no_users):
        # Get seed user's artists listened to
        u_aidx = np.nonzero(UAM[u, :])[0]

        if NF >= len(u_aidx) or u == no_users - 1:
            continue

        # Split user's artists into train and test set for cross-fold (CV) validation
        fold = 0
        kf = cross_validation.KFold(
            len(u_aidx), n_folds=NF)  # create folds (splits) for 5-fold CV
        for train_aidx, test_aidx in kf:  # for all folds
            # Show progress
            if VERBOSE:
                print "User: " + str(u) + ", Fold: " + str(fold) + \
                    ", Training items: " + str(len(train_aidx)) + \
                    ", Test items: " + str(len(test_aidx)),  # trailing comma avoids a line break

            # Call recommend function
            copy_UAM = UAM.copy()  # copy the UAM so modifications inside the recommend functions do not affect the original

            dict_rec_aidx_CB = cb_file.read_for_hybrid(
                u, fold)  #recommend_CB(AAM, u_aidx[train_aidx], _K)
            dict_rec_aidx_PB = pb_file.read_for_hybrid(
                u, fold
            )  #recommend_PB(copy_UAM, u_aidx[train_aidx], _recommended_artists)
            dict_rec_aidx_CF = cf_file.read_for_hybrid(
                u, fold
            )  #recommend_CF(copy_UAM, u_aidx[train_aidx], _recommended_artists)

            # @JPEER check in group if that solution is fair enough
            if len(dict_rec_aidx_CB) == 0 or len(dict_rec_aidx_PB) == 0 or len(
                    dict_rec_aidx_CF) == 0:
                continue

            # Fuse scores given by CB and by PB recommenders
            # First, create matrix to hold scores per recommendation method per artist
            scores = np.zeros(shape=(3, no_artists), dtype=np.float32)

            # Add scores from CB and CF recommenders to this matrix
            for aidx in dict_rec_aidx_CB.keys():
                scores[0, aidx] = dict_rec_aidx_CB[aidx]

            for aidx in dict_rec_aidx_PB.keys():
                scores[1, aidx] = dict_rec_aidx_PB[aidx]

            for aidx in dict_rec_aidx_CF.keys():
                scores[2, aidx] = dict_rec_aidx_CF[aidx]

            # Convert scores to ranks
            ranks = np.zeros(shape=(3, no_artists),
                             dtype=np.int16)  # init rank matrix

            for m in range(0, scores.shape[0]):  # for all methods to fuse
                aidx_nz = np.nonzero(
                    scores[m])[0]  # identify artists with positive scores
                scores_sorted_idx = np.argsort(
                    scores[m, aidx_nz]
                )  # sort artists with positive scores according to their score
                # Insert votes (i.e., inverse ranks) for each artist and current method

                for a in range(0, len(scores_sorted_idx)):
                    ranks[m, aidx_nz[scores_sorted_idx[a]]] = a + 1

            # Sum ranks over different approaches
            ranks_fused = np.sum(ranks, axis=0)
            # Sort and select top K_HR artists to recommend
            sorted_idx = np.argsort(ranks_fused)
            sorted_idx_top = sorted_idx[-_recommended_artists:]
            # Put (artist index, score) pairs of highest scoring artists in a dictionary
            dict_rec_aidx = {}

            for i in range(0, len(sorted_idx_top)):
                dict_rec_aidx[sorted_idx_top[i]] = ranks_fused[
                    sorted_idx_top[i]]

            # Distill recommended artist indices from dictionary returned by the recommendation functions
            rec_aidx = dict_rec_aidx.keys()

            if VERBOSE:
                print "Recommended items: ", len(rec_aidx)

            # Compute performance measures
            correct_aidx = np.intersect1d(
                u_aidx[test_aidx], rec_aidx)  # correctly predicted artists
            # True Positives is amount of overlap in recommended artists and test artists
            TP = len(correct_aidx)
            # False Positives is recommended artists minus correctly predicted ones
            FP = len(np.setdiff1d(rec_aidx, correct_aidx))

            # Precision is percentage of correctly predicted among predicted
            # Handle special case that not a single artist could be recommended -> by definition, precision = 100%
            if len(rec_aidx) == 0:
                prec = 100.0
            else:
                prec = 100.0 * TP / len(rec_aidx)

            # Recall is percentage of correctly predicted among all listened to
            # Handle special case that there is no single artist in the test set -> by definition, recall = 100%
            if len(test_aidx) == 0:
                rec = 100.0
            else:
                rec = 100.0 * TP / len(test_aidx)

            # add precision and recall for current user and fold to aggregate variables
            avg_prec += prec / (NF * no_users)
            avg_rec += rec / (NF * no_users)

            # Output precision and recall of current fold
            if VERBOSE:
                print("\tPrecision: %.2f, Recall:  %.2f" % (prec, rec))

            # Increase fold counter
            fold += 1

    # Output mean average precision and recall
    if VERBOSE:
        print("\nMAP: %.2f, MAR: %.2f" % (avg_prec, avg_rec))
        print("%.3f, %.3f" % (avg_prec, avg_rec))

    f1_score = 2 * ((avg_prec * avg_rec) / (avg_prec + avg_rec))

    data = {}
    data['avg_prec'] = avg_prec
    data['avg_rec'] = avg_rec
    data['f1_score'] = f1_score
    data['recommended'] = False

    return data
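
One caveat about the F1 computation above: when no fold contributes any hits,
avg_prec + avg_rec is zero and the expression raises ZeroDivisionError. A
guarded helper, offered as a sketch rather than as part of the original script:

def f1(avg_prec, avg_rec):
    # Harmonic mean of precision and recall; define it as 0.0 when both are zero.
    if avg_prec + avg_rec == 0:
        return 0.0
    return 2.0 * avg_prec * avg_rec / (avg_prec + avg_rec)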
Example #18
 def test_reset(self):
     fileCache = FileCache(self.config)
     fileCache.uptodate_paths.append('dummy-path')
     fileCache.reset()
     self.assertFalse(fileCache.uptodate_paths)
Example #19
class TestFileCache(TestCase):
    def setUp(self):
        # disable stdout
        out = StringIO.StringIO()
        sys.stdout = out
        # generate sample config
        tmpdir = tempfile.gettempdir()
        self.config = ConfigParser.RawConfigParser()
        self.config.add_section('agent')
        self.config.set('agent', 'prefix', tmpdir)
        self.config.set('agent', 'cache_dir', "/var/lib/ambari-agent/cache")
        self.config.set('agent', 'tolerate_download_failures', "true")

    def test_reset(self):
        fileCache = FileCache(self.config)
        fileCache.uptodate_paths.append('dummy-path')
        fileCache.reset()
        self.assertFalse(fileCache.uptodate_paths)

    @patch.object(FileCache, "provide_directory")
    def test_get_service_base_dir(self, provide_directory_mock):
        provide_directory_mock.return_value = "dummy value"
        fileCache = FileCache(self.config)
        command = {
            'commandParams': {
                'service_package_folder':
                os.path.join('stacks', 'HDP', '2.1.1', 'services', 'ZOOKEEPER',
                             'package')
            }
        }
        res = fileCache.get_service_base_dir(command, "server_url_pref")
        self.assertEquals(
            pprint.pformat(provide_directory_mock.call_args_list[0][0]),
            "('/var/lib/ambari-agent/cache',\n "
            "{0},\n"
            " 'server_url_pref')".format(
                pprint.pformat(
                    os.path.join('stacks', 'HDP', '2.1.1', 'services',
                                 'ZOOKEEPER', 'package'))))
        self.assertEquals(res, "dummy value")

    @patch.object(FileCache, "provide_directory")
    def test_get_hook_base_dir(self, provide_directory_mock):
        fileCache = FileCache(self.config)
        # Check missing parameter
        command = {'commandParams': {}}
        base = fileCache.get_hook_base_dir(command, "server_url_pref")
        self.assertEqual(base, None)
        self.assertFalse(provide_directory_mock.called)

        # Check existing dir case
        command = {
            'commandParams': {
                'hooks_folder': os.path.join('HDP', '2.1.1', 'hooks')
            }
        }
        provide_directory_mock.return_value = "dummy value"
        fileCache = FileCache(self.config)
        res = fileCache.get_hook_base_dir(command, "server_url_pref")
        self.assertEquals(
            pprint.pformat(provide_directory_mock.call_args_list[0][0]),
            "('/var/lib/ambari-agent/cache', "
            "{0}, "
            "'server_url_pref')".format(
                pprint.pformat(os.path.join('stacks', 'HDP', '2.1.1',
                                            'hooks'))))
        self.assertEquals(res, "dummy value")

    @patch.object(FileCache, "provide_directory")
    def test_get_custom_actions_base_dir(self, provide_directory_mock):
        provide_directory_mock.return_value = "dummy value"
        fileCache = FileCache(self.config)
        res = fileCache.get_custom_actions_base_dir("server_url_pref")
        self.assertEquals(
            pprint.pformat(provide_directory_mock.call_args_list[0][0]),
            "('/var/lib/ambari-agent/cache', 'custom_actions', 'server_url_pref')"
        )
        self.assertEquals(res, "dummy value")

    @patch.object(FileCache, "build_download_url")
    @patch.object(FileCache, "fetch_url")
    @patch.object(FileCache, "read_hash_sum")
    @patch.object(FileCache, "invalidate_directory")
    @patch.object(FileCache, "unpack_archive")
    @patch.object(FileCache, "write_hash_sum")
    def test_provide_directory(self, write_hash_sum_mock, unpack_archive_mock,
                               invalidate_directory_mock, read_hash_sum_mock,
                               fetch_url_mock, build_download_url_mock):
        build_download_url_mock.return_value = "http://dummy-url/"
        HASH1 = "hash1"
        membuffer = MagicMock()
        membuffer.getvalue.return_value.strip.return_value = HASH1
        fileCache = FileCache(self.config)

        # Test uptodate dirs after start
        self.assertFalse(fileCache.uptodate_paths)
        path = os.path.join("cache_path", "subdirectory")
        # Test initial downloading (when dir does not exist)
        fetch_url_mock.return_value = membuffer
        read_hash_sum_mock.return_value = "hash2"
        res = fileCache.provide_directory("cache_path", "subdirectory",
                                          "server_url_prefix")
        self.assertTrue(invalidate_directory_mock.called)
        self.assertTrue(write_hash_sum_mock.called)
        self.assertEquals(fetch_url_mock.call_count, 2)
        self.assertEquals(pprint.pformat(fileCache.uptodate_paths),
                          pprint.pformat([path]))
        self.assertEquals(res, path)

        fetch_url_mock.reset_mock()
        write_hash_sum_mock.reset_mock()
        invalidate_directory_mock.reset_mock()
        unpack_archive_mock.reset_mock()

        # Test cache invalidation when local hash does not differ
        fetch_url_mock.return_value = membuffer
        read_hash_sum_mock.return_value = HASH1
        fileCache.reset()

        res = fileCache.provide_directory("cache_path", "subdirectory",
                                          "server_url_prefix")
        self.assertFalse(invalidate_directory_mock.called)
        self.assertFalse(write_hash_sum_mock.called)
        self.assertEquals(fetch_url_mock.call_count, 1)

        self.assertEquals(pprint.pformat(fileCache.uptodate_paths),
                          pprint.pformat([path]))
        self.assertEquals(res, path)

        fetch_url_mock.reset_mock()
        write_hash_sum_mock.reset_mock()
        invalidate_directory_mock.reset_mock()
        unpack_archive_mock.reset_mock()

        # Test execution path when path is up-to date (already checked)
        res = fileCache.provide_directory("cache_path", "subdirectory",
                                          "server_url_prefix")
        self.assertFalse(invalidate_directory_mock.called)
        self.assertFalse(write_hash_sum_mock.called)
        self.assertEquals(fetch_url_mock.call_count, 0)
        self.assertEquals(pprint.pformat(fileCache.uptodate_paths),
                          pprint.pformat([path]))
        self.assertEquals(res, path)

        # Check exception handling when tolerance is disabled
        self.config.set('agent', 'tolerate_download_failures', "false")
        fetch_url_mock.side_effect = self.caching_exc_side_effect
        fileCache = FileCache(self.config)
        try:
            fileCache.provide_directory("cache_path", "subdirectory",
                                        "server_url_prefix")
            self.fail('CachingException not thrown')
        except CachingException:
            pass  # Expected
        except Exception, e:
            self.fail('Unexpected exception thrown:' + str(e))

        # Check that unexpected exceptions are still propagated when
        # tolerance is enabled
        self.config.set('agent', 'tolerate_download_failures', "true")
        fetch_url_mock.side_effect = self.exc_side_effect
        fileCache = FileCache(self.config)
        try:
            fileCache.provide_directory("cache_path", "subdirectory",
                                        "server_url_prefix")
            self.fail('Exception not thrown')
        except Exception:
            pass  # Expected

        # Check exception handling when tolerance is enabled
        self.config.set('agent', 'tolerate_download_failures', "true")
        fetch_url_mock.side_effect = self.caching_exc_side_effect
        fileCache = FileCache(self.config)
        res = fileCache.provide_directory("cache_path", "subdirectory",
                                          "server_url_prefix")
        self.assertEquals(res, path)
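
Taken together, the assertions in test_provide_directory pin down the control
flow of FileCache.provide_directory fairly tightly. A rough reconstruction,
inferred from the mocked calls rather than copied from the Ambari source; the
'archive.zip' name, the '.hash' suffix, and the tolerate_download_failures
attribute are assumptions:

import os

def provide_directory(self, cache_path, subdirectory, server_url_prefix):
    full_path = os.path.join(cache_path, subdirectory)
    if full_path in self.uptodate_paths:
        return full_path  # already verified this session: zero fetch_url calls
    try:
        url = self.build_download_url(server_url_prefix, subdirectory, 'archive.zip')
        remote_hash = self.fetch_url(url + '.hash').getvalue().strip()  # fetch #1
        if self.read_hash_sum(full_path) != remote_hash:
            membuffer = self.fetch_url(url)  # fetch #2: the archive itself
            self.invalidate_directory(full_path)
            self.unpack_archive(membuffer, full_path)
            self.write_hash_sum(full_path, remote_hash)
        self.uptodate_paths.append(full_path)
    except CachingException:
        if not self.tolerate_download_failures:
            raise  # tolerance disabled: the CachingException propagates
    return full_path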
Example #20
 def __init__(self, config):
     self.config = config
     self.tmp_dir = config.get('agent', 'prefix')
     self.file_cache = FileCache(config)
     self.python_executor = PythonExecutor(self.tmp_dir, config)
Example #21
def run(_K, _recommended_artists):
    # Initialize variables to hold performance measures
    avg_prec = 0  # mean precision
    avg_rec = 0  # mean recall

    df_a_file = FileCache("DF_age", _K, _recommended_artists)
    df_c_file = FileCache("DF_country", _K, _recommended_artists)
    df_g_file = FileCache("DF_gender", _K, _recommended_artists)

    # For all users in our data (UAM)
    no_users = UAM.shape[0]
    no_artists = UAM.shape[1]
    for u in range(0, no_users):

        # Get seed user's artists listened to
        u_aidx = np.nonzero(UAM[u, :])[0]

        if NF >= len(u_aidx) or u == no_users - 1:
            continue

        # Split user's artists into train and test set for cross-fold (CV) validation
        fold = 0
        kf = cross_validation.KFold(
            len(u_aidx), n_folds=NF)  # create folds (splits) for 5-fold CV

        for train_aidx, test_aidx in kf:  # for all folds
            if VERBOSE:
                print "User: " + str(u) + ", Fold: " + str(fold) + \
                    ", Training items: " + str(len(train_aidx)) + \
                    ", Test items: " + str(len(test_aidx)),  # trailing comma avoids a line break
            # Call recommend function
            copy_UAM = UAM.copy()  # copy the UAM so modifications inside the recommend functions do not affect the original

            ###############################################
            ## Combine CB and CF together so we get a HF ##
            ###############################################

            dict_rec_aidx_DF_A = df_a_file.read_for_hybrid(u, fold)
            dict_rec_aidx_DF_C = df_c_file.read_for_hybrid(u, fold)
            dict_rec_aidx_DF_G = df_g_file.read_for_hybrid(u, fold)

            # @JPEER check in group if that solution is fair enough
            if len(dict_rec_aidx_DF_A) == 0 or len(
                    dict_rec_aidx_DF_C) == 0 or len(dict_rec_aidx_DF_G) == 0:
                continue

            # Fuse scores given by CF and by CB recommenders
            # First, create matrix to hold scores per recommendation method per artist
            scores = np.zeros(shape=(3, no_artists), dtype=np.float32)

            # Add scores from CB and CF recommenders to this matrix
            for aidx in dict_rec_aidx_DF_A.keys():
                scores[0, aidx] = dict_rec_aidx_DF_A[aidx]

            for aidx in dict_rec_aidx_DF_C.keys():
                scores[1, aidx] = dict_rec_aidx_DF_C[aidx]

            for aidx in dict_rec_aidx_DF_G.keys():
                scores[2, aidx] = dict_rec_aidx_DF_G[aidx]

            # Apply aggregation function (here, just take arithmetic mean of scores)
            scores_fused = np.mean(scores, axis=0)

            # Sort and select top K_HR artists to recommend
            sorted_idx = np.argsort(scores_fused)
            sorted_idx_top = sorted_idx[-_recommended_artists:]

            # Put (artist index, score) pairs of highest scoring artists in a dictionary
            dict_rec_aidx = {}

            for i in range(0, len(sorted_idx_top)):
                dict_rec_aidx[sorted_idx_top[i]] = scores_fused[
                    sorted_idx_top[i]]

            # Distill recommended artist indices from dictionary returned by the recommendation functions
            rec_aidx = dict_rec_aidx.keys()

            if VERBOSE:
                print "Recommended items: ", len(rec_aidx)

            # Compute performance measures
            correct_aidx = np.intersect1d(
                u_aidx[test_aidx], rec_aidx)  # correctly predicted artists

            # TP - True Positives is amount of overlap in recommended artists and test artists
            # FP - False Positives is recommended artists minus correctly predicted ones
            TP = len(correct_aidx)
            FP = len(np.setdiff1d(rec_aidx, correct_aidx))

            # Precision is percentage of correctly predicted among predicted
            # Handle special case that not a single artist could be recommended -> by definition, precision = 100%
            if len(rec_aidx) == 0:
                prec = 100.0

            else:
                prec = 100.0 * TP / len(rec_aidx)

            # Recall is percentage of correctly predicted among all listened to
            # Handle special case that there is no single artist in the test set -> by definition, recall = 100%
            if len(test_aidx) == 0:
                rec = 100.0

            else:
                rec = 100.0 * TP / len(test_aidx)

            # add precision and recall for current user and fold to aggregate variables
            avg_prec += prec / (NF * no_users)
            avg_rec += rec / (NF * no_users)

            # Output precision and recall of current fold
            if VERBOSE:
                print("\tPrecision: %.2f, Recall:  %.2f" % (prec, rec))

            # Increase fold counter
            fold += 1

    f1_score = 2 * ((avg_prec * avg_rec) / (avg_prec + avg_rec))

    # Output mean average precision and recall
    if VERBOSE:
        print("\nMAP: %.2f, MAR: %.2f, F1 Score: %.2f" %
              (avg_prec, avg_rec, f1_score))
        print("%.3f, %.3f" % (avg_prec, avg_rec))
        print("K neighbors: " + str(_K))
        print("Recommendation: " + str(_recommended_artists))

    data = {}
    data['f1_score'] = f1_score
    data['avg_prec'] = avg_prec
    data['avg_rec'] = avg_rec
    data['recommended'] = False

    return data
Example #22
#!/usr/bin/env python3
#
# proxy data cache test
#
import time
from FileCache import FileCache


def log(s):
    print(time.ctime() + " testFileCache: " + s)


fn1 = "testfiles1.txt"
log("creating fileCache1")
c = FileCache(fn1)

f1 = "size=8429&ed2k=C2FAD4A41C26FD8840A72350C9A10A47&fmask=7FF8FFF9FE&amask=0000FCC0"
log("getting file1..")
d1 = c.getFile(f1)
if d1 is None:
    log("getFile1: " + str(d1) + " as expected")
else:
    log("getFile1: " + str(d1) + " - not expected")

d1 = "test|data|not|important & confusing"
log("adding file1..")
c.addFile(f1, d1)

log("getting file1 again..")
d2 = c.getFile(f1)
if d1 == d2:
    # closing check reconstructed here; the original snippet was cut off mid-statement
    log("getFile1 again: " + str(d2) + " as expected")
else:
    log("getFile1 again: " + str(d2) + " - not expected")
Example #23
    def test_provide_directory(self, write_hash_sum_mock, unpack_archive_mock,
                               invalidate_directory_mock, read_hash_sum_mock,
                               fetch_url_mock, build_download_url_mock):
        build_download_url_mock.return_value = "http://dummy-url/"
        HASH1 = "hash1"
        membuffer = MagicMock()
        membuffer.getvalue.return_value.strip.return_value = HASH1
        fileCache = FileCache(self.config)

        # Test uptodate dirs after start
        self.assertFalse(fileCache.uptodate_paths)
        path = os.path.join("cache_path", "subdirectory")
        # Test initial downloading (when dir does not exist)
        fetch_url_mock.return_value = membuffer
        read_hash_sum_mock.return_value = "hash2"
        res = fileCache.provide_directory("cache_path", "subdirectory",
                                          "server_url_prefix")
        self.assertTrue(invalidate_directory_mock.called)
        self.assertTrue(write_hash_sum_mock.called)
        self.assertEquals(fetch_url_mock.call_count, 2)
        self.assertEquals(pprint.pformat(fileCache.uptodate_paths),
                          pprint.pformat([path]))
        self.assertEquals(res, path)

        fetch_url_mock.reset_mock()
        write_hash_sum_mock.reset_mock()
        invalidate_directory_mock.reset_mock()
        unpack_archive_mock.reset_mock()

        # Test cache invalidation when local hash does not differ
        fetch_url_mock.return_value = membuffer
        read_hash_sum_mock.return_value = HASH1
        fileCache.reset()

        res = fileCache.provide_directory("cache_path", "subdirectory",
                                          "server_url_prefix")
        self.assertFalse(invalidate_directory_mock.called)
        self.assertFalse(write_hash_sum_mock.called)
        self.assertEquals(fetch_url_mock.call_count, 1)

        self.assertEquals(pprint.pformat(fileCache.uptodate_paths),
                          pprint.pformat([path]))
        self.assertEquals(res, path)

        fetch_url_mock.reset_mock()
        write_hash_sum_mock.reset_mock()
        invalidate_directory_mock.reset_mock()
        unpack_archive_mock.reset_mock()

        # Test execution path when path is up-to date (already checked)
        res = fileCache.provide_directory("cache_path", "subdirectory",
                                          "server_url_prefix")
        self.assertFalse(invalidate_directory_mock.called)
        self.assertFalse(write_hash_sum_mock.called)
        self.assertEquals(fetch_url_mock.call_count, 0)
        self.assertEquals(pprint.pformat(fileCache.uptodate_paths),
                          pprint.pformat([path]))
        self.assertEquals(res, path)

        # Check exception handling when tolerance is disabled
        self.config.set('agent', 'tolerate_download_failures', "false")
        fetch_url_mock.side_effect = self.caching_exc_side_effect
        fileCache = FileCache(self.config)
        try:
            fileCache.provide_directory("cache_path", "subdirectory",
                                        "server_url_prefix")
            self.fail('CachingException not thrown')
        except CachingException:
            pass  # Expected
        except Exception, e:
            self.fail('Unexpected exception thrown:' + str(e))
Example #24
def run(_K, _recommended_artists):
    """
    Function to run an evaluation experiment
    """
    # Initialize variables to hold performance measures
    avg_prec = 0  # mean precision
    avg_rec = 0  # mean recall

    cb_file = FileCache("CB_Wiki", _K, _recommended_artists)
    cf_file = FileCache("CF", _K, _recommended_artists)

    # For all users in our data (UAM)
    no_users = UAM.shape[0]

    for u in range(0, no_users):

        # Get seed user's artists listened to
        # u_aidx = np.nonzero(UAM[u, :])[0]
        u_aidx = np.nonzero(UAM[u, :])[0]

        if NF >= len(u_aidx) or u == no_users - 1:
            continue

        # Split user's artists into train and test set for cross-fold (CV) validation
        fold = 0

        # create folds (splits) for 10-fold CV
        kf = cross_validation.KFold(len(u_aidx), n_folds=NF)

        # For all folds
        for train_aidx, test_aidx in kf:
            if VERBOSE:
                print "User: " + str(u) + ", Fold: " + str(fold) + \
                    ", Training items: " + str(len(train_aidx)) + \
                    ", Test items: " + str(len(test_aidx)),

            # Create a copy of the UAM, otherwise modifications within the recommend function would affect the original
            copy_UAM = UAM.copy()

            # Call recommend function
            rec_aidx_CF = cf_file.read_for_hybrid(
                u, fold)  # recommend_CF(copy_UAM, u, u_aidx[train_aidx])
            rec_aidx_CB = cb_file.read_for_hybrid(
                u, fold)  # recommend_CB(AAM, u_aidx[train_aidx], _K)

            # @JPEER check in group if that solution is fair enough
            if len(rec_aidx_CF) == 0 or len(rec_aidx_CB) == 0:
                continue

            # Return the sorted, unique values that are in both of the input arrays.
            rec_aidx = np.intersect1d(rec_aidx_CB, rec_aidx_CF)

            if VERBOSE:
                print "Items CB: " + str(len(rec_aidx_CB))
                print "Items CF: " + str(len(rec_aidx_CF))
                print "Recommended items: " + str(len(rec_aidx))
                print "Predicted to be: " + str(_recommended_artists)

            ################################
            # Compute performance measures #
            ################################

            # Correctly predicted artists
            correct_aidx = np.intersect1d(u_aidx[test_aidx], rec_aidx)

            # TP - True Positives is amount of overlap in recommended artists and test artists
            # FP - False Positives is recommended artists minus correctly predicted ones
            TP = len(correct_aidx)
            FP = len(np.setdiff1d(rec_aidx, correct_aidx))

            # Precision is percentage of correctly predicted among predicted
            # Handle special case that not a single artist could be recommended -> by definition, precision = 100%
            if len(rec_aidx) == 0:
                prec = 100.0

            else:
                prec = 100.0 * TP / len(rec_aidx)

            # Recall is percentage of correctly predicted among all listened to
            # Handle special case that there is no single artist in the test set -> by definition, recall = 100%
            if len(test_aidx) == 0:
                rec = 100.0

            else:
                rec = 100.0 * TP / len(test_aidx)

            # add precision and recall for current user and fold to aggregate variables
            avg_prec += prec / (NF * no_users)
            avg_rec += rec / (NF * no_users)

            # Output precision and recall of current fold
            if VERBOSE:
                print("\tPrecision: %.2f, Recall:  %.2f" % (prec, rec))

            # Increase fold counter
            fold += 1

    f1_score = 2 * ((avg_prec * avg_rec) / (avg_prec + avg_rec))

    # Output mean average precision and recall
    if VERBOSE:
        print("MAP: %.3f, MAR: %.3f, F1 Score: %.3f" %
              (avg_prec, avg_rec, f1_score))
        print("K neighbors: " + str(_K))
        print("Recommendations: " + str(_recommended_artists))

    data = {}
    data['avg_prec'] = avg_prec
    data['avg_rec'] = avg_rec
    data['f1_score'] = f1_score
    data['recommended'] = False

    return data