def test_update_unfit_groups_with_crossgroup_dist_03(self):
        """Run _update_unfit_groups_with_crossgroup_dist and print what it returns.

        Both fit groups start empty, the unfit groups hold ('d', 'e') and
        ('a', 'b', 'c'), and the two groups use different metric weights.
        Nothing is asserted; the test only fails if the call or the indexing
        of its results raises.
        """
        # sample data shared by the tests
        user_ids, user_profile_df, user_connection_df = load_sample_test_data()

        # hand-built inputs
        dist_metrics = {0: [1, 1, 1], 1: [0, 0, 1]}
        unfit_group = {0: ['d', 'e'], 1: ['a', 'b', 'c']}
        fit_group = {0: [], 1: []}
        fit_pvals = {0: [], 1: []}
        buffer_group = []

        outcome = _update_unfit_groups_with_crossgroup_dist(
            dist_metrics, fit_group, fit_pvals, unfit_group, buffer_group,
            user_ids, user_profile_df, user_connection_df,
            ks_alpha=0.05)
        new_fit_group, new_fit_pvals, buffer_group = outcome

        print("--- _update_unfit_groups_with_crossgroup_dist (with reverse dist metrics)---")
        # (template, value) pairs, printed in the listed order
        report = [
            ("1st unfit_group: {}", unfit_group[0]),
            ("2nd unfit_group: {}", unfit_group[1]),
            ("new 1st group's distance metrics: {}", dist_metrics[0]),
            ("new 2nd group's distance metrics: {}", dist_metrics[1]),
            ("new 1st fit_group: {}", new_fit_group[0]),
            ("new 2nd fit_group: {}", new_fit_group[1]),
            ("new 1st fit_pval: {}", new_fit_pvals[0]),
            ("new 2nd fit_pval: {}", new_fit_pvals[1]),
            ("buffer_group: {}", buffer_group),
        ]
        for template, value in report:
            print(template.format(value))
    def test_single_run_05(self):
        """Check that a single learning run rejects inconsistent input.

        ``fit_group[1]`` lists three users while ``fit_pvals[1]`` holds only
        two values; the call is expected to raise.
        """
        user_ids, user_profiles, user_connections = load_sample_test_data()

        # define test data: group 1 has 3 members but only 2 p-values
        fit_group = {0: [], 1: ['a', 'd', 'e']}
        fit_pvals = {0: [], 1: [1, 1]}
        dist_metrics = {0: [1, 1, 1], 1: [1, 1, 1]}
        buffer_group = []

        # The internal input validation should capture the illegal input.
        # assertRaises(Exception) replaces a bare ``except:`` so that
        # KeyboardInterrupt / SystemExit are not counted as a passing test.
        with self.assertRaises(Exception):
            _groupwise_dist_learning_single_run(
                dist_metrics,
                fit_group,
                fit_pvals,
                buffer_group,
                user_ids,
                user_profiles,
                user_connections,
                ks_alpha=0.05,
                min_group_size=1,
                verbose=True)
    def test_update_fit_group_with_groupwise_dist_01(self):
        """Smoke-test _update_fit_group_with_groupwise_dist with generic metrics.

        Every distance-metric weight is 1 for both groups. The returned fit
        groups and p-values are printed for inspection; the test passes as
        long as the call and the indexing of its results do not raise.
        """

        # load test data
        user_ids, user_profile_df, user_connection_df = load_sample_test_data()

        # define test data
        fit_group = {0: ['a', 'b', 'c'], 1: ['d', 'e']}
        fit_pvals = {0: [1, 1, 1], 1: [1, 1]}
        dist_metrics = {0: [1, 1, 1], 1: [1, 1, 1]}
        dist_memory_container = [{}, {}]

        # the third returned value (unfit group) is unpacked but not inspected
        new_fit_group, new_fit_pvals, unfit_group = _update_fit_group_with_groupwise_dist(
            dist_metrics,
            fit_group,
            fit_pvals,
            user_ids,
            user_profile_df,
            user_connection_df,
            dist_memory_container,
            is_directed=False,
            ks_alpha=0.05)

        print(
            "--- update_fit_group_with_groupwise (with: generic distance metrics) ---"
        )
        print("new 1st fit_group: {}".format(new_fit_group[0]))
        print("new 2nd fit_group: {}".format(new_fit_group[1]))
        print("new 1st fit_pval: {}".format(new_fit_pvals[0]))
        print("new 2nd fit_pval: {}".format(new_fit_pvals[1]))

        # Smoke test only: reaching this line means nothing above raised.
        is_ok = True
        self.assertTrue(is_ok)
    def test_update_buffer_group(self):
        """Smoke-test _update_buffer_group with empty fit groups.

        All user ids are passed as the buffer group. The resulting fit groups
        are printed; the test passes as long as the call and the indexing of
        its results do not raise.
        """
        # load test data
        user_ids, user_profile_df, user_connection_df = load_sample_test_data()
        # define test data
        fit_group = {0: [], 1: []}
        fit_pvals = {0: [], 1: []}
        # NOTE(review): buffer_group is the same object as user_ids, not a
        # copy -- confirm the callee does not mutate it in place.
        buffer_group = user_ids
        new_dist_metrics = {0: [0, 1, 0], 1: [0, 0, 1]}
        dist_memory_container = [{}, {}]

        # the third returned value is unpacked but not inspected
        new_fit_group, new_fit_pvals, unfit_group = _update_buffer_group(
            new_dist_metrics,
            fit_group,
            fit_pvals,
            buffer_group,
            user_ids,
            user_profile_df,
            user_connection_df,
            dist_memory_container,
            ks_alpha=0.5)

        print("--- _update_buffer_group (with empty fit_group)---")
        print("new 1st fit_group: {}".format(new_fit_group[0]))
        print("new 2nd fit_group: {}".format(new_fit_group[1]))

        # Smoke test only: reaching this line means nothing above raised.
        is_ok = True
        self.assertTrue(is_ok)
    def test_single_run_03(self):
        """Run one learning iteration with every listed user in group 1.

        Group 0 starts empty. The four returned values are printed; nothing
        is asserted, so the test only fails if something raises.
        """
        user_ids, user_profiles, user_connections = load_sample_test_data()

        # hand-built starting state
        buffer_group = []
        dist_metrics = {0: [1, 1, 1], 1: [1, 1, 1]}
        fit_pvals = {0: [], 1: [1, 1, 1, 1, 1]}
        fit_group = {0: [], 1: ['a', 'd', 'e', 'b', 'c']}

        (new_dist_metrics,
         new_fit_group,
         new_fit_pvals,
         new_buffer_group) = _groupwise_dist_learning_single_run(
            dist_metrics, fit_group, fit_pvals, buffer_group,
            user_ids, user_profiles, user_connections,
            ks_alpha=0.05, min_group_size=1, verbose=True)

        print("--- single run test ---")
        # (template, value) pairs, printed in the listed order
        report = [
            ("1st new_dist_metrics's distance metrics: {}", new_dist_metrics[0]),
            ("1nd new_dist_metrics's distance metrics: {}", new_dist_metrics[1]),
            ("new 1st fit_group: {}", new_fit_group[0]),
            ("new 2nd fit_group: {}", new_fit_group[1]),
            ("new 1st fit_pval: {}", new_fit_pvals[0]),
            ("new 2nd fit_pval: {}", new_fit_pvals[1]),
            ("buffer_group: {}", new_buffer_group),
        ]
        for template, value in report:
            print(template.format(value))
    def test_update_groupwise_dist(self):
        """Smoke-test _update_groupwise_dist starting from all-ones metrics.

        The old and new distance weights of both groups are printed; the test
        passes as long as the call and the indexing of its result do not
        raise.
        """

        user_ids, user_profile_df, user_connection_df = load_sample_test_data()

        # define test data
        fit_group = {0: ['a', 'b', 'c'], 1: ['d', 'e']}
        dist_metrics = {0: [1, 1, 1], 1: [1, 1, 1]}

        new_dist_metrics = _update_groupwise_dist(dist_metrics,
                                                  fit_group,
                                                  user_ids,
                                                  user_profile_df,
                                                  user_connection_df,
                                                  min_group_size=1)

        print(
            "--- test_update_groupwise_dist (with generic metrics as inputs) ---"
        )
        print("1st group's old distance weights: {}".format(dist_metrics[0]))
        print("2nd group's old distance weights: {}".format(dist_metrics[1]))
        print("1st group's new distance weights: {}".format(
            new_dist_metrics[0]))
        print("2nd group's new distance weights: {}".format(
            new_dist_metrics[1]))

        # Smoke test only: reaching this line means nothing above raised.
        is_ok = True
        self.assertTrue(is_ok)
    def test_update_fit_group_with_groupwise_dist_02(self):
        """Smoke-test _update_fit_group_with_groupwise_dist with learned metrics.

        Unlike the generic all-ones case, each group here weights a single,
        different profile dimension. The returned fit groups and p-values are
        printed; the test passes as long as nothing raises.
        """

        # load test data
        user_ids, user_profile_df, user_connection_df = load_sample_test_data()
        # define test data
        fit_group = {0: ['a', 'b', 'c'], 1: ['d', 'e']}
        fit_pvals = {0: [1, 1, 1], 1: [1, 1]}
        new_dist_metrics = {0: [0, 1, 0], 1: [0, 0, 1]}

        # the third returned value (unfit group) is unpacked but not inspected
        new_fit_group, new_fit_pvals, unfit_group = _update_fit_group_with_groupwise_dist(
            new_dist_metrics,
            fit_group,
            fit_pvals,
            user_ids,
            user_profile_df,
            user_connection_df,
            ks_alpha=0.05)

        print("--- update_fit_group_with_groupwise (with: learned distance metrics) ---")
        print("new 1st fit_group: {}".format(new_fit_group[0]))
        print("new 2nd fit_group: {}".format(new_fit_group[1]))
        print("new 1st fit_pval: {}".format(new_fit_pvals[0]))
        print("new 2nd fit_pval: {}".format(new_fit_pvals[1]))

        # Smoke test only: reaching this line means nothing above raised.
        is_ok = True
        self.assertTrue(is_ok)
    def test_learner_01(self):
        """Run groupwise_dist_learning with two groups and print the outcome.

        Nothing is asserted; the test only fails if the learner call, the
        unpacking of its result, or the indexing raises.
        """
        user_ids, user_profiles, user_connections = load_sample_test_data()

        learner_options = dict(
            n_group=2,
            max_iter=20,
            max_nogain_streak=5,
            min_group_size=1,
            ks_alpha=0.1,
            init='zipf',
            verbose=True,
            C=0.1,
        )
        result = groupwise_dist_learning(
            user_ids, user_profiles, user_connections, **learner_options)

        # result is (knowledge_pack, best_score); the score is not inspected
        (new_dist_metrics, new_fit_group, new_buffer_group), _best_score = result

        print("--- leaner test (n_group=2) ---")
        # (template, value) pairs, printed in the listed order
        report = [
            ("1st new_dist_metrics's distance metrics: {}", new_dist_metrics[0]),
            ("1nd new_dist_metrics's distance metrics: {}", new_dist_metrics[1]),
            ("new 1st fit_group: {}", new_fit_group[0]),
            ("new 2nd fit_group: {}", new_fit_group[1]),
            ("buffer_group: {}", new_buffer_group),
        ]
        for template, value in report:
            print(template.format(value))
    def test_learner_class_init_zipf(self):
        """Fit a GroupwiseDistLearner created with init="zipf" and print its score."""
        user_ids, user_profiles, user_connections = load_sample_test_data()

        learner = GroupwiseDistLearner(
            n_group=2,
            min_group_size=1,
            init="zipf",
            max_iter=10,
            verbose=True,
        )
        learner.fit(user_ids, user_profiles, user_connections)

        print("--- learner class (n_group=2) with init='zipf' ---")
        score = learner.get_score()
        print("best score: {}".format(score))
 def test_validate_user_informationg_with_good_data(self):
     """Check that _validate_user_information accepts the unmodified sample data."""
     user_ids, user_profile_df, user_connection_df = load_sample_test_data()
     try:
         _validate_user_information(user_ids=user_ids,
                                    user_profiles=user_profile_df,
                                    user_connections=user_connection_df)
     except Exception as err:
         # ``except Exception`` (not a bare ``except:``) lets KeyboardInterrupt
         # and SystemExit propagate; the failure message keeps the cause.
         self.fail("validation rejected good data: {!r}".format(err))
 def test_validate_user_informationg_with_good_data(self):
     """Check that _validate_user_information accepts the unmodified sample data."""
     user_ids, user_profile_df, user_connection_df = load_sample_test_data()
     try:
         _validate_user_information(user_ids=user_ids,
                                    user_profiles=user_profile_df,
                                    user_connections=user_connection_df)
     except Exception as err:
         # ``except Exception`` (not a bare ``except:``) lets KeyboardInterrupt
         # and SystemExit propagate; the failure message keeps the cause.
         self.fail("validation rejected good data: {!r}".format(err))
コード例 #12
0
    def test_learner_class_init_zipf(self):
        """Fit a two-group GroupwiseDistLearner (init="zipf") and print its score."""
        user_ids, user_profiles, user_connections = load_sample_test_data()

        # constructor settings gathered in one place
        settings = {
            "n_group": 2,
            "min_group_size": 1,
            "init": "zipf",
            "max_iter": 10,
            "verbose": True,
        }
        learner = GroupwiseDistLearner(**settings)
        learner.fit(user_ids, user_profiles, user_connections)

        print("--- learner class (n_group=2) with init='zipf' ---")
        best = learner.get_score()
        print("best score: {}".format(best))
    def test_validate_user_informationg_with_bad_data(self):
        """Check that _validate_user_information rejects an incomplete id list."""

        user_ids, user_profile_df, user_connection_df = load_sample_test_data()

        # Drop the first user id while leaving profiles and connections
        # untouched. assertRaises(Exception) replaces a bare ``except:`` so
        # that KeyboardInterrupt / SystemExit are not counted as a pass.
        with self.assertRaises(Exception):
            _validate_user_information(user_ids=user_ids[1:],
                                       user_profiles=user_profile_df,
                                       user_connections=user_connection_df)
    def test_validate_user_informationg_with_bad_data(self):
        """Check that _validate_user_information rejects an incomplete id list."""

        user_ids, user_profile_df, user_connection_df = load_sample_test_data()

        # Drop the first user id while leaving profiles and connections
        # untouched. assertRaises(Exception) replaces a bare ``except:`` so
        # that KeyboardInterrupt / SystemExit are not counted as a pass.
        with self.assertRaises(Exception):
            _validate_user_information(user_ids=user_ids[1:],
                                       user_profiles=user_profile_df,
                                       user_connections=user_connection_df)
コード例 #15
0
    def test_learner_02(self):
        """Run groupwise_dist_learning with a single group and print the outcome.

        Nothing is asserted; the test only fails if the learner call, the
        unpacking of its result, or the indexing raises.
        """
        user_ids, user_profiles, user_connections = load_sample_test_data()

        learner_options = dict(
            n_group=1,
            max_iter=20,
            max_nogain_streak=5,
            min_group_size=1,
            ks_alpha=0.1,
            init='zipf',
            verbose=True,
            C=0.1,
        )
        result = groupwise_dist_learning(
            user_ids, user_profiles, user_connections, **learner_options)

        # result is (knowledge_pack, best_score); the score is not inspected
        (new_dist_metrics, new_fit_group, new_buffer_group), _best_score = result

        print("--- leaner test (n_group=1) ---")
        # (template, value) pairs, printed in the listed order
        report = [
            ("1st new_dist_metrics's distance metrics: {}", new_dist_metrics[0]),
            ("new 1st fit_group: {}", new_fit_group[0]),
            ("buffer_group: {}", new_buffer_group),
        ]
        for template, value in report:
            print(template.format(value))
    def test_update_buffer_group(self):
        """Smoke-test _update_buffer_group with empty fit groups.

        All user ids are passed as the buffer group. The resulting fit groups
        are printed; the test passes as long as the call and the indexing of
        its results do not raise.
        """
        # load test data
        user_ids, user_profile_df, user_connection_df = load_sample_test_data()
        # define test data
        fit_group = {0: [], 1: []}
        fit_pvals = {0: [], 1: []}
        # NOTE(review): buffer_group is the same object as user_ids, not a
        # copy -- confirm the callee does not mutate it in place.
        buffer_group = user_ids
        new_dist_metrics = {0: [0, 1, 0], 1: [0, 0, 1]}

        # the third returned value is unpacked but not inspected
        new_fit_group, new_fit_pvals, unfit_group = _update_buffer_group(
            new_dist_metrics, fit_group, fit_pvals,
            buffer_group, user_ids, user_profile_df,
            user_connection_df, ks_alpha=0.5)

        print("--- _update_buffer_group (with empty fit_group)---")
        print("new 1st fit_group: {}".format(new_fit_group[0]))
        print("new 2nd fit_group: {}".format(new_fit_group[1]))

        # Smoke test only: reaching this line means nothing above raised.
        is_ok = True
        self.assertTrue(is_ok)
コード例 #17
0
    def test_single_run_05(self):
        """Check that a single learning run rejects inconsistent input.

        ``fit_group[1]`` lists three users while ``fit_pvals[1]`` holds only
        two values; the call is expected to raise.
        """

        user_ids, user_profiles, user_connections = load_sample_test_data()

        # define test data: group 1 has 3 members but only 2 p-values
        fit_group = {0: [], 1: ['a', 'd', 'e']}
        fit_pvals = {0: [], 1: [1, 1]}
        dist_metrics = {0: [1, 1, 1], 1: [1, 1, 1]}
        buffer_group = []

        # The internal input validation should capture the illegal input.
        # assertRaises(Exception) replaces a bare ``except:`` so that
        # KeyboardInterrupt / SystemExit are not counted as a passing test.
        with self.assertRaises(Exception):
            _groupwise_dist_learning_single_run(
                dist_metrics, fit_group, fit_pvals, buffer_group,
                user_ids, user_profiles, user_connections,
                ks_alpha=0.05, min_group_size=1, verbose=True)
    def test_update_groupwise_dist(self):
        """Smoke-test _update_groupwise_dist starting from all-ones metrics.

        The old and new distance weights of both groups are printed; the test
        passes as long as the call and the indexing of its result do not
        raise.
        """

        user_ids, user_profile_df, user_connection_df = load_sample_test_data()

        # define test data
        fit_group = {0: ['a', 'b', 'c'], 1: ['d', 'e']}
        dist_metrics = {0: [1, 1, 1], 1: [1, 1, 1]}

        new_dist_metrics = _update_groupwise_dist(dist_metrics, fit_group,
                                                  user_ids, user_profile_df, user_connection_df,
                                                  min_group_size=1)

        print("--- test_update_groupwise_dist (with generic metrics as inputs) ---")
        print("1st group's old distance weights: {}".format(dist_metrics[0]))
        print("2nd group's old distance weights: {}".format(dist_metrics[1]))
        print("1st group's new distance weights: {}".format(new_dist_metrics[0]))
        print("2nd group's new distance weights: {}".format(new_dist_metrics[1]))

        # Smoke test only: reaching this line means nothing above raised.
        is_ok = True
        self.assertTrue(is_ok)
    def test_update_unfit_groups_with_crossgroup_dist_02(self):
        """Smoke-test _update_unfit_groups_with_crossgroup_dist with modified metrics.

        Both fit groups start empty, the unfit groups hold ('d', 'e') and
        ('a', 'b', 'c'), and each group zeroes out a different metric weight.
        Inputs and results are printed; the test passes as long as the call
        and the indexing of its results do not raise.
        """

        # load test data
        user_ids, user_profile_df, user_connection_df = load_sample_test_data()
        # define test data
        fit_group = {0: [], 1: []}
        fit_pvals = {0: [], 1: []}
        unfit_group = {0: ['d', 'e'], 1: ['a', 'b', 'c']}
        buffer_group = []
        dist_metrics = {0: [1, 0, 1], 1: [1, 1, 0]}
        dist_memory_container = [{}, {}]

        # buffer_group is rebound to the buffer returned by the call
        new_fit_group, new_fit_pvals, buffer_group = _update_unfit_groups_with_crossgroup_dist(
            dist_metrics,
            fit_group,
            fit_pvals,
            unfit_group,
            buffer_group,
            user_ids,
            user_profile_df,
            user_connection_df,
            dist_memory_container,
            ks_alpha=0.05)

        print(
            "--- _update_unfit_groups_with_crossgroup_dist (with modified dist metrics)---"
        )
        print("1st unfit_group: {}".format(unfit_group[0]))
        print("2nd unfit_group: {}".format(unfit_group[1]))
        print("new 1st group's distance metrics: {}".format(dist_metrics[0]))
        print("new 2nd group's distance metrics: {}".format(dist_metrics[1]))
        print("new 1st fit_group: {}".format(new_fit_group[0]))
        print("new 2nd fit_group: {}".format(new_fit_group[1]))
        print("new 1st fit_pval: {}".format(new_fit_pvals[0]))
        print("new 2nd fit_pval: {}".format(new_fit_pvals[1]))
        print("buffer_group: {}".format(buffer_group))

        # Smoke test only: reaching this line means nothing above raised.
        is_ok = True
        self.assertTrue(is_ok)
コード例 #20
0
    def test_single_run_03(self):
        """Run one learning iteration with every listed user in group 1.

        Group 0 starts empty. The four returned values are printed; nothing
        is asserted, so the test only fails if something raises.
        """
        user_ids, user_profiles, user_connections = load_sample_test_data()

        # hand-built starting state
        buffer_group = []
        dist_metrics = {0: [1, 1, 1], 1: [1, 1, 1]}
        fit_pvals = {0: [], 1: [1, 1, 1, 1, 1]}
        fit_group = {0: [], 1: ['a', 'd', 'e', 'b', 'c']}

        (new_dist_metrics,
         new_fit_group,
         new_fit_pvals,
         new_buffer_group) = _groupwise_dist_learning_single_run(
            dist_metrics,
            fit_group,
            fit_pvals,
            buffer_group,
            user_ids,
            user_profiles,
            user_connections,
            ks_alpha=0.05,
            min_group_size=1,
            verbose=True)

        print("--- single run test ---")
        # (template, value) pairs, printed in the listed order
        report = [
            ("1st new_dist_metrics's distance metrics: {}", new_dist_metrics[0]),
            ("1nd new_dist_metrics's distance metrics: {}", new_dist_metrics[1]),
            ("new 1st fit_group: {}", new_fit_group[0]),
            ("new 2nd fit_group: {}", new_fit_group[1]),
            ("new 1st fit_pval: {}", new_fit_pvals[0]),
            ("new 2nd fit_pval: {}", new_fit_pvals[1]),
            ("buffer_group: {}", new_buffer_group),
        ]
        for template, value in report:
            print(template.format(value))