def test_update_unfit_groups_with_crossgroup_dist_03(self):
    """Smoke-run ``_update_unfit_groups_with_crossgroup_dist`` and print the result.

    Both fit groups start empty, group 0 uses weights ``[1, 1, 1]`` and
    group 1 uses ``[0, 0, 1]``. Nothing is asserted, so the test can only
    fail if one of the calls raises.
    """
    user_ids, user_profile_df, user_connection_df = load_sample_test_data()

    # Hand-built state: the listed users sit in the unfit groups only.
    dist_metrics = {0: [1, 1, 1], 1: [0, 0, 1]}
    unfit_group = {0: ['d', 'e'], 1: ['a', 'b', 'c']}
    fit_group = {0: [], 1: []}
    fit_pvals = {0: [], 1: []}
    buffer_group = []

    # NOTE(review): another test in this file passes an extra
    # dist_memory_container argument to this helper -- confirm which call
    # shape matches the current signature.
    outcome = _update_unfit_groups_with_crossgroup_dist(
        dist_metrics, fit_group, fit_pvals, unfit_group, buffer_group,
        user_ids, user_profile_df, user_connection_df,
        ks_alpha=0.05)
    regrouped, regrouped_pvals, leftover = outcome

    print("--- _update_unfit_groups_with_crossgroup_dist (with reverse dist metrics)---")
    print("1st unfit_group: {}".format(unfit_group[0]))
    print("2nd unfit_group: {}".format(unfit_group[1]))
    print("new 1st group's distance metrics: {}".format(dist_metrics[0]))
    print("new 2nd group's distance metrics: {}".format(dist_metrics[1]))
    print("new 1st fit_group: {}".format(regrouped[0]))
    print("new 2nd fit_group: {}".format(regrouped[1]))
    print("new 1st fit_pval: {}".format(regrouped_pvals[0]))
    print("new 2nd fit_pval: {}".format(regrouped_pvals[1]))
    print("buffer_group: {}".format(leftover))
def test_single_run_05(self):
    """Check that a single learning run rejects inconsistent input.

    Group 1 lists three members in ``fit_group`` but only two entries in
    ``fit_pvals``; ``_groupwise_dist_learning_single_run`` is expected to
    raise on that mismatch.
    """
    user_ids, user_profiles, user_connections = load_sample_test_data()

    # Deliberately malformed state: 3 members vs. 2 fit_pvals entries.
    fit_group = {0: [], 1: ['a', 'd', 'e']}
    fit_pvals = {0: [], 1: [1, 1]}
    dist_metrics = {0: [1, 1, 1], 1: [1, 1, 1]}
    buffer_group = []

    # assertRaises(Exception) instead of a bare ``except:`` so that
    # KeyboardInterrupt / SystemExit are not mistaken for a passing test.
    with self.assertRaises(Exception):
        _groupwise_dist_learning_single_run(
            dist_metrics, fit_group, fit_pvals, buffer_group,
            user_ids, user_profiles, user_connections,
            ks_alpha=0.05, min_group_size=1, verbose=True)
def test_update_fit_group_with_groupwise_dist_01(self):
    """Smoke-run ``_update_fit_group_with_groupwise_dist`` with uniform weights.

    Both groups use the weights ``[1, 1, 1]``. The returned groups and
    p-value containers are printed; the only check is that the call
    completes without raising.
    """
    user_ids, user_profile_df, user_connection_df = load_sample_test_data()

    # Hand-built learner state.
    fit_group = {0: ['a', 'b', 'c'], 1: ['d', 'e']}
    fit_pvals = {0: [1, 1, 1], 1: [1, 1]}
    dist_metrics = {0: [1, 1, 1], 1: [1, 1, 1]}
    dist_memory_container = [{}, {}]

    new_fit_group, new_fit_pvals, unfit_group = _update_fit_group_with_groupwise_dist(
        dist_metrics, fit_group, fit_pvals,
        user_ids, user_profile_df, user_connection_df,
        dist_memory_container,
        is_directed=False, ks_alpha=0.05)

    print(
        "--- update_fit_group_with_groupwise (with: generic distance metrics) ---"
    )
    print("new 1st fit_group: {}".format(new_fit_group[0]))
    print("new 2nd fit_group: {}".format(new_fit_group[1]))
    print("new 1st fit_pval: {}".format(new_fit_pvals[0]))
    print("new 2nd fit_pval: {}".format(new_fit_pvals[1]))

    # Reaching this line means nothing above raised. The second positional
    # argument of assertTrue is the failure message, not an expected value,
    # so it is not passed.
    is_ok = True
    self.assertTrue(is_ok)
def test_update_buffer_group(self):
    """Smoke-run ``_update_buffer_group`` starting from empty fit groups.

    Every id returned by ``load_sample_test_data`` is placed in the buffer
    group. The resulting fit groups are printed; the only check is that the
    call completes without raising.
    """
    user_ids, user_profile_df, user_connection_df = load_sample_test_data()

    # Hand-built learner state.
    fit_group = {0: [], 1: []}
    fit_pvals = {0: [], 1: []}
    # NOTE(review): this aliases user_ids rather than copying it -- confirm
    # the helper does not mutate buffer_group in place.
    buffer_group = user_ids
    new_dist_metrics = {0: [0, 1, 0], 1: [0, 0, 1]}
    dist_memory_container = [{}, {}]

    new_fit_group, new_fit_pvals, unfit_group = _update_buffer_group(
        new_dist_metrics, fit_group, fit_pvals, buffer_group,
        user_ids, user_profile_df, user_connection_df,
        dist_memory_container,
        ks_alpha=0.5)

    print("--- _update_buffer_group (with empty fit_group)---")
    print("new 1st fit_group: {}".format(new_fit_group[0]))
    print("new 2nd fit_group: {}".format(new_fit_group[1]))

    # Reaching this line means nothing above raised. The second positional
    # argument of assertTrue is the failure message, not an expected value,
    # so it is not passed.
    is_ok = True
    self.assertTrue(is_ok)
def test_single_run_03(self):
    """Smoke-run one learning iteration with all listed users in group 1.

    Group 0 starts empty. The four values returned by
    ``_groupwise_dist_learning_single_run`` are printed; nothing is
    asserted, so the test can only fail if a call raises.
    """
    user_ids, user_profiles, user_connections = load_sample_test_data()

    # Hand-built learner state.
    dist_metrics = {0: [1, 1, 1], 1: [1, 1, 1]}
    fit_group = {0: [], 1: ['a', 'd', 'e', 'b', 'c']}
    fit_pvals = {0: [], 1: [1, 1, 1, 1, 1]}
    buffer_group = []

    (learned_metrics,
     learned_groups,
     learned_pvals,
     learned_buffer) = _groupwise_dist_learning_single_run(
        dist_metrics, fit_group, fit_pvals, buffer_group,
        user_ids, user_profiles, user_connections,
        ks_alpha=0.05, min_group_size=1, verbose=True)

    print("--- single run test ---")
    print("1st new_dist_metrics's distance metrics: {}".format(learned_metrics[0]))
    print("1nd new_dist_metrics's distance metrics: {}".format(learned_metrics[1]))
    print("new 1st fit_group: {}".format(learned_groups[0]))
    print("new 2nd fit_group: {}".format(learned_groups[1]))
    print("new 1st fit_pval: {}".format(learned_pvals[0]))
    print("new 2nd fit_pval: {}".format(learned_pvals[1]))
    print("buffer_group: {}".format(learned_buffer))
def test_update_groupwise_dist(self):
    """Smoke-run ``_update_groupwise_dist`` starting from uniform weights.

    The old and new per-group weights are printed; the only check is that
    the call completes without raising.
    """
    user_ids, user_profile_df, user_connection_df = load_sample_test_data()

    # Hand-built learner state.
    fit_group = {0: ['a', 'b', 'c'], 1: ['d', 'e']}
    dist_metrics = {0: [1, 1, 1], 1: [1, 1, 1]}

    new_dist_metrics = _update_groupwise_dist(
        dist_metrics, fit_group,
        user_ids, user_profile_df, user_connection_df,
        min_group_size=1)

    print(
        "--- test_update_groupwise_dist (with generic metrics as inputs) ---"
    )
    print("1st group's old distance weights: {}".format(dist_metrics[0]))
    print("2nd group's old distance weights: {}".format(dist_metrics[1]))
    print("1st group's new distance weights: {}".format(new_dist_metrics[0]))
    print("2nd group's new distance weights: {}".format(new_dist_metrics[1]))

    # Reaching this line means nothing above raised. The second positional
    # argument of assertTrue is the failure message, not an expected value,
    # so it is not passed.
    is_ok = True
    self.assertTrue(is_ok)
def test_update_fit_group_with_groupwise_dist_02(self):
    """Smoke-run ``_update_fit_group_with_groupwise_dist`` with non-uniform weights.

    Group 0 uses ``[0, 1, 0]`` and group 1 uses ``[0, 0, 1]`` (labelled
    "learned distance metrics" in the printed header). The only check is
    that the call completes without raising.
    """
    user_ids, user_profile_df, user_connection_df = load_sample_test_data()

    # Hand-built learner state.
    fit_group = {0: ['a', 'b', 'c'], 1: ['d', 'e']}
    fit_pvals = {0: [1, 1, 1], 1: [1, 1]}
    new_dist_metrics = {0: [0, 1, 0], 1: [0, 0, 1]}

    new_fit_group, new_fit_pvals, unfit_group = _update_fit_group_with_groupwise_dist(
        new_dist_metrics, fit_group, fit_pvals,
        user_ids, user_profile_df, user_connection_df,
        ks_alpha=0.05)

    print("--- update_fit_group_with_groupwise (with: learned distance metrics) ---")
    print("new 1st fit_group: {}".format(new_fit_group[0]))
    print("new 2nd fit_group: {}".format(new_fit_group[1]))
    print("new 1st fit_pval: {}".format(new_fit_pvals[0]))
    print("new 2nd fit_pval: {}".format(new_fit_pvals[1]))

    # Reaching this line means nothing above raised. The second positional
    # argument of assertTrue is the failure message, not an expected value,
    # so it is not passed.
    is_ok = True
    self.assertTrue(is_ok)
def test_learner_01(self):
    """Smoke-run ``groupwise_dist_learning`` with ``n_group=2`` and print the result.

    Nothing is asserted, so the test can only fail if a call raises or the
    returned value does not unpack into the expected nesting.
    """
    user_ids, user_profiles, user_connections = load_sample_test_data()

    learner_options = dict(
        n_group=2,
        max_iter=20,
        max_nogain_streak=5,
        min_group_size=1,
        ks_alpha=0.1,
        init='zipf',
        verbose=True,
        C=0.1,
    )
    best_pack = groupwise_dist_learning(
        user_ids, user_profiles, user_connections, **learner_options)

    # The result unpacks as ((metrics, fit groups, buffer), score).
    knowledge_pack, best_score = best_pack
    learned_metrics, learned_groups, learned_buffer = knowledge_pack

    print("--- leaner test (n_group=2) ---")
    print("1st new_dist_metrics's distance metrics: {}".format(learned_metrics[0]))
    print("1nd new_dist_metrics's distance metrics: {}".format(learned_metrics[1]))
    print("new 1st fit_group: {}".format(learned_groups[0]))
    print("new 2nd fit_group: {}".format(learned_groups[1]))
    print("buffer_group: {}".format(learned_buffer))
def test_learner_class_init_zipf(self):
    """Smoke-run ``GroupwiseDistLearner`` with ``init="zipf"`` and print its score.

    Nothing is asserted, so the test can only fail if construction,
    ``fit`` or ``get_score`` raises.
    """
    sample = load_sample_test_data()
    user_ids, user_profiles, user_connections = sample

    gwd_learner = GroupwiseDistLearner(
        n_group=2,
        min_group_size=1,
        init="zipf",
        max_iter=10,
        verbose=True,
    )
    gwd_learner.fit(user_ids, user_profiles, user_connections)

    print("--- learner class (n_group=2) with init='zipf' ---")
    score = gwd_learner.get_score()
    print("best score: {}".format(score))
def test_validate_user_informationg_with_good_data(self):
    """Check that ``_validate_user_information`` accepts the sample data.

    The unmodified output of ``load_sample_test_data`` is passed in; any
    exception raised by the validator fails the test.
    """
    user_ids, user_profile_df, user_connection_df = load_sample_test_data()

    try:
        _validate_user_information(
            user_ids=user_ids,
            user_profiles=user_profile_df,
            user_connections=user_connection_df)
    except Exception as err:
        # Report the actual cause instead of a bare "False is not true";
        # ``except Exception`` also lets KeyboardInterrupt / SystemExit
        # propagate, which the previous bare ``except:`` did not.
        self.fail(
            "_validate_user_information rejected valid data: {!r}".format(err))
def test_validate_user_informationg_with_good_data(self):
    """Check that ``_validate_user_information`` accepts the sample data.

    The unmodified output of ``load_sample_test_data`` is passed in; any
    exception raised by the validator fails the test.
    """
    user_ids, user_profile_df, user_connection_df = load_sample_test_data()

    try:
        _validate_user_information(
            user_ids=user_ids,
            user_profiles=user_profile_df,
            user_connections=user_connection_df)
    except Exception as err:
        # Report the actual cause instead of a bare "False is not true";
        # ``except Exception`` also lets KeyboardInterrupt / SystemExit
        # propagate, which the previous bare ``except:`` did not.
        self.fail(
            "_validate_user_information rejected valid data: {!r}".format(err))
def test_learner_class_init_zipf(self):
    """Smoke-run ``GroupwiseDistLearner`` with ``init="zipf"`` and print its score.

    Nothing is asserted, so the test can only fail if construction,
    ``fit`` or ``get_score`` raises.
    """
    sample = load_sample_test_data()
    user_ids, user_profiles, user_connections = sample

    gwd_learner = GroupwiseDistLearner(
        n_group=2,
        min_group_size=1,
        init="zipf",
        max_iter=10,
        verbose=True,
    )
    gwd_learner.fit(user_ids, user_profiles, user_connections)

    print("--- learner class (n_group=2) with init='zipf' ---")
    score = gwd_learner.get_score()
    print("best score: {}".format(score))
def test_validate_user_informationg_with_bad_data(self):
    """Check that ``_validate_user_information`` rejects an incomplete id list.

    The first entry of ``user_ids`` is dropped while the profile and
    connection data are passed unchanged; the validator is expected to
    raise.
    """
    user_ids, user_profile_df, user_connection_df = load_sample_test_data()

    # assertRaises(Exception) instead of a bare ``except:`` so that
    # KeyboardInterrupt / SystemExit are not mistaken for a passing test.
    with self.assertRaises(Exception):
        _validate_user_information(
            user_ids=user_ids[1:],  # first user id removed on purpose
            user_profiles=user_profile_df,
            user_connections=user_connection_df)
def test_validate_user_informationg_with_bad_data(self):
    """Check that ``_validate_user_information`` rejects an incomplete id list.

    The first entry of ``user_ids`` is dropped while the profile and
    connection data are passed unchanged; the validator is expected to
    raise.
    """
    user_ids, user_profile_df, user_connection_df = load_sample_test_data()

    # assertRaises(Exception) instead of a bare ``except:`` so that
    # KeyboardInterrupt / SystemExit are not mistaken for a passing test.
    with self.assertRaises(Exception):
        _validate_user_information(
            user_ids=user_ids[1:],  # first user id removed on purpose
            user_profiles=user_profile_df,
            user_connections=user_connection_df)
def test_learner_02(self):
    """Smoke-run ``groupwise_dist_learning`` with ``n_group=1`` and print the result.

    Nothing is asserted, so the test can only fail if a call raises or the
    returned value does not unpack into the expected nesting.
    """
    user_ids, user_profiles, user_connections = load_sample_test_data()

    learner_options = dict(
        n_group=1,
        max_iter=20,
        max_nogain_streak=5,
        min_group_size=1,
        ks_alpha=0.1,
        init='zipf',
        verbose=True,
        C=0.1,
    )
    best_pack = groupwise_dist_learning(
        user_ids, user_profiles, user_connections, **learner_options)

    # The result unpacks as ((metrics, fit groups, buffer), score).
    knowledge_pack, best_score = best_pack
    learned_metrics, learned_groups, learned_buffer = knowledge_pack

    print("--- leaner test (n_group=1) ---")
    print("1st new_dist_metrics's distance metrics: {}".format(learned_metrics[0]))
    print("new 1st fit_group: {}".format(learned_groups[0]))
    print("buffer_group: {}".format(learned_buffer))
def test_update_buffer_group(self):
    """Smoke-run ``_update_buffer_group`` starting from empty fit groups.

    Every id returned by ``load_sample_test_data`` is placed in the buffer
    group. The resulting fit groups are printed; the only check is that the
    call completes without raising.
    """
    user_ids, user_profile_df, user_connection_df = load_sample_test_data()

    # Hand-built learner state.
    fit_group = {0: [], 1: []}
    fit_pvals = {0: [], 1: []}
    # NOTE(review): this aliases user_ids rather than copying it -- confirm
    # the helper does not mutate buffer_group in place.
    buffer_group = user_ids
    new_dist_metrics = {0: [0, 1, 0], 1: [0, 0, 1]}

    new_fit_group, new_fit_pvals, unfit_group = _update_buffer_group(
        new_dist_metrics, fit_group, fit_pvals, buffer_group,
        user_ids, user_profile_df, user_connection_df,
        ks_alpha=0.5)

    print("--- _update_buffer_group (with empty fit_group)---")
    print("new 1st fit_group: {}".format(new_fit_group[0]))
    print("new 2nd fit_group: {}".format(new_fit_group[1]))

    # Reaching this line means nothing above raised. The second positional
    # argument of assertTrue is the failure message, not an expected value,
    # so it is not passed.
    is_ok = True
    self.assertTrue(is_ok)
def test_single_run_05(self):
    """Check that a single learning run rejects inconsistent input.

    Group 1 lists three members in ``fit_group`` but only two entries in
    ``fit_pvals``; ``_groupwise_dist_learning_single_run`` is expected to
    raise on that mismatch.
    """
    user_ids, user_profiles, user_connections = load_sample_test_data()

    # Deliberately malformed state: 3 members vs. 2 fit_pvals entries.
    fit_group = {0: [], 1: ['a', 'd', 'e']}
    fit_pvals = {0: [], 1: [1, 1]}
    dist_metrics = {0: [1, 1, 1], 1: [1, 1, 1]}
    buffer_group = []

    # assertRaises(Exception) instead of a bare ``except:`` so that
    # KeyboardInterrupt / SystemExit are not mistaken for a passing test.
    with self.assertRaises(Exception):
        _groupwise_dist_learning_single_run(
            dist_metrics, fit_group, fit_pvals, buffer_group,
            user_ids, user_profiles, user_connections,
            ks_alpha=0.05, min_group_size=1, verbose=True)
def test_update_groupwise_dist(self):
    """Smoke-run ``_update_groupwise_dist`` starting from uniform weights.

    The old and new per-group weights are printed; the only check is that
    the call completes without raising.
    """
    user_ids, user_profile_df, user_connection_df = load_sample_test_data()

    # Hand-built learner state.
    fit_group = {0: ['a', 'b', 'c'], 1: ['d', 'e']}
    dist_metrics = {0: [1, 1, 1], 1: [1, 1, 1]}

    new_dist_metrics = _update_groupwise_dist(
        dist_metrics, fit_group,
        user_ids, user_profile_df, user_connection_df,
        min_group_size=1)

    print("--- test_update_groupwise_dist (with generic metrics as inputs) ---")
    print("1st group's old distance weights: {}".format(dist_metrics[0]))
    print("2nd group's old distance weights: {}".format(dist_metrics[1]))
    print("1st group's new distance weights: {}".format(new_dist_metrics[0]))
    print("2nd group's new distance weights: {}".format(new_dist_metrics[1]))

    # Reaching this line means nothing above raised. The second positional
    # argument of assertTrue is the failure message, not an expected value,
    # so it is not passed.
    is_ok = True
    self.assertTrue(is_ok)
def test_update_unfit_groups_with_crossgroup_dist_02(self):
    """Smoke-run ``_update_unfit_groups_with_crossgroup_dist`` with modified weights.

    Both fit groups start empty, group 0 uses weights ``[1, 0, 1]`` and
    group 1 uses ``[1, 1, 0]``. Inputs and outputs are printed; the only
    check is that the call completes without raising.
    """
    user_ids, user_profile_df, user_connection_df = load_sample_test_data()

    # Hand-built state: the listed users sit in the unfit groups only.
    fit_group = {0: [], 1: []}
    fit_pvals = {0: [], 1: []}
    unfit_group = {0: ['d', 'e'], 1: ['a', 'b', 'c']}
    buffer_group = []
    dist_metrics = {0: [1, 0, 1], 1: [1, 1, 0]}
    dist_memory_container = [{}, {}]

    new_fit_group, new_fit_pvals, buffer_group = _update_unfit_groups_with_crossgroup_dist(
        dist_metrics, fit_group, fit_pvals, unfit_group, buffer_group,
        user_ids, user_profile_df, user_connection_df,
        dist_memory_container,
        ks_alpha=0.05)

    print(
        "--- _update_unfit_groups_with_crossgroup_dist (with modified dist metrics)---"
    )
    print("1st unfit_group: {}".format(unfit_group[0]))
    print("2nd unfit_group: {}".format(unfit_group[1]))
    print("new 1st group's distance metrics: {}".format(dist_metrics[0]))
    print("new 2nd group's distance metrics: {}".format(dist_metrics[1]))
    print("new 1st fit_group: {}".format(new_fit_group[0]))
    print("new 2nd fit_group: {}".format(new_fit_group[1]))
    print("new 1st fit_pval: {}".format(new_fit_pvals[0]))
    print("new 2nd fit_pval: {}".format(new_fit_pvals[1]))
    print("buffer_group: {}".format(buffer_group))

    # Reaching this line means nothing above raised. The second positional
    # argument of assertTrue is the failure message, not an expected value,
    # so it is not passed.
    is_ok = True
    self.assertTrue(is_ok)
def test_single_run_03(self):
    """Smoke-run one learning iteration with all listed users in group 1.

    Group 0 starts empty. The four values returned by
    ``_groupwise_dist_learning_single_run`` are printed; nothing is
    asserted, so the test can only fail if a call raises.
    """
    user_ids, user_profiles, user_connections = load_sample_test_data()

    # Hand-built learner state.
    dist_metrics = {0: [1, 1, 1], 1: [1, 1, 1]}
    fit_group = {0: [], 1: ['a', 'd', 'e', 'b', 'c']}
    fit_pvals = {0: [], 1: [1, 1, 1, 1, 1]}
    buffer_group = []

    (learned_metrics,
     learned_groups,
     learned_pvals,
     learned_buffer) = _groupwise_dist_learning_single_run(
        dist_metrics, fit_group, fit_pvals, buffer_group,
        user_ids, user_profiles, user_connections,
        ks_alpha=0.05, min_group_size=1, verbose=True)

    print("--- single run test ---")
    print("1st new_dist_metrics's distance metrics: {}".format(learned_metrics[0]))
    print("1nd new_dist_metrics's distance metrics: {}".format(learned_metrics[1]))
    print("new 1st fit_group: {}".format(learned_groups[0]))
    print("new 2nd fit_group: {}".format(learned_groups[1]))
    print("new 1st fit_pval: {}".format(learned_pvals[0]))
    print("new 2nd fit_pval: {}".format(learned_pvals[1]))
    print("buffer_group: {}".format(learned_buffer))