def test_get_filtered_table(self):
    """Check `_get_filtered_table` on a pair of one-row tables.

    Both inputs carry the columns ID, name, price and zip code. The
    correspondence list is first handed to `db._filter_corres_list`; the
    tables returned by `db._get_filtered_table` must then equal frames
    holding only the ID and name columns.
    """
    header = ['ID', 'name', 'price', 'zip code']

    ltable = pd.DataFrame([['a1', 'A', 0.11, 53704]], columns=header)
    em.set_key(ltable, 'ID')
    rtable = pd.DataFrame([['b1', 'A', 0.11, 54321]], columns=header)
    em.set_key(rtable, 'ID')

    l_key = 'ID'
    r_key = 'ID'
    l_col_index = db._build_col_name_index_dict(ltable)
    r_col_index = db._build_col_name_index_dict(rtable)

    # Every column is initially paired with its namesake.
    corres = [(col, col) for col in header]
    # NOTE(review): the return value is ignored, so this presumably
    # prunes `corres` in place -- confirm against the helper.
    db._filter_corres_list(ltable, rtable, l_key, r_key,
                           l_col_index, r_col_index, corres)

    actual_l, actual_r = db._get_filtered_table(ltable, rtable, corres)

    kept = ['ID', 'name']
    expected_l = pd.DataFrame([['a1', 'A']], columns=kept)
    em.set_key(expected_l, 'ID')
    expected_r = pd.DataFrame([['b1', 'A']], columns=kept)
    em.set_key(expected_r, 'ID')

    self.assertEqual(expected_l.equals(actual_l), True)
    self.assertEqual(expected_r.equals(actual_r), True)
def test_get_filtered_table(self):
    """Verify the tables produced by `db._get_filtered_table`.

    Two single-row tables keyed on 'ID' are built, their correspondence
    list is passed through `db._filter_corres_list`, and the filtered
    outputs are compared with frames containing just 'ID' and 'name'.
    """

    def keyed_frame(row, header):
        # Build a one-row frame, name its columns, register 'ID' as key.
        frame = pd.DataFrame([row])
        frame.columns = header
        em.set_key(frame, 'ID')
        return frame

    full_header = ['ID', 'name', 'price', 'zip code']
    table_a = keyed_frame(['a1', 'A', 0.11, 53704], list(full_header))
    table_b = keyed_frame(['b1', 'A', 0.11, 54321], list(full_header))

    key_a, key_b = 'ID', 'ID'
    index_a = db._build_col_name_index_dict(table_a)
    index_b = db._build_col_name_index_dict(table_b)

    pairs = [
        ('ID', 'ID'),
        ('name', 'name'),
        ('price', 'price'),
        ('zip code', 'zip code'),
    ]
    # NOTE(review): result is discarded; `pairs` is presumably updated
    # in place by the helper -- confirm.
    db._filter_corres_list(
        table_a, table_b, key_a, key_b, index_a, index_b, pairs)

    got_a, got_b = db._get_filtered_table(table_a, table_b, pairs)

    want_a = keyed_frame(['a1', 'A'], ['ID', 'name'])
    want_b = keyed_frame(['b1', 'A'], ['ID', 'name'])

    for want, got in ((want_a, got_a), (want_b, got_b)):
        self.assertEqual(want.equals(got), True)
def test_select_features_3(self):
    """`_select_features` yields no features when only column 0 is kept.

    The tables are loaded from `path_a` / `path_b` with 'ID' as key, and
    the correspondence list pairs only the first column of each table.

    The correspondence list holds column-name pairs and is passed to
    `_get_filtered_table` as its third argument; `_select_features`
    receives both table keys. This matches how the other tests in this
    file call the same helpers.
    """
    A = read_csv_metadata(path_a, key='ID')
    B = read_csv_metadata(path_b, key='ID')
    A_key = em.get_key(A)
    B_key = em.get_key(B)

    # Correspondences are expressed as column names, not positions.
    cols_A = list(A.columns)
    cols_B = list(B.columns)
    corres_list = [(cols_A[0], cols_B[0])]

    A_filtered, B_filtered = db._get_filtered_table(A, B, corres_list)
    actual_selected_features = db._select_features(
        A_filtered, B_filtered, A_key, B_key)

    expected_selected_features = []
    self.assertEqual(actual_selected_features,
                     expected_selected_features)
def test_select_features_3(self):
    """Expect an empty feature selection for a single-column pairing.

    Only the first column of each table is put in correspondence before
    the tables are filtered and handed to `db._select_features`.
    """
    ltable = read_csv_metadata(path_a, key='ID')
    rtable = read_csv_metadata(path_b, key='ID')
    l_key = em.get_key(ltable)
    r_key = em.get_key(rtable)

    # Pair columns positionally, then keep just the first pairing.
    paired_cols = list(zip(ltable.columns, rtable.columns))
    corres = [paired_cols[0]]

    l_filtered, r_filtered = db._get_filtered_table(
        ltable, rtable, corres)
    selected = db._select_features(l_filtered, r_filtered, l_key, r_key)

    self.assertEqual(selected, [])
def test_select_features_2(self):
    """Expect features [1, 2] when columns 0, 1 and 4 are paired.

    The tables come from `path_a` / `path_b` keyed on 'ID'; they are
    filtered down to the paired columns before `db._select_features`
    is called with both keys.
    """
    ltable = read_csv_metadata(path_a, key='ID')
    rtable = read_csv_metadata(path_b, key='ID')
    l_key = em.get_key(ltable)
    r_key = em.get_key(rtable)

    # Pair columns positionally and keep positions 0, 1 and 4.
    paired_cols = list(zip(ltable.columns, rtable.columns))
    corres = [paired_cols[pos] for pos in (0, 1, 4)]

    l_filtered, r_filtered = db._get_filtered_table(
        ltable, rtable, corres)
    selected = db._select_features(l_filtered, r_filtered, l_key, r_key)

    self.assertEqual(selected, [1, 2])
def test_get_feature_weight_2(self):
    """Check `_get_feature_weight` on tables filtered to four columns.

    Columns 0, 1, 4 and 5 of each table are paired. The correspondence
    list holds column-name pairs and is passed to `_get_filtered_table`
    as its third argument, matching the other tests in this file. The
    table keys are not needed by that call, so they are not looked up.
    """
    A = read_csv_metadata(path_a, key='ID')
    B = read_csv_metadata(path_b, key='ID')

    # Correspondences are expressed as column names, not positions.
    cols_A = list(A.columns)
    cols_B = list(B.columns)
    corres_list = [(cols_A[i], cols_B[i]) for i in (0, 1, 4, 5)]

    A_filtered, B_filtered = db._get_filtered_table(A, B, corres_list)

    A_wlist = db._get_feature_weight(A_filtered)
    expected_A_wlist = [2.0, 2.0, 2.0, 1.4]
    self.assertEqual(A_wlist, expected_A_wlist)

    B_wlist = db._get_feature_weight(B_filtered)
    expected_B_wlist = [2.0, 2.0, 2.0, 1.3333333333333333]
    self.assertEqual(B_wlist, expected_B_wlist)
def test_get_feature_weight_2(self):
    """Check `_get_feature_weight` on tables filtered to four columns.

    Columns 0, 1, 4 and 5 of each table are paired by name. The
    correspondence list is passed to `_get_filtered_table` as its third
    argument, matching the other tests in this file. The table keys are
    not needed by that call, so they are not looked up.
    """
    A = read_csv_metadata(path_a, key='ID')
    B = read_csv_metadata(path_b, key='ID')

    cols_A = list(A.columns)
    cols_B = list(B.columns)
    corres_list = [
        (cols_A[0], cols_B[0]),
        (cols_A[1], cols_B[1]),
        (cols_A[4], cols_B[4]),
        (cols_A[5], cols_B[5]),
    ]

    # The filter helper takes only the two tables and the pairings.
    A_filtered, B_filtered = db._get_filtered_table(A, B, corres_list)

    A_wlist = db._get_feature_weight(A_filtered)
    expected_A_wlist = [2.0, 2.0, 2.0, 1.4]
    self.assertEqual(A_wlist, expected_A_wlist)

    B_wlist = db._get_feature_weight(B_filtered)
    expected_B_wlist = [2.0, 2.0, 2.0, 1.3333333333333333]
    self.assertEqual(B_wlist, expected_B_wlist)
def test_get_feature_weight_2(self):
    """Compare feature weights of both filtered tables with fixed lists.

    The tables are loaded from `path_a` / `path_b` keyed on 'ID' and
    filtered to the columns at positions 0, 1, 4 and 5 before
    `db._get_feature_weight` is applied to each.
    """
    ltable = read_csv_metadata(path_a, key='ID')
    rtable = read_csv_metadata(path_b, key='ID')
    # The keys are looked up but not passed on to any later call.
    l_key = em.get_key(ltable)
    r_key = em.get_key(rtable)

    # Pair columns positionally and keep positions 0, 1, 4 and 5.
    paired_cols = list(zip(ltable.columns, rtable.columns))
    corres = [paired_cols[pos] for pos in (0, 1, 4, 5)]

    l_filtered, r_filtered = db._get_filtered_table(
        ltable, rtable, corres)

    l_weights = db._get_feature_weight(l_filtered)
    self.assertEqual(l_weights, [2.0, 2.0, 2.0, 1.4])

    r_weights = db._get_feature_weight(r_filtered)
    self.assertEqual(r_weights, [2.0, 2.0, 2.0, 1.3333333333333333])