Ejemplo n.º 1
0
    def test_blocker_combiner_valid_8(self):
        """Union two candidate sets whose key columns were renamed by hand.

        Each candidate set (and the expected result) is read from CSV, has its
        ``l_ID``/``r_ID`` columns renamed to ``ltable_ID``/``rtable_ID``, and
        has those columns registered through ``cm.set_fk_ltable`` and
        ``cm.set_fk_rtable``. The union is requested with ``'l_'`` and ``'r_'``
        as the second and third arguments (presumably the left/right column
        prefixes -- confirm against the combiner's signature).
        """
        A = read_csv_metadata(path_a)
        B = read_csv_metadata(path_b, key='ID')

        def load_with_table_fks(file_name):
            # Read one fixture, rename its key columns in place, then register
            # the renamed columns as the foreign keys into A and B.
            candset = read_csv_metadata(
                os.sep.join([bc_datasets_path, file_name]), ltable=A, rtable=B)
            for old_name, new_name in (('l_ID', 'ltable_ID'),
                                       ('r_ID', 'rtable_ID')):
                candset.rename(columns={old_name: new_name}, inplace=True)
            cm.set_fk_ltable(candset, 'ltable_ID')
            cm.set_fk_rtable(candset, 'rtable_ID')
            return candset

        C1 = load_with_table_fks('C4_ex_1.csv')
        C2 = load_with_table_fks('C4_ex_2.csv')
        C = combine_blocker_outputs_via_union([C1, C2], 'l_', 'r_')
        C_exp = load_with_table_fks('C_ex_4.csv')

        # The frame-level comparison is skipped when os.name is 'nt'; the
        # catalog-property comparison below always runs.
        if os.name != 'nt':
            frames_match = C.equals(C_exp)
            self.assertEqual(frames_match, True)
        self.assertEqual(cm.get_all_properties(C),
                         cm.get_all_properties(C_exp))
Ejemplo n.º 2
0
 def test_blocker_combiner_valid_5(self):
     """Union the ``C3_ex_2.csv`` candidate set with itself.

     The combined result is expected to have zero rows and to carry the same
     catalog properties as the input candidate set.
     """
     ltable = read_csv_metadata(path_a)
     rtable = read_csv_metadata(path_b, key='ID')
     candset_path = os.sep.join([bc_datasets_path, 'C3_ex_2.csv'])
     candset = read_csv_metadata(candset_path, ltable=ltable, rtable=rtable)

     combined = combine_blocker_outputs_via_union([candset, candset])

     self.assertEqual(len(combined), 0)
     self.assertEqual(cm.get_all_properties(combined),
                      cm.get_all_properties(candset))
Ejemplo n.º 3
0
    def test_blocker_combiner_valid_2(self):
        """Union ``C1_ex_1.csv`` with ``C2_ex_1.csv`` and compare to ``C_ex_1.csv``.

        The frames are compared with ``DataFrame.equals`` (skipped when
        ``os.name`` is ``'nt'``) and the catalog properties of the result must
        equal those of the expected candidate set.
        """
        ltable = read_csv_metadata(path_a)
        rtable = read_csv_metadata(path_b, key='ID')

        def read_candset(file_name):
            # Load one fixture from the dataset directory, linked to both tables.
            return read_csv_metadata(os.sep.join([bc_datasets_path, file_name]),
                                     ltable=ltable, rtable=rtable)

        first = read_candset('C1_ex_1.csv')
        second = read_candset('C2_ex_1.csv')
        combined = combine_blocker_outputs_via_union([first, second])
        expected = read_candset('C_ex_1.csv')

        if os.name != 'nt':
            frames_match = combined.equals(expected)
            self.assertEqual(frames_match, True)
        self.assertEqual(cm.get_all_properties(combined),
                         cm.get_all_properties(expected))
Ejemplo n.º 4
0
    def test_blocker_combiner_valid_4(self):
        """Union ``C1_ex_1.csv`` with itself and compare against the input.

        After the union is computed, the input candidate set gets a fresh
        0..n-1 index and a matching ``_id`` column, and is then used as the
        expected result.
        """
        ltable = read_csv_metadata(path_a)
        rtable = read_csv_metadata(path_b, key='ID')
        candset_path = os.sep.join([bc_datasets_path, 'C1_ex_1.csv'])
        candset = read_csv_metadata(candset_path, ltable=ltable, rtable=rtable)

        combined = combine_blocker_outputs_via_union([candset, candset])

        # NOTE(review): the fixture appears to be stored already sorted by
        # (ltable_ID, rtable_ID); confirm before regenerating it.
        candset.reset_index(inplace=True, drop=True)
        row_count = len(candset)
        candset['_id'] = six.moves.range(0, row_count)

        # Frame equality is skipped when os.name is 'nt'.
        if os.name != 'nt':
            frames_match = combined.equals(candset)
            self.assertEqual(frames_match, True)
        self.assertEqual(cm.get_all_properties(combined),
                         cm.get_all_properties(candset))
Ejemplo n.º 5
0
    def test_blocker_combiner_valid_1(self):
        """Union three candidate sets and compare with the expected fixture.

        The inputs come from ``path_c1``, ``path_c2`` and ``path_c3``; the
        expected candidate set comes from ``path_c`` and is given a fresh
        0..n-1 index and ``_id`` column before the comparison.
        """
        ltable = read_csv_metadata(path_a)
        rtable = read_csv_metadata(path_b, key='ID')
        candsets = [
            read_csv_metadata(candset_path, ltable=ltable, rtable=rtable)
            for candset_path in (path_c1, path_c2, path_c3)
        ]
        combined = combine_blocker_outputs_via_union(candsets)

        # NOTE(review): the expected fixture appears to be stored already
        # sorted by (ltable_ID, rtable_ID); confirm before regenerating it.
        expected = read_csv_metadata(path_c, ltable=ltable, rtable=rtable)
        expected.reset_index(inplace=True, drop=True)
        row_count = len(expected)
        expected['_id'] = six.moves.range(0, row_count)

        # Frame equality is skipped when os.name is 'nt'.
        if os.name != 'nt':
            frames_match = combined.equals(expected)
            self.assertEqual(frames_match, True)
        self.assertEqual(cm.get_all_properties(combined),
                         cm.get_all_properties(expected))
Ejemplo n.º 6
0
# Fixture files located under bc_datasets_path.
path_c, path_c1, path_c2, path_c3 = (
    os.sep.join([bc_datasets_path, file_name])
    for file_name in ('C.csv', 'C1.csv', 'C2.csv', 'C3.csv')
)

# Disabled scratch check: union of C1/C2/C3 compared element-wise with C.csv.
# A = read_csv_metadata(path_a)
# B = read_csv_metadata(path_b, key='ID')
# C1 = read_csv_metadata(path_c1, ltable=A, rtable=B)
# C2 = read_csv_metadata(path_c2, ltable=A, rtable=B)
# C3 = read_csv_metadata(path_c3, ltable=A, rtable=B)
# C = combine_blocker_outputs_via_union([C1, C2, C3])
# # print(C.head(5))
# C1 = read_csv_metadata(path_c, ltable=A, rtable=B)
# C1.sort_values(['ltable_ID', 'rtable_ID'], inplace=True)
# C1['_id'] = six.moves.range(0, len(C1))
# C1.reset_index(inplace=True, drop=True)
# ne = (C == C1)
# # print(ne)
# print(C['_id'])
# print(C1['_id'])
# # print(C1.head(5))


# Active scratch run: union the C3_ex_2.csv candidate set with itself and
# print the first five rows of the result and of the input.
A = read_csv_metadata(path_a)
B = read_csv_metadata(path_b, key='ID')
C1 = read_csv_metadata(
    os.sep.join([bc_datasets_path, 'C3_ex_2.csv']),
    ltable=A,
    rtable=B,
)
C = combine_blocker_outputs_via_union([C1, C1])
for frame in (C, C1):
    print(frame.head(5))

Ejemplo n.º 7
0
 def test_blocker_combiner_invalid_rprefix(self):
     """Call the combiner with ``None`` as its third argument.

     NOTE(review): no failure expectation is visible in this method; it is
     presumably declared outside it (e.g. by a decorator) -- confirm.
     """
     blocker_outputs = [pd.DataFrame()]
     combine_blocker_outputs_via_union(blocker_outputs, 'ltable_', None)
Ejemplo n.º 8
0
 def test_blocker_combiner_invalid_df(self):
     """Call the combiner with a list of integers instead of data frames.

     NOTE(review): no failure expectation is visible in this method; it is
     presumably declared outside it (e.g. by a decorator) -- confirm.
     """
     not_dataframes = [10, 10]
     combine_blocker_outputs_via_union(not_dataframes)
Ejemplo n.º 9
0
# Fixture files located under bc_datasets_path.
path_c, path_c1, path_c2, path_c3 = (
    os.sep.join([bc_datasets_path, file_name])
    for file_name in ('C.csv', 'C1.csv', 'C2.csv', 'C3.csv')
)

# Disabled scratch check: union of C1/C2/C3 compared element-wise with C.csv.
# A = read_csv_metadata(path_a)
# B = read_csv_metadata(path_b, key='ID')
# C1 = read_csv_metadata(path_c1, ltable=A, rtable=B)
# C2 = read_csv_metadata(path_c2, ltable=A, rtable=B)
# C3 = read_csv_metadata(path_c3, ltable=A, rtable=B)
# C = combine_blocker_outputs_via_union([C1, C2, C3])
# # print(C.head(5))
# C1 = read_csv_metadata(path_c, ltable=A, rtable=B)
# C1.sort_values(['ltable_ID', 'rtable_ID'], inplace=True)
# C1['_id'] = six.moves.range(0, len(C1))
# C1.reset_index(inplace=True, drop=True)
# ne = (C == C1)
# # print(ne)
# print(C['_id'])
# print(C1['_id'])
# # print(C1.head(5))

# Active scratch run: union the C3_ex_2.csv candidate set with itself and
# print the first five rows of the result and of the input.
A = read_csv_metadata(path_a)
B = read_csv_metadata(path_b, key='ID')
c3_ex_2_path = os.sep.join([bc_datasets_path, 'C3_ex_2.csv'])
C1 = read_csv_metadata(c3_ex_2_path, ltable=A, rtable=B)
C = combine_blocker_outputs_via_union([C1, C1])
for frame in (C, C1):
    print(frame.head(5))