def test_get_features_for_blocking_valid(self):
     """Check that blocking features are generated for two valid tables.

     Reads tables A and B, asks ``afg.get_features_for_blocking`` for a
     feature table, verifies the result is a pandas DataFrame, and then
     applies every callable in its ``function`` column to one row from
     each table, requiring a non-negative result.
     """
     A = read_csv_metadata(path_a)
     B = read_csv_metadata(path_b, key='ID')
     feat_table = afg.get_features_for_blocking(A, B)
     self.assertIsInstance(feat_table, pd.DataFrame)
     # ``DataFrame.ix`` was deprecated in pandas 0.20 and removed in 1.0;
     # ``.loc`` is the label-based replacement.
     # NOTE(review): assumes A and B carry integer row labels, in which
     # case ``.ix`` was label-based as well -- confirm.
     l_row = A.loc[1]
     r_row = B.loc[2]
     for f in feat_table['function']:
         self.assertGreaterEqual(f(l_row, r_row), 0)
 def test_get_features_for_blocking_valid(self):
     """Verify the feature table built for blocking from two valid tables.

     The table returned by ``afg.get_features_for_blocking(A, B)`` must be
     a pandas DataFrame, and each callable stored in its ``function``
     column must return a value >= 0 when applied to a row of A and a row
     of B.
     """
     A = read_csv_metadata(path_a)
     B = read_csv_metadata(path_b, key='ID')
     feat_table = afg.get_features_for_blocking(A, B)
     self.assertIsInstance(feat_table, pd.DataFrame)
     # ``.ix`` no longer exists in pandas >= 1.0; ``.loc`` selects by label.
     # NOTE(review): assumes integer row labels on A and B, where ``.ix``
     # also resolved by label -- confirm.
     l_row = A.loc[1]
     r_row = B.loc[2]
     for f in feat_table['function']:
         self.assertGreaterEqual(f(l_row, r_row), 0)
Example #3
0
    def test_valid_object_2(self):
        """Round-trip a rule-based blocker through save_object/load_object.

        Builds a ``RuleBasedBlocker`` with a single rule, blocks tables A
        and B (expecting a result of length 15), saves the blocker to
        ``C.pkl`` under the sandbox path, loads it back, and checks that
        the reloaded blocker produces an equal result for the same tables.
        """
        # Reset the catalog and clear the sandbox before doing anything.
        cm.del_catalog()
        del_files_in_dir(sndbx_path)

        ltable = read_csv_metadata(path_a)
        rtable = read_csv_metadata(path_b, key='ID')
        blocking_features = get_features_for_blocking(ltable, rtable)

        blocker = RuleBasedBlocker()
        blocker.add_rule('zipcode_zipcode_exm(ltuple, rtuple) != 1',
                         blocking_features)
        expected = blocker.block_tables(ltable, rtable, show_progress=False)
        self.assertEqual(len(expected), 15)

        # Persist the blocker, then reload it from the same file.
        pickle_path = os.sep.join([sndbx_path, 'C.pkl'])
        creat_dir_ifnot_exists(sndbx_path)
        save_object(blocker, pickle_path)
        restored = load_object(pickle_path)

        actual = restored.block_tables(ltable, rtable, show_progress=False)
        self.assertTrue(expected.equals(actual))
 def test_get_features_for_blocking_invalid_df2(self):
     """Call ``afg.get_features_for_blocking`` with None as the second table.

     NOTE(review): no assertion or expected-exception check is visible in
     this method -- presumably the call is meant to raise; confirm how
     that expectation is declared.
     """
     ltable = read_csv_metadata(path_a)
     afg.get_features_for_blocking(ltable, None)
 def test_get_features_for_blocking_invalid_df1(self):
     """Call ``afg.get_features_for_blocking`` with None as the first table.

     NOTE(review): no assertion or expected-exception check is visible in
     this method -- presumably the call is meant to raise; confirm how
     that expectation is declared.
     """
     rtable = read_csv_metadata(path_b, key='ID')
     afg.get_features_for_blocking(None, rtable)
 def test_get_features_for_blocking_invalid_df2(self):
     """Exercise ``afg.get_features_for_blocking`` with a missing right table.

     Table A is read normally and None is passed in place of table B.
     NOTE(review): nothing in this method checks the outcome -- presumably
     an exception is expected; verify where that is asserted.
     """
     left = read_csv_metadata(path_a)
     afg.get_features_for_blocking(left, None)
 def test_get_features_for_blocking_invalid_df1(self):
     """Exercise ``afg.get_features_for_blocking`` with a missing left table.

     None is passed in place of table A and table B is read normally.
     NOTE(review): nothing in this method checks the outcome -- presumably
     an exception is expected; verify where that is asserted.
     """
     right = read_csv_metadata(path_b, key='ID')
     afg.get_features_for_blocking(None, right)