def test_get_features_for_blocking_valid(self):
    """Check that blocking features built from two valid tables are usable.

    Loads tables A and B, generates the blocking feature table, verifies it
    is a pandas DataFrame, and then calls every entry of its ``function``
    column on one row from each table, requiring a non-negative result.
    """
    A = read_csv_metadata(path_a)
    B = read_csv_metadata(path_b, key='ID')
    feat_table = afg.get_features_for_blocking(A, B)
    self.assertIsInstance(feat_table, pd.DataFrame)

    # ``DataFrame.ix`` was deprecated in pandas 0.20 and removed in 1.0;
    # ``.loc`` performs the same label-based lookup on an integer index.
    # NOTE(review): assumes both tables carry the default integer index
    # produced when the CSV is read -- confirm.
    ltuple = A.loc[1]
    rtuple = B.loc[2]
    for feature_fn in feat_table['function']:
        score = feature_fn(ltuple, rtuple)
        self.assertGreaterEqual(score, 0)
def test_valid_object_2(self):
    """Round-trip a rule-based blocker through save_object/load_object.

    Blocks tables A and B with a single rule, saves the blocker into the
    sandbox directory, loads it back, and checks that the reloaded blocker
    produces a candidate set equal to the one from the original blocker.
    """
    # Reset shared state left behind by earlier tests.
    cm.del_catalog()
    del_files_in_dir(sndbx_path)

    ltable = read_csv_metadata(path_a)
    rtable = read_csv_metadata(path_b, key='ID')
    blocking_features = get_features_for_blocking(ltable, rtable)

    blocker = RuleBasedBlocker()
    blocker.add_rule('zipcode_zipcode_exm(ltuple, rtuple) != 1',
                     blocking_features)
    candidates = blocker.block_tables(ltable, rtable, show_progress=False)
    self.assertEqual(len(candidates), 15)

    # Persist the blocker, then rebuild it from disk.
    blocker_path = os.sep.join([sndbx_path, 'C.pkl'])
    creat_dir_ifnot_exists(sndbx_path)
    save_object(blocker, blocker_path)
    restored_blocker = load_object(blocker_path)

    restored_candidates = restored_blocker.block_tables(
        ltable, rtable, show_progress=False)
    outputs_match = candidates.equals(restored_candidates)
    self.assertEqual(outputs_match, True)
def test_get_features_for_blocking_invalid_df2(self):
    """Passing ``None`` as the right table must be rejected.

    The left table is loaded normally; only the second argument is invalid.
    """
    A = read_csv_metadata(path_a)
    # Without an explicit expectation this test either errors out or passes
    # vacuously, so pin the rejection to make the intent checkable.
    # NOTE(review): AssertionError is what py_entitymatching documents for a
    # non-DataFrame table -- confirm, and drop this context manager if an
    # expected-exception decorator is already applied outside this view.
    with self.assertRaises(AssertionError):
        afg.get_features_for_blocking(A, None)
def test_get_features_for_blocking_invalid_df1(self):
    """Passing ``None`` as the left table must be rejected.

    The right table is loaded normally; only the first argument is invalid.
    """
    B = read_csv_metadata(path_b, key='ID')
    # Without an explicit expectation this test either errors out or passes
    # vacuously, so pin the rejection to make the intent checkable.
    # NOTE(review): AssertionError is what py_entitymatching documents for a
    # non-DataFrame table -- confirm, and drop this context manager if an
    # expected-exception decorator is already applied outside this view.
    with self.assertRaises(AssertionError):
        afg.get_features_for_blocking(None, B)