def test_feature_fn_valid_nosim_tok(self):
     """Build a feature function while passing two empty dicts to get_feature_fn.

     This is a smoke test: nothing is asserted about the returned value, so
     the test passes as long as none of the calls below raises.
     """
     left_table = read_csv_metadata(path_a)
     right_table = read_csv_metadata(path_b, key='ID')
     features = get_features_for_matching(left_table, right_table)
     # Not referenced afterwards; kept so the call sequence stays unchanged.
     feature_count = len(features)
     expression = "exact_match(ltuple['zipcode'], rtuple['zipcode'])"
     feature = get_feature_fn(expression, {}, {})
 def test_feature_fn_valid_nosim_tok(self):
     """Build a feature function while passing two empty dicts to get_feature_fn.

     This is a smoke test: nothing is asserted about the returned value, so
     the test passes as long as none of the calls below raises.
     """
     left_table = read_csv_metadata(path_a)
     right_table = read_csv_metadata(path_b, key='ID')
     features = get_features_for_matching(left_table, right_table)
     # Not referenced afterwards; kept so the call sequence stays unchanged.
     feature_count = len(features)
     expression = "exact_match(ltuple['zipcode'], rtuple['zipcode'])"
     feature = get_feature_fn(expression, {}, {})
    def test_add_feature_invalid_df_columns(self):
        """Call add_feature with an empty DataFrame as the feature table.

        No assertion is made here.
        NOTE(review): the test name suggests add_feature is expected to reject
        the empty DataFrame, but how that expectation is enforced is not
        visible in this chunk -- confirm.
        """
        # Both tables are loaded but never referenced again in this test.
        left_table = read_csv_metadata(path_a)
        right_table = read_csv_metadata(path_b, key='ID')

        expression = "exact_match(ltuple['zipcode'], rtuple['zipcode'])"
        tokenizers = get_tokenizers_for_matching()
        sim_functions = get_sim_funs_for_matching()
        feature = get_feature_fn(expression, tokenizers, sim_functions)
        empty_table = pd.DataFrame()
        add_feature(empty_table, 'test', feature)
    def test_add_feature_invalid_df_columns(self):
        """Call add_feature with an empty DataFrame as the feature table.

        No assertion is made here.
        NOTE(review): the test name suggests add_feature is expected to reject
        the empty DataFrame, but how that expectation is enforced is not
        visible in this chunk -- confirm.
        """
        # Both tables are loaded but never referenced again in this test.
        left_table = read_csv_metadata(path_a)
        right_table = read_csv_metadata(path_b, key='ID')

        expression = "exact_match(ltuple['zipcode'], rtuple['zipcode'])"
        tokenizers = get_tokenizers_for_matching()
        sim_functions = get_sim_funs_for_matching()
        feature = get_feature_fn(expression, tokenizers, sim_functions)
        empty_table = pd.DataFrame()
        add_feature(empty_table, 'test', feature)
 def test_add_feature_name_already_present(self):
     """Add the same feature under the name 'test' twice to one feature table.

     No assertion is made here.
     NOTE(review): the test name suggests the second add_feature call is
     expected to be rejected, but how that expectation is enforced is not
     visible in this chunk -- confirm.
     """
     # Both tables are loaded but never referenced again in this test.
     left_table = read_csv_metadata(path_a)
     right_table = read_csv_metadata(path_b, key='ID')
     features = create_feature_table()
     # Not referenced afterwards; kept so the call sequence stays unchanged.
     feature_count = len(features)
     expression = "exact_match(ltuple['zipcode'], rtuple['zipcode'])"
     tokenizers = get_tokenizers_for_matching()
     sim_functions = get_sim_funs_for_matching()
     feature = get_feature_fn(expression, tokenizers, sim_functions)
     # Two identical insertions under the same feature name.
     for _ in range(2):
         add_feature(features, 'test', feature)
 def test_add_feature_name_already_present(self):
     """Add the same feature under the name 'test' twice to one feature table.

     No assertion is made here.
     NOTE(review): the test name suggests the second add_feature call is
     expected to be rejected, but how that expectation is enforced is not
     visible in this chunk -- confirm.
     """
     # Both tables are loaded but never referenced again in this test.
     left_table = read_csv_metadata(path_a)
     right_table = read_csv_metadata(path_b, key='ID')
     features = create_feature_table()
     # Not referenced afterwards; kept so the call sequence stays unchanged.
     feature_count = len(features)
     expression = "exact_match(ltuple['zipcode'], rtuple['zipcode'])"
     tokenizers = get_tokenizers_for_matching()
     sim_functions = get_sim_funs_for_matching()
     feature = get_feature_fn(expression, tokenizers, sim_functions)
     # Two identical insertions under the same feature name.
     for _ in range(2):
         add_feature(features, 'test', feature)
 def test_add_features_valid_1(self):
     """Add one feature to a generated feature table and evaluate it.

     Builds the feature table for tables A and B, adds an ``exact_match``
     feature on ``zipcode`` under the name 'test', then checks that:

     * the feature table grew by exactly one row, and
     * the callable stored in the 'function' column of the row labelled
       ``len(feature_table) - 1`` returns 1.0 for the row labelled 1 in A
       and the row labelled 2 in B.
     """
     A = read_csv_metadata(path_a)
     B = read_csv_metadata(path_b, key='ID')
     feature_table = get_features_for_matching(A, B)
     len1 = len(feature_table)
     feature_string = "exact_match(ltuple['zipcode'], rtuple['zipcode'])"
     f_dict = get_feature_fn(feature_string, get_tokenizers_for_matching(),
                             get_sim_funs_for_matching())
     add_feature(feature_table, 'test', f_dict)
     len2 = len(feature_table)
     self.assertEqual(len1 + 1, len2)
     # ``.ix`` was deprecated in pandas 0.20 and removed in 1.0. With an
     # integer index ``.ix`` resolved integers as labels, so ``.loc`` is the
     # equivalent accessor.
     # NOTE(review): assumes A, B and feature_table carry integer indexes
     # (not visible in this chunk) -- confirm.
     feature_fn = feature_table.loc[len2 - 1, 'function']
     self.assertEqual(feature_fn(A.loc[1], B.loc[2]), 1.0)
 def test_add_features_valid_1(self):
     """Add one feature to a generated feature table and evaluate it.

     Builds the feature table for tables A and B, adds an ``exact_match``
     feature on ``zipcode`` under the name 'test', then checks that:

     * the feature table grew by exactly one row, and
     * the callable stored in the 'function' column of the row labelled
       ``len(feature_table) - 1`` returns 1.0 for the row labelled 1 in A
       and the row labelled 2 in B.
     """
     A = read_csv_metadata(path_a)
     B = read_csv_metadata(path_b, key='ID')
     feature_table = get_features_for_matching(A, B)
     len1 = len(feature_table)
     feature_string = "exact_match(ltuple['zipcode'], rtuple['zipcode'])"
     f_dict = get_feature_fn(feature_string, get_tokenizers_for_matching(),
                             get_sim_funs_for_matching())
     add_feature(feature_table, 'test', f_dict)
     len2 = len(feature_table)
     self.assertEqual(len1 + 1, len2)
     # ``.ix`` was deprecated in pandas 0.20 and removed in 1.0. With an
     # integer index ``.ix`` resolved integers as labels, so ``.loc`` is the
     # equivalent accessor.
     # NOTE(review): assumes A, B and feature_table carry integer indexes
     # (not visible in this chunk) -- confirm.
     feature_fn = feature_table.loc[len2 - 1, 'function']
     self.assertEqual(feature_fn(A.loc[1], B.loc[2]), 1.0)
 def test_get_feature_fn_invalid_tok(self):
     """Call get_feature_fn with an empty string, an empty dict and None.

     No assertion is made here.
     NOTE(review): the name suggests the call is expected to fail; how that
     expectation is enforced is not visible in this chunk -- confirm.
     """
     empty_feature_string = ""
     get_feature_fn(empty_feature_string, {}, None)
 def test_get_feature_fn_invalid_sim(self):
     """Call get_feature_fn with an empty string, None and an empty dict.

     No assertion is made here.
     NOTE(review): the name suggests the call is expected to fail; how that
     expectation is enforced is not visible in this chunk -- confirm.
     """
     empty_feature_string = ""
     get_feature_fn(empty_feature_string, None, {})
 def test_get_feature_fn_invalid_feat_str(self):
     """Call get_feature_fn with None as the feature string and two empty dicts.

     No assertion is made here.
     NOTE(review): the name suggests the call is expected to fail; how that
     expectation is enforced is not visible in this chunk -- confirm.
     """
     missing_feature_string = None
     get_feature_fn(missing_feature_string, {}, {})
 def test_get_feature_fn_invalid_tok(self):
     """Call get_feature_fn with an empty string, an empty dict and None.

     No assertion is made here.
     NOTE(review): the name suggests the call is expected to fail; how that
     expectation is enforced is not visible in this chunk -- confirm.
     """
     empty_feature_string = ""
     get_feature_fn(empty_feature_string, {}, None)
 def test_get_feature_fn_invalid_sim(self):
     """Call get_feature_fn with an empty string, None and an empty dict.

     No assertion is made here.
     NOTE(review): the name suggests the call is expected to fail; how that
     expectation is enforced is not visible in this chunk -- confirm.
     """
     empty_feature_string = ""
     get_feature_fn(empty_feature_string, None, {})
 def test_get_feature_fn_invalid_feat_str(self):
     """Call get_feature_fn with None as the feature string and two empty dicts.

     No assertion is made here.
     NOTE(review): the name suggests the call is expected to fail; how that
     expectation is enforced is not visible in this chunk -- confirm.
     """
     missing_feature_string = None
     get_feature_fn(missing_feature_string, {}, {})