import os from nose.tools import * import unittest import pandas as pd import six from magellan.utils.generic_helper import get_install_path from magellan.io.parsers import read_csv_metadata from magellan.feature.simfunctions import get_sim_funs_for_matching from magellan.feature.tokenizers import get_tokenizers_for_matching from magellan.feature.autofeaturegen import get_features_for_matching from magellan.feature.addfeatures import add_feature, add_blackbox_feature, get_feature_fn, parse_feat_str, create_feature_table import magellan.catalog.catalog_manager as cm datasets_path = os.sep.join([get_install_path(), 'datasets', 'test_datasets']) path_a = os.sep.join([datasets_path, 'A.csv']) path_b = os.sep.join([datasets_path, 'B.csv']) class AddFeaturesTestCases(unittest.TestCase): def setUp(self): cm.del_catalog() def tearDown(self): cm.del_catalog() def test_add_features_valid_1(self): A = read_csv_metadata(path_a) B = read_csv_metadata(path_b, key='ID') feature_table = get_features_for_matching(A, B)
# Demo script: fit a DTMatcher on one split of the demo feature vectors and
# append its predictions to the other split.
import os

import magellan.matcher.matcherutils as mu
from magellan.io.parsers import read_csv_metadata
from magellan.matcher.dtmatcher import DTMatcher
from magellan.utils.generic_helper import get_install_path


def _demo_path(filename):
    """Return the path of *filename* inside the matcher-selector dataset dir."""
    return os.sep.join([feat_datasets_path, filename])


# Directory under the install path that holds the demo CSV files.
feat_datasets_path = os.sep.join(
    [get_install_path(), 'datasets', 'test_datasets', 'matcherselector'])

fpath_a = _demo_path('DBLP_demo.csv')
fpath_b = _demo_path('ACM_demo.csv')
fpath_c = _demo_path('dblp_acm_demo_labels.csv')  # defined but not read below
fpath_f = _demo_path('feat_vecs.csv')

# Load both base tables keyed on 'id', then the feature vectors that refer
# back to them as ltable/rtable.
A = read_csv_metadata(fpath_a, key='id')
B = read_csv_metadata(fpath_b, key='id')
feature_vectors = read_csv_metadata(fpath_f, ltable=A, rtable=B)

# Split the feature vectors and pull out the two partitions by name.
train_test = mu.train_test_split(feature_vectors)
train = train_test['train']
test = train_test['test']

# Columns passed as exclude_attrs to both fit and predict; a fresh list is
# handed to each call, just as the original passed two separate literals.
_excluded = ('ltable.id', 'rtable.id', '_id', 'gold')

dt = DTMatcher(name='DecisionTree')
dt.fit(table=train, exclude_attrs=list(_excluded), target_attr='gold')

predictions = dt.predict(
    table=test,
    exclude_attrs=list(_excluded),
    target_attr='predicted',
    append=True,
)

print('Done')
import os from nose.tools import * import unittest import pandas as pd from magellan.utils.generic_helper import get_install_path from magellan.io.parsers import read_csv_metadata from magellan.feature.simfunctions import get_sim_funs_for_matching from magellan.feature.tokenizers import get_tokenizers_for_matching import magellan.feature.autofeaturegen as afg import magellan.catalog.catalog_manager as cm datasets_path = os.sep.join([get_install_path(), 'datasets', 'test_datasets']) bc_datasets_path = os.sep.join([get_install_path(), 'datasets', 'test_datasets', 'blockercombiner']) path_a = os.sep.join([datasets_path, 'A.csv']) path_b = os.sep.join([datasets_path, 'B.csv']) path_c = os.sep.join([datasets_path, 'C.csv']) class AutoFeatureGenerationTestCases(unittest.TestCase): def setUp(self): cm.del_catalog() def tearDown(self): cm.del_catalog() def test_get_features_valid(self): A = read_csv_metadata(path_a) B = read_csv_metadata(path_b, key='ID') l_attr_types = afg.get_attr_types(A) r_attr_types = afg.get_attr_types(B)
import os from nose.tools import * import unittest import pandas as pd from magellan.utils.generic_helper import get_install_path from magellan.io.parsers import read_csv_metadata from magellan.feature.extractfeatures import extract_feature_vecs from magellan.feature.autofeaturegen import get_features_for_matching import magellan.catalog.catalog_manager as cm datasets_path = os.sep.join([get_install_path(), 'datasets', 'test_datasets']) path_a = os.sep.join([datasets_path, 'A.csv']) path_b = os.sep.join([datasets_path, 'B.csv']) path_c = os.sep.join([datasets_path, 'C.csv']) class ExtractFeaturesTestCases(unittest.TestCase): def test_extract_feature_vecs_valid_1(self): A = read_csv_metadata(path_a) B = read_csv_metadata(path_b, key='ID') C = read_csv_metadata(path_c, ltable=A, rtable=B) col_pos = len(C.columns) C.insert(col_pos, 'label', [0]*len(C)) feature_table = get_features_for_matching(A, B) F = extract_feature_vecs(C, attrs_before=['ltable_name', 'rtable_name'], feature_table=feature_table, attrs_after='label') self.assertEqual(isinstance(F, pd.DataFrame), True) self.assertEqual(F.columns[0], '_id') self.assertEqual(F.columns[1], cm.get_fk_ltable(C)) self.assertEqual(F.columns[2], cm.get_fk_rtable(C))
# coding=utf-8 from __future__ import unicode_literals import os import unittest import pandas as pd from nose.tools import * from magellan.io.pickles import load_object, load_table, save_object, save_table from magellan.io.parsers import read_csv_metadata from magellan.blocker.rule_based_blocker import RuleBasedBlocker from magellan.feature.autofeaturegen import get_features_for_blocking from magellan.utils.generic_helper import get_install_path, del_files_in_dir, creat_dir_ifnot_exists import magellan.catalog.catalog_manager as cm datasets_path = os.sep.join([get_install_path(), 'datasets', 'test_datasets']) path_a = os.sep.join([datasets_path, 'A.csv']) path_b = os.sep.join([datasets_path, 'B.csv']) path_c = os.sep.join([datasets_path, 'C.csv']) sndbx_path = os.sep.join([os.sep.join([get_install_path(), 'datasets', 'test_datasets']), 'sandbox']) class SaveObjectTestCases(unittest.TestCase): @raises(AssertionError) def test_invalid_path_1(self): p = os.sep.join([sndbx_path, 'A_saved.pkl']) save_object(p, 10) @raises(AssertionError) def test_invalid_path_2(self): p = os.sep.join([sndbx_path, 'A_saved.pkl'])
# # # mg.to_csv_metadata(C, './C.csv') # print 'Hi' import logging import os import pandas as pd from magellan.io.parsers import read_csv_metadata from magellan.utils.generic_helper import get_install_path import magellan.catalog.catalog_manager as cm logging.basicConfig() io_datasets_path = os.sep.join( [get_install_path(), 'datasets', 'test_datasets', 'io']) # path_a = os.sep.join([io_datasets_path, 'A.csv']) path_b = os.sep.join([io_datasets_path, 'B.csv']) # path_c = os.sep.join([io_datasets_path, 'C.csv']) # # A = read_csv_metadata(path_a) # B = read_csv_metadata(path_b) # # C = mg.read_csv_metadata(path_c, ltable=A, rtable=B) # # cm.show_properties(C) p_C = os.sep.join([io_datasets_path, 'C.csv']) p_A = os.sep.join([io_datasets_path, 'A_fk1.csv']) A = read_csv_metadata(p_A) cm.set_property(A, 'key', 'ID')
import os from nose.tools import * import unittest import pandas as pd from magellan.utils.generic_helper import get_install_path from magellan.io.parsers import read_csv_metadata from magellan.feature.simfunctions import get_sim_funs_for_matching from magellan.feature.tokenizers import get_tokenizers_for_matching import magellan.feature.autofeaturegen as afg import magellan.catalog.catalog_manager as cm datasets_path = os.sep.join([get_install_path(), 'datasets', 'test_datasets']) bc_datasets_path = os.sep.join( [get_install_path(), 'datasets', 'test_datasets', 'blockercombiner']) path_a = os.sep.join([datasets_path, 'A.csv']) path_b = os.sep.join([datasets_path, 'B.csv']) path_c = os.sep.join([datasets_path, 'C.csv']) class AutoFeatureGenerationTestCases(unittest.TestCase): def setUp(self): cm.del_catalog() def tearDown(self): cm.del_catalog() def test_get_features_valid(self): A = read_csv_metadata(path_a) B = read_csv_metadata(path_b, key='ID')
# Demo script: train a decision-tree matcher on the demo feature vectors and
# predict on the held-out split, appending the result as a 'predicted' column.
import os

import magellan.matcher.matcherutils as mu
from magellan.io.parsers import read_csv_metadata
from magellan.matcher.dtmatcher import DTMatcher
from magellan.utils.generic_helper import get_install_path

# Location of the demo CSV files, relative to the package install path.
feat_datasets_path = os.sep.join([
    get_install_path(),
    'datasets',
    'test_datasets',
    'matcherselector',
])
fpath_a = os.sep.join([feat_datasets_path, 'DBLP_demo.csv'])
fpath_b = os.sep.join([feat_datasets_path, 'ACM_demo.csv'])
# The labels path is built but never read by this script.
fpath_c = os.sep.join([feat_datasets_path, 'dblp_acm_demo_labels.csv'])
fpath_f = os.sep.join([feat_datasets_path, 'feat_vecs.csv'])

# Base tables first (keyed on 'id'); the feature vectors are then loaded with
# A and B as their ltable/rtable.
A = read_csv_metadata(fpath_a, key='id')
B = read_csv_metadata(fpath_b, key='id')
feature_vectors = read_csv_metadata(fpath_f, ltable=A, rtable=B)

train_test = mu.train_test_split(feature_vectors)
train, test = (train_test[part] for part in ('train', 'test'))

dt = DTMatcher(name='DecisionTree')

# Fit on the training split; the listed columns are passed as exclude_attrs
# and 'gold' is the target.
dt.fit(
    table=train,
    exclude_attrs=['ltable.id', 'rtable.id', '_id', 'gold'],
    target_attr='gold',
)

# Predict on the test split with the same exclusions, appending the output
# under the 'predicted' column name.
predictions = dt.predict(
    table=test,
    exclude_attrs=['ltable.id', 'rtable.id', '_id', 'gold'],
    target_attr='predicted',
    append=True,
)

print('Done')
# coding=utf-8 from __future__ import unicode_literals import os import unittest import pandas as pd from nose.tools import * from magellan.io.parsers import read_csv_metadata, to_csv_metadata, _get_metadata_from_file from magellan.utils.generic_helper import get_install_path, del_files_in_dir, creat_dir_ifnot_exists import magellan.catalog.catalog_manager as cm datasets_path = os.sep.join([get_install_path(), 'datasets', 'test_datasets']) io_datasets_path = os.sep.join( [get_install_path(), 'datasets', 'test_datasets', 'io']) path_a = os.sep.join([datasets_path, 'A.csv']) path_b = os.sep.join([datasets_path, 'B.csv']) path_c = os.sep.join([datasets_path, 'C.csv']) sndbx_path = os.sep.join([ os.sep.join([get_install_path(), 'datasets', 'test_datasets']), 'sandbox' ]) class ReadCSVMetadataTestCases(unittest.TestCase): def test_valid_path_wi_valid_metadata(self): cm.del_catalog() A = read_csv_metadata(path_a) pd_A = pd.read_csv(path_a) self.assertEqual(A.equals(pd_A), True) self.assertEqual(cm.get_key(A), 'ID') def test_valid_path_candset_wi_valid_metadata(self):
import pandas as pd import six from magellan.utils.generic_helper import get_install_path, list_diff from magellan.io.parsers import read_csv_metadata from magellan.matcherselector.mlmatcherselection import select_matcher from magellan.matcher.dtmatcher import DTMatcher from magellan.matcher.linregmatcher import LinRegMatcher from magellan.matcher.logregmatcher import LogRegMatcher from magellan.matcher.nbmatcher import NBMatcher from magellan.matcher.rfmatcher import RFMatcher from magellan.matcher.svmmatcher import SVMMatcher import magellan.catalog.catalog_manager as cm datasets_path = os.sep.join([get_install_path(), 'datasets', 'test_datasets', 'matcherselector']) path_a = os.sep.join([datasets_path, 'DBLP_demo.csv']) path_b = os.sep.join([datasets_path, 'ACM_demo.csv']) path_c = os.sep.join([datasets_path, 'dblp_acm_demo_labels.csv']) path_f = os.sep.join([datasets_path, 'feat_vecs.csv']) class MLMatcherSelectionTestCases(unittest.TestCase): def setUp(self): cm.del_catalog() def tearDown(self): cm.del_catalog() # @nottest def test_select_matcher_valid_1(self):
# coding=utf-8 from __future__ import unicode_literals import os import unittest import pandas as pd from nose.tools import * from magellan.io.parsers import read_csv_metadata, to_csv_metadata, _get_metadata_from_file from magellan.utils.generic_helper import get_install_path, del_files_in_dir, creat_dir_ifnot_exists import magellan.catalog.catalog_manager as cm datasets_path = os.sep.join([get_install_path(), 'datasets', 'test_datasets']) io_datasets_path = os.sep.join([get_install_path(), 'datasets', 'test_datasets', 'io']) path_a = os.sep.join([datasets_path, 'A.csv']) path_b = os.sep.join([datasets_path, 'B.csv']) path_c = os.sep.join([datasets_path, 'C.csv']) sndbx_path = os.sep.join([os.sep.join([get_install_path(), 'datasets', 'test_datasets']), 'sandbox']) class ReadCSVMetadataTestCases(unittest.TestCase): def test_valid_path_wi_valid_metadata(self): cm.del_catalog() A = read_csv_metadata(path_a) pd_A = pd.read_csv(path_a) self.assertEqual(A.equals(pd_A), True) self.assertEqual(cm.get_key(A), 'ID') def test_valid_path_candset_wi_valid_metadata(self): cm.del_catalog() A = read_csv_metadata(path_a) B = read_csv_metadata(path_b, key='ID') # not initializing with ID will raise key_error C = read_csv_metadata(path_c, ltable=A, rtable=B)