def split_abt_buy(rnd: random.Random):
    """Load the Abt-Buy product-matching benchmark and write train/test splits.

    Args:
        rnd: seeded random source forwarded to the dataset for reproducible splits
    """
    model = BootEA()
    model.set_args(load_args("../OpenEA/run/args/bootea_args_15K.json"))
    data_dir = f"{base_path}/abt-buy"
    pair_dataset = CsvDataset(
        CsvType.products,
        f"{data_dir}/Abt.csv",
        f"{data_dir}/Buy.csv",
        f"{data_dir}/abt_buy_perfectMapping.csv",
        model,
        rnd,
    )
    kg1_size = len(pair_dataset.kg1.entities_set)
    kg2_size = len(pair_dataset.kg2.entities_set)
    print(f"abt-buy: {kg1_size}, {kg2_size}")
    split_dataset(pair_dataset, data_dir)
def test_create(self):
    """Test Create

    Verifies that a CsvDataset can be constructed from a valid CSV path,
    and that an invalid path argument raises PreconditionException.

    Args:
        self: TestCsvDataset
    Returns:
        None
    Raises:
        None
    """
    here = os.path.realpath(
        os.path.join(os.getcwd(), os.path.dirname(__file__)))
    csv_path = os.path.join(here, 'sample_data.csv')

    # Construction with a valid path must succeed.
    CsvDataset(csv_path, None, False)

    # test with bad dataset_csv_file_path
    with self.assertRaises(PreconditionException) as context:
        CsvDataset(1)
    self.assertTrue('Invalid dataset_csv_file_path' in str(context.exception))
def split_amazon_google(rnd: random.Random):
    """Load the Amazon-GoogleProducts benchmark and write train/test splits.

    Args:
        rnd: seeded random source forwarded to the dataset for reproducible splits
    """
    model = BootEA()
    model.set_args(load_args("../OpenEA/run/args/bootea_args_15K.json"))
    data_dir = f"{base_path}/amazon-google"
    # NOTE: "Amzon_..." is the dataset's actual (misspelled) file name — do not "fix" it.
    pair_dataset = CsvDataset(
        CsvType.products,
        f"{data_dir}/Amazon.csv",
        f"{data_dir}/GoogleProducts.csv",
        f"{data_dir}/Amzon_GoogleProducts_perfectMapping.csv",
        model,
        rnd,
    )
    kg1_size = len(pair_dataset.kg1.entities_set)
    kg2_size = len(pair_dataset.kg2.entities_set)
    print(f"amazon-google: {kg1_size}, {kg2_size}")
    split_dataset(pair_dataset, data_dir)
def split_dblp_scholar(rnd: random.Random):
    """Load the DBLP-Scholar article-matching benchmark and write train/test splits.

    Args:
        rnd: seeded random source forwarded to the dataset for reproducible splits
    """
    model = BootEA()
    model.set_args(load_args("../OpenEA/run/args/bootea_args_15K.json"))
    data_dir = f"{base_path}/dblp-scholar"
    pair_dataset = CsvDataset(
        CsvType.articles,
        f"{data_dir}/DBLP1.csv",
        f"{data_dir}/Scholar.csv",
        f"{data_dir}/DBLP-Scholar_perfectMapping.csv",
        model,
        rnd,
    )
    kg1_size = len(pair_dataset.kg1.entities_set)
    kg2_size = len(pair_dataset.kg2.entities_set)
    print(f"dblp-scholar: {kg1_size}, {kg2_size}")
    split_dataset(pair_dataset, data_dir)
def split_dblp_acm(rnd: random.Random):
    """Load the DBLP-ACM article-matching benchmark and write train/test splits.

    Args:
        rnd: seeded random source forwarded to the dataset for reproducible splits
    """
    model = BootEA()
    model.set_args(load_args("../OpenEA/run/args/bootea_args_15K.json"))
    data_dir = f"{base_path}/dblp-acm"
    pair_dataset = CsvDataset(
        CsvType.articles,
        f"{data_dir}/DBLP2.csv",
        f"{data_dir}/ACM.csv",
        f"{data_dir}/DBLP-ACM_perfectMapping.csv",
        model,
        rnd,
    )
    kg1_size = len(pair_dataset.kg1.entities_set)
    kg2_size = len(pair_dataset.kg2.entities_set)
    # label intentionally reads "dblp2-acm" to match the DBLP2.csv source file
    print(f"dblp2-acm: {kg1_size}, {kg2_size}")
    split_dataset(pair_dataset, data_dir)
def test_load(self):
    """Test Load

    Loads sample_data.csv and checks the parsed field names, an integer
    column (policyID) and a string column (statecode); also verifies that
    an invalid path argument raises PreconditionException.

    Args:
        self: TestCsvDataset
    Returns:
        None
    Raises:
        None
    """
    __location__ = os.path.realpath(
        os.path.join(os.getcwd(), os.path.dirname(__file__)))
    dataset_csv_file_path = os.path.join(__location__, 'sample_data.csv')
    csv_dataset = CsvDataset(dataset_csv_file_path, None, False)
    csv_dataset.load(dataset_csv_file_path)
    self.assertEqual(len(csv_dataset.get_field_names()), 18)
    # `np.int` was removed in NumPy 1.24; it was an alias for the builtin
    # `int`, so passing `int` is equivalent on all NumPy versions.
    policy_ids = csv_dataset.get_field('policyID', int)
    self.assertEqual(len(policy_ids), 3)
    self.assertEqual(policy_ids[0], 119736)
    self.assertEqual(policy_ids[1], 448094)
    self.assertEqual(policy_ids[2], 206893)
    state_codes = csv_dataset.get_field('statecode')
    self.assertEqual(len(state_codes), 3)
    self.assertEqual(state_codes[0], 'FL')
    self.assertEqual(state_codes[1], 'CA')
    self.assertEqual(state_codes[2], 'FL')
    # test with bad dataset_csv_file_path
    with self.assertRaises(PreconditionException) as context:
        CsvDataset(1)
    self.assertTrue('Invalid dataset_csv_file_path' in str(context.exception))
def __init__(self, dataset_csv_file_path, output_folder_path, log_file_path):
    """Initialize the analysis: set up logging and load the CSV dataset.

    Args:
        dataset_csv_file_path: path to the csv file (normalized to an
            absolute path before loading)
        output_folder_path: output folder path
        log_file_path: path to the log file
    Returns:
        None
    Raises:
        None
    """
    # Zero-argument super() is the modern Py3 form of super(__class__, self).
    super().__init__(self.ANALYSIS_NAME, log_file_path, output_folder_path)
    self.dataset_csv_file_path = os.path.abspath(dataset_csv_file_path)
    # Load from the stored absolute path so the attribute and the loaded
    # dataset always refer to the same file (previously the original,
    # possibly relative, path was used here).
    self.dataset = CsvDataset(self.dataset_csv_file_path, self.logger)
    self.logger.trace('Finished Loading Test Analysis Dataset...')