예제 #1
0
def split_abt_buy(rnd: random.Random):
    """Build the abt-buy ``CsvDataset``, print its entity counts and split it.

    A ``BootEA`` model is configured from the 15K argument file and handed,
    together with ``rnd``, to ``CsvDataset`` along with the three CSV files
    found under ``{base_path}/abt-buy``. The sizes of both knowledge graphs'
    entity sets are printed, then the dataset is passed to ``split_dataset``
    with the same directory.

    Args:
        rnd: random number generator forwarded unchanged to ``CsvDataset``.
    """
    bootea = BootEA()
    bootea.set_args(load_args("../OpenEA/run/args/bootea_args_15K.json"))

    # All inputs and the split_dataset target share this directory.
    folder = f"{base_path}/abt-buy"
    abt_buy = CsvDataset(
        CsvType.products,
        f"{folder}/Abt.csv",
        f"{folder}/Buy.csv",
        f"{folder}/abt_buy_perfectMapping.csv",
        bootea,
        rnd,
    )

    kg1_count = len(abt_buy.kg1.entities_set)
    kg2_count = len(abt_buy.kg2.entities_set)
    print(f"abt-buy: {kg1_count}, {kg2_count}")
    split_dataset(abt_buy, folder)
예제 #2
0
    def test_create(self):
        """Check that CsvDataset can be constructed and rejects a bad path.

        Builds a ``CsvDataset`` from ``sample_data.csv`` resolved relative to
        this module's directory, then verifies that passing ``1`` instead of
        a path raises ``PreconditionException`` with a message containing
        ``'Invalid dataset_csv_file_path'``.

        Args:
            self: TestCsvDataset

        Returns:
            None

        Raises:
            None

        """
        module_dir = os.path.realpath(
            os.path.join(os.getcwd(), os.path.dirname(__file__)))
        dataset_csv_file_path = os.path.join(module_dir, 'sample_data.csv')

        # Construction with a valid path is expected to succeed silently.
        CsvDataset(dataset_csv_file_path, None, False)

        # test with bad dataset_csv_file_path
        with self.assertRaises(PreconditionException) as context:
            CsvDataset(1)

        # assertIn shows both the needle and the actual message on failure,
        # whereas assertTrue(... in ...) only reports "False is not true".
        self.assertIn('Invalid dataset_csv_file_path', str(context.exception))
예제 #3
0
def split_amazon_google(rnd: random.Random):
    """Build the amazon-google ``CsvDataset``, print entity counts and split it.

    A ``BootEA`` model is configured from the 15K argument file and handed,
    together with ``rnd``, to ``CsvDataset`` along with the three CSV files
    found under ``{base_path}/amazon-google``. The sizes of both knowledge
    graphs' entity sets are printed, then the dataset is passed to
    ``split_dataset`` with the same directory.

    Args:
        rnd: random number generator forwarded unchanged to ``CsvDataset``.
    """
    bootea = BootEA()
    bootea.set_args(load_args("../OpenEA/run/args/bootea_args_15K.json"))

    # All inputs and the split_dataset target share this directory.
    folder = f"{base_path}/amazon-google"
    amazon_google = CsvDataset(
        CsvType.products,
        f"{folder}/Amazon.csv",
        f"{folder}/GoogleProducts.csv",
        # NOTE(review): "Amzon" spelling is kept verbatim from the original
        # path; presumably it matches the file name on disk - confirm.
        f"{folder}/Amzon_GoogleProducts_perfectMapping.csv",
        bootea,
        rnd,
    )

    kg1_count = len(amazon_google.kg1.entities_set)
    kg2_count = len(amazon_google.kg2.entities_set)
    print(f"amazon-google: {kg1_count}, {kg2_count}")
    split_dataset(amazon_google, folder)
예제 #4
0
def split_dblp_scholar(rnd: random.Random):
    """Build the dblp-scholar ``CsvDataset``, print entity counts and split it.

    A ``BootEA`` model is configured from the 15K argument file and handed,
    together with ``rnd``, to ``CsvDataset`` (type ``CsvType.articles``) along
    with the three CSV files found under ``{base_path}/dblp-scholar``. The
    sizes of both knowledge graphs' entity sets are printed, then the dataset
    is passed to ``split_dataset`` with the same directory.

    Args:
        rnd: random number generator forwarded unchanged to ``CsvDataset``.
    """
    bootea = BootEA()
    bootea.set_args(load_args("../OpenEA/run/args/bootea_args_15K.json"))

    # All inputs and the split_dataset target share this directory.
    folder = f"{base_path}/dblp-scholar"
    dblp_scholar = CsvDataset(
        CsvType.articles,
        f"{folder}/DBLP1.csv",
        f"{folder}/Scholar.csv",
        f"{folder}/DBLP-Scholar_perfectMapping.csv",
        bootea,
        rnd,
    )

    kg1_count = len(dblp_scholar.kg1.entities_set)
    kg2_count = len(dblp_scholar.kg2.entities_set)
    print(f"dblp-scholar: {kg1_count}, {kg2_count}")
    split_dataset(dblp_scholar, folder)
예제 #5
0
def split_dblp_acm(rnd: random.Random):
    """Build the dblp-acm ``CsvDataset``, print entity counts and split it.

    A ``BootEA`` model is configured from the 15K argument file and handed,
    together with ``rnd``, to ``CsvDataset`` (type ``CsvType.articles``) along
    with the three CSV files found under ``{base_path}/dblp-acm``. The sizes
    of both knowledge graphs' entity sets are printed, then the dataset is
    passed to ``split_dataset`` with the same directory.

    Args:
        rnd: random number generator forwarded unchanged to ``CsvDataset``.
    """
    bootea = BootEA()
    bootea.set_args(load_args("../OpenEA/run/args/bootea_args_15K.json"))

    # All inputs and the split_dataset target share this directory.
    folder = f"{base_path}/dblp-acm"
    dblp_acm = CsvDataset(
        CsvType.articles,
        f"{folder}/DBLP2.csv",
        f"{folder}/ACM.csv",
        f"{folder}/DBLP-ACM_perfectMapping.csv",
        bootea,
        rnd,
    )

    kg1_count = len(dblp_acm.kg1.entities_set)
    kg2_count = len(dblp_acm.kg2.entities_set)
    # The printed label is "dblp2-acm" although the directory is "dblp-acm".
    print(f"dblp2-acm: {kg1_count}, {kg2_count}")
    split_dataset(dblp_acm, folder)
예제 #6
0
    def test_load(self):
        """Check that CsvDataset.load exposes the sample file's fields.

        Loads ``sample_data.csv`` resolved relative to this module's
        directory and verifies the number of field names, the ``policyID``
        column read as integers, and the ``statecode`` column read with the
        default type. Finally verifies that passing ``1`` instead of a path
        raises ``PreconditionException`` with a message containing
        ``'Invalid dataset_csv_file_path'``.

        Args:
            self: TestCsvDataset

        Returns:
            None

        Raises:
            None
        """
        module_dir = os.path.realpath(
            os.path.join(os.getcwd(), os.path.dirname(__file__)))
        dataset_csv_file_path = os.path.join(module_dir, 'sample_data.csv')

        csv_dataset = CsvDataset(dataset_csv_file_path, None, False)
        csv_dataset.load(dataset_csv_file_path)

        self.assertEqual(len(csv_dataset.get_field_names()), 18)

        # ``np.int`` was only an alias of the builtin ``int``; it was
        # deprecated in NumPy 1.20 and removed in 1.24, so the builtin is
        # passed directly to keep the test working on current NumPy.
        policy_ids = csv_dataset.get_field('policyID', int)
        self.assertEqual(len(policy_ids), 3)
        self.assertEqual(policy_ids[0], 119736)
        self.assertEqual(policy_ids[1], 448094)
        self.assertEqual(policy_ids[2], 206893)

        state_codes = csv_dataset.get_field('statecode')
        self.assertEqual(len(state_codes), 3)
        self.assertEqual(state_codes[0], 'FL')
        self.assertEqual(state_codes[1], 'CA')
        self.assertEqual(state_codes[2], 'FL')

        # test with bad dataset_csv_file_path
        with self.assertRaises(PreconditionException) as context:
            CsvDataset(1)

        # assertIn shows both the needle and the actual message on failure,
        # whereas assertTrue(... in ...) only reports "False is not true".
        self.assertIn('Invalid dataset_csv_file_path', str(context.exception))
예제 #7
0
    def __init__(self, dataset_csv_file_path, output_folder_path, log_file_path):
        """Initialise the analysis and create its ``CsvDataset``.

        The base initializer is called with ``self.ANALYSIS_NAME``, the log
        file path and the output folder path (in that order). The absolute
        form of the CSV path is stored on the instance, a ``CsvDataset`` is
        built from the path and ``self.logger``, and a trace message is
        logged.

        Args:
            dataset_csv_file_path: path to the csv file
            output_folder_path: output folder path
            log_file_path: path to the log file

        Returns:
            None

        Raises:
            None
        """
        # Zero-argument super() resolves to the same class/instance pair as
        # the explicit super(__class__, self) form.
        super().__init__(self.ANALYSIS_NAME, log_file_path, output_folder_path)

        self.dataset_csv_file_path = os.path.abspath(dataset_csv_file_path)
        # The dataset receives the path exactly as given by the caller, not
        # the absolute form stored above. self.logger is not set in this
        # method; presumably the base initializer provides it.
        self.dataset = CsvDataset(dataset_csv_file_path, self.logger)

        self.logger.trace('Finished Loading Test Analysis Dataset...')