Beispiel #1
0
 def test_use_entities_order_two_output_edges(self):
     """Check the head edges produced with ``use_entities_order=True``.

     Builds an ``EdgesProducer`` from the "dataset1" training data and
     expects two rows back for the sample, with target index 0.
     """
     raw_dataset = RawDataset(
         dataset_id="dataset1",
         inference_mode=True,
         dataset_type=DatasetType.TRAINING,
         data_directory=self.DATASET_PATH,
         batch_size=1,
     )
     producer = EdgesProducer(
         raw_dataset.ids_of_entities,
         raw_dataset.graph_edges,
         use_entities_order=True,
     )
     sample = {
         "object_ids": np.array([1, 0, 1]),
         "object_types": np.array([1, 0, 2]),
     }

     produced, target_position = producer.produce_head_edges(sample=sample)

     # Only the first id differs between the two expected rows; the
     # object types are expected to be repeated unchanged.
     expected_ids = [[1, 0, 1], [2, 0, 1]]
     expected_types = [[1, 0, 2], [1, 0, 2]]
     self.assertAllEqual(expected_ids, produced["object_ids"])
     self.assertAllEqual(expected_types, produced["object_types"])
     self.assertEqual(0, target_position)
Beispiel #2
0
 def test_edge_pattern_in_existing_edges(self, randint_mock):
     """Check head edges for a sample, with the patched randint returning 0.

     Expects a 3x3 ``object_ids`` result whose first row equals the
     sample's own ids, and a target index of 0.

     Args:
         randint_mock: mock whose return value is pinned to 0; presumably
             injected by a patch decorator that is not visible here.
     """
     randint_mock.return_value = 0

     raw_dataset = RawDataset(
         dataset_id="dataset1",
         inference_mode=True,
         dataset_type=DatasetType.TRAINING,
         data_directory=self.DATASET_PATH,
         batch_size=1,
     )
     producer = EdgesProducer(
         raw_dataset.ids_of_entities,
         raw_dataset.graph_edges,
         use_entities_order=False,
     )
     sample = {
         "object_ids": np.array([0, 0, 1]),
         "object_types": np.array([1, 0, 2]),
     }

     produced, target_position = producer.produce_head_edges(sample=sample)

     expected_shape = (3, 3)
     expected_first_row = [0, 0, 1]
     self.assertAllEqual(expected_shape, produced["object_ids"].shape)
     self.assertAllEqual(expected_first_row, produced["object_ids"][0])
     self.assertEqual(0, target_position)
 def test_incremental_edges(self):
     """Check the known-edge lookups of a validation ``RawDataset``.

     Loads "dataset1" as an unshuffled validation dataset and compares
     ``known_entity_output_edges`` and ``known_entity_input_edges``
     against fixed expected dictionaries.
     """
     # Pin NumPy's global RNG before the dataset is built (presumably so
     # any randomness during loading is reproducible -- verify).
     np.random.seed(2)

     validation_dataset = RawDataset(
         dataset_id="dataset1",
         dataset_type=DatasetType.VALIDATION,
         data_directory=self.DATASET_PATH,
         shuffle_dataset=False,
         batch_size=1,
         inference_mode=False,
     )

     expected_output_edges = {0: [(1, 0)], 1: [(2, 1)]}
     expected_input_edges = {1: [(0, 0)], 2: [(1, 1)]}
     self.assertDictEqual(
         validation_dataset.known_entity_output_edges,
         expected_output_edges,
     )
     self.assertDictEqual(
         validation_dataset.known_entity_input_edges,
         expected_input_edges,
     )
Beispiel #4
0
 def test_multiple_object_ids(self, randint_mock):
     """Check head edges for a five-element sample.

     With the patched randint returning 0, expects two rows back in both
     ``object_ids`` and ``object_types``, and a target index of 0.

     Args:
         randint_mock: mock whose return value is pinned to 0; presumably
             injected by a patch decorator that is not visible here.
     """
     randint_mock.return_value = 0

     raw_dataset = RawDataset(
         dataset_id="dataset1",
         inference_mode=True,
         dataset_type=DatasetType.TRAINING,
         data_directory=self.DATASET_PATH,
         batch_size=1,
     )
     producer = EdgesProducer(
         raw_dataset.ids_of_entities,
         raw_dataset.graph_edges,
         use_entities_order=False,
     )
     sample = {
         "object_ids": np.array([1, 0, 1, 2, 3]),
         "object_types": np.array([1, 0, 2, 1, 0]),
     }

     produced, target_position = producer.produce_head_edges(sample=sample)

     # Only the first id differs between the two expected rows; every
     # other id and all object types match the input sample.
     expected_ids = [[1, 0, 1, 2, 3], [2, 0, 1, 2, 3]]
     expected_types = [[1, 0, 2, 1, 0], [1, 0, 2, 1, 0]]
     self.assertAllEqual(expected_ids, produced["object_ids"])
     self.assertAllEqual(expected_types, produced["object_types"])
     self.assertEqual(0, target_position)
Beispiel #5
0
def main() -> None:
    """Build the dataset pipeline, then fit an ``RNNModel`` on it.

    Parses command-line arguments, constructs the raw, processed and
    featurized datasets in turn (printing the record count of each), and
    fits the model through a ``ModelHandler`` for ``args.max_epochs``.
    """
    cli_args = parse_args()

    # Stage 1: raw records, subsampled as requested on the command line.
    raw_dataset = RawDataset(subsample=cli_args.subsample)
    raw_message = f'Raw dataset has {len(raw_dataset.raw_df)} records.'
    print(raw_message)

    # Stage 2: processed records derived from the raw dataset.
    processed_dataset = ProcessedDataset(raw_dataset)
    processed_message = (
        f'Processed dataset has {len(processed_dataset.processed_df)} records.'
    )
    print(processed_message)

    # Stage 3: featurized records derived from the processed dataset.
    featurized_dataset = FeaturizedDataset(processed_dataset)
    featurized_message = (
        f'Featurized dataset has {len(featurized_dataset.featurized_df)} records.'
    )
    print(featurized_message)

    # Training: the handler wraps the model and is created with wandb enabled.
    rnn_model = RNNModel(featurized_dataset)
    handler = ModelHandler(rnn_model, use_wandb=True)
    # NOTE(review): 'Comensing' looks like a typo for 'Commencing'; left
    # as-is so the printed output stays unchanged.
    print(f'Comensing training on {handler.device}')
    handler.fit(cli_args.max_epochs)
    print('Done.')