Example #1
    def test_do_inference(self):
        # Given
        data_preprocessor = DataPreprocessor()
        device = "cpu"
        inferencer = Inferencer(data_preprocessor, device)
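        # Sizes packed as (node features, adjacency matrix, flattened labels)
        # and indexed positionally when instantiating the model below.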
        data_dimensions = (BASE_GRAPH_NODE_FEATURES.size(), BASE_GRAPH.size(), BASE_GRAPH.view(-1).size())
        model = ModelSelector.load_model("RNN")
        model = model(time_steps=1,
                      number_of_nodes=data_dimensions[1][0],
                      number_of_node_features=data_dimensions[0][1],
                      fully_connected_layer_input_size=data_dimensions[1][0] * data_dimensions[0][1],
                      fully_connected_layer_output_size=data_dimensions[2][0])
        all_neighbors = to.tensor([[1, 2, -1, -1],
                                   [0, 2, -1, -1],
                                   [0, 1, 3, -1],
                                   [2, -1, -1, -1]])
        dataset = GraphDataset("")
        dataset.enable_test_mode()
        tag = 'tag'
        dataset.dataset = [(BASE_GRAPH_NODE_FEATURES, all_neighbors, BASE_GRAPH.view(-1), tag)]
        inference_data, _, _ = data_preprocessor.train_validation_test_split(dataset, 1, 0.0, 0.0)
        output_label_pairs_expected = [BASE_GRAPH.view(-1), BASE_GRAPH.view(-1)]

        # When
        output_label_pairs = inferencer.do_inference(model, inference_data)

        # Then
        self.assertEqual(output_label_pairs[0][0].squeeze().size(), output_label_pairs_expected[0].size())
        self.assertEqual(output_label_pairs[0][1].squeeze().size(), output_label_pairs_expected[1].size())
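
A stricter variant could compare values as well as shapes; a hypothetical sketch for the label half of the pair, which is deterministic:

        self.assertTrue(to.allclose(output_label_pairs[0][1].squeeze(),
                                    output_label_pairs_expected[1]))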
Example #2
class TestGraphPreprocessor(TestCase):
    def setUp(self) -> None:
        self.data_preprocessor = DataPreprocessor()
        self.postgres_connector = PostgresConnector()

    def test_train_validation_test_split(self):
        # Given
        self._insert_test_data(dataset_size=10)
        dataset = GraphDataset(self.postgres_connector)
        train_validation_test_split_expected = [7, 2, 1]

        # When
        train_validation_test_split = self.data_preprocessor.train_validation_test_split(dataset,
                                                                                         batch_size=1,
                                                                                         validation_split=0.2,
                                                                                         test_split=0.1)
        train_validation_test_split = [len(split) for split in train_validation_test_split]

        # Then
        self.assertEqual(train_validation_test_split_expected, train_validation_test_split)
        self._truncate_table()

    def test_extract_data_dimensions(self):
        # Given
        self._insert_test_data(dataset_size=1)
        dataset = GraphDataset(self.postgres_connector)
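        # number_of_nodes * number_of_node_features = 4 * 4 = 16, i.e. the
        # fully connected input size (cf. the same computation in Example #1).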
        data_dimensions_expected = {"number_of_nodes": 4,
                                    "number_of_node_features": 4,
                                    "fully_connected_layer_input_size": 16,
                                    "fully_connected_layer_output_size": 8}

        # When
        data_dimensions = self.data_preprocessor.extract_data_dimensions(dataset)

        # Then
        self.assertEqual(data_dimensions_expected, data_dimensions)
        self._truncate_table()

    def _insert_test_data(self, dataset_size):
        self.postgres_connector.open_connection()
        features = FEATURES_SERIALIZED
        neighbors = NEIGHBORS_SERIALIZED
        labels = LABELS_SERIALIZED
        for index in range(dataset_size):
            self.postgres_connector.execute_insert_dataset(str(index), features, neighbors, labels)
        self.postgres_connector.close_connection()

    def _truncate_table(self) -> None:
        self.postgres_connector.open_connection()
        self.postgres_connector.truncate_table(TEST_DATASET)
        self.postgres_connector.close_connection()
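
Note that the trailing _truncate_table() calls run only when every assertion passes. A minimal sketch of a safer setUp, assuming the same helpers, using unittest's addCleanup:

    def setUp(self) -> None:
        self.data_preprocessor = DataPreprocessor()
        self.postgres_connector = PostgresConnector()
        # Cleanups registered here run even if the test body raises,
        # so the table is always truncated.
        self.addCleanup(self._truncate_table)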
def create_grid_search(dataset_name: str,
                       data_directory: str,
                       model_directory: str,
                       results_directory: str,
                       model: str,
                       device: str,
                       epochs: str,
                       loss_function_selection: str,
                       optimizer_selection: str,
                       batch_size: str,
                       validation_split: str,
                       test_split: str,
                       time_steps: str,
                       validation_period: str) -> MessagePassingNN:
    grid_search_dictionary = GridSearchParametersParser().get_grid_search_dictionary(model,
                                                                                     epochs,
                                                                                     loss_function_selection,
                                                                                     optimizer_selection,
                                                                                     batch_size,
                                                                                     validation_split,
                                                                                     test_split,
                                                                                     time_steps,
                                                                                     validation_period)
    data_path = _get_data_path(data_directory, dataset_name)
    data_preprocessor = DataPreprocessor()
    trainer = Trainer(data_preprocessor, device)
    saver = Saver(model_directory, results_directory)
    grid_search = GridSearch(data_path,
                             data_preprocessor,
                             trainer,
                             grid_search_dictionary,
                             saver)
    return MessagePassingNN(grid_search)
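
A usage sketch for the factory above; the argument values are hypothetical, and note that every numeric parameter is passed as a string, matching the signature:

message_passing_nn = create_grid_search(dataset_name="training-test-data",
                                        data_directory="data/",
                                        model_directory="model_checkpoints",
                                        results_directory="grid_search_results",
                                        model="RNN",
                                        device="cpu",
                                        epochs="10",
                                        loss_function_selection="MSE",
                                        optimizer_selection="SGD",
                                        batch_size="1",
                                        validation_split="0.2",
                                        test_split="0.1",
                                        time_steps="1",
                                        validation_period="5")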
Example #4
    def setUp(self) -> None:
        self.features = BASE_GRAPH_NODE_FEATURES
        self.adjacency_matrix = BASE_GRAPH
        self.labels = BASE_GRAPH.view(-1)
        self.dataset = 'training-test-data'
        self.tests_data_directory = 'tests/test_data/'
        tests_model_directory = 'tests/model_checkpoints'
        tests_results_directory = 'tests/grid_search_results'
        device = "cpu"
        self.data_path = self.tests_data_directory + self.dataset + "/"
        self.repository = FileSystemRepository(self.tests_data_directory,
                                               self.dataset)
        self.data_preprocessor = DataPreprocessor()
        self.data_preprocessor.enable_test_mode()
        self.model_trainer = Trainer(self.data_preprocessor, device)
        self.saver = Saver(tests_model_directory, tests_results_directory)
    def test_start(self):
        # Given
        tests_model_directory = os.path.join(
            "tests", "test_data", "model-checkpoints-test",
            "configuration&id__model&RNN__epochs&10__loss_function&MSE__optimizer"
            "&Adagrad__batch_size&100__validation_split&0.2__test_split"
            "&0.1__time_steps&1__validation_period&5",
            "Epoch_5_model_state_dictionary.pth")
        tests_results_directory = os.path.join('tests', 'results_inference')
        device = "cpu"
        data_preprocessor = DataPreprocessor()
        loader = Loader("RNN")
        inferencer = Inferencer(data_preprocessor, device)
        saver = Saver(tests_model_directory, tests_results_directory)
        self.postgres_connector = PostgresConnector()
        self._insert_test_data(dataset_size=1)
        dataset = GraphDataset(self.postgres_connector)
        inference = Inference(dataset, data_preprocessor, loader, inferencer,
                              saver)

        # When
        inference.start()

        # Then
        filename_expected = datetime.now().strftime(
            "%d-%b-%YT%H_%M") + "_distance_maps.pickle"
        self.assertTrue(
            os.path.isfile(
                os.path.join(tests_results_directory, filename_expected)))

        # Tear down
        self._truncate_table()
    def _build_grid_search(self, grid_search_parameters) -> GridSearch:
        data_preprocessor = DataPreprocessor()
        postgres_connector = PostgresConnector()
        dataset = GraphDataset(postgres_connector)
        model_trainer = Trainer(data_preprocessor, self.device, postgres_connector)
        saver = Saver(self.model_directory, self.results_directory)
        grid_search_configurations = self._get_all_grid_search_configurations(grid_search_parameters)
        return GridSearch(dataset, data_preprocessor, model_trainer, grid_search_configurations, saver)
Example #7
    def test_start(self):
        # Given
        dataset_size = 1
        features = to.ones(4, 2)
        adjacency_matrix = to.ones(4, 4)
        labels = to.ones(16)
        dataset = 'inference-test-data'
        tests_data_directory = 'tests/test_data/'
        tests_model_directory = "tests/test_data/model-checkpoints-test/configuration&id__model&" + \
                                "RNN__epochs&10__loss_function&MSE__optimizer&Adagrad__batch_size&" + \
                                "100__validation_split&0.2__test_split&0.1__time_steps&1__validation_period&" + \
                                "5/Epoch_5_model_state_dictionary.pth"
        tests_results_directory = 'tests/results_inference'
        device = "cpu"
        repository = FileSystemRepository(tests_data_directory, dataset)
        data_path = tests_data_directory + dataset + "/"
        data_preprocessor = DataPreprocessor()
        data_preprocessor.enable_test_mode()
        loader = Loader("RNN")
        inferencer = Inferencer(data_preprocessor, device)
        saver = Saver(tests_model_directory, tests_results_directory)
        inference = Inference(data_path,
                              data_preprocessor,
                              loader,
                              inferencer,
                              saver,
                              test_mode=True)

        adjacency_matrix_filenames, features_filenames, labels_filenames = self._save_test_data(
            adjacency_matrix, dataset_size, features, labels, repository)

        # When
        inference.start()

        # Then
        filename_expected = datetime.now().strftime(
            "%d-%b-%YT%H_%M") + "_distance_maps.pickle"
        self.assertTrue(
            os.path.isfile(tests_results_directory + "/" + filename_expected))

        # Tear down
        self._remove_files(dataset_size, features_filenames,
                           adjacency_matrix_filenames, labels_filenames,
                           tests_data_directory, dataset,
                           tests_results_directory)
    def _build_inference(self) -> Inference:
        # The model architecture name (e.g. "RNN") is read from the environment.
        self.model = os.environ['MODEL']
        data_preprocessor = DataPreprocessor()
        model_loader = Loader(self.model)
        model_inferencer = Inferencer(data_preprocessor, self.device)
        postgres_connector = PostgresConnector()
        dataset = GraphDataset(postgres_connector)
        saver = Saver(self.model_directory, self.results_directory)
        return Inference(dataset, data_preprocessor, model_loader, model_inferencer, saver)
    def setUp(self) -> None:
        time_steps = 1
        loss_function = "MSE"
        optimizer = "SGD"
        model = "RNN"
        device = "cpu"
        self.configuration_dictionary = {"model": model,
                                         "loss_function": loss_function,
                                         "optimizer": optimizer,
                                         "time_steps": time_steps}
        self.postgres_connector = PostgresConnector()
        data_preprocessor = DataPreprocessor()
        self.model_trainer = Trainer(data_preprocessor, device)
def create_inference(dataset_name: str,
                     data_directory: str,
                     model_directory: str,
                     results_directory: str,
                     model: str,
                     device: str) -> MessagePassingNN:
    data_path = data_directory + dataset_name + "/"
    data_preprocessor = DataPreprocessor()
    model_loader = Loader(model)
    model_inferencer = Inferencer(data_preprocessor, device)
    saver = Saver(model_directory, results_directory)
    inference = Inference(data_path, data_preprocessor, model_loader, model_inferencer, saver)
    return MessagePassingNN(inference)
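
An analogous usage sketch for create_inference (values again hypothetical):

message_passing_nn = create_inference(dataset_name="inference-test-data",
                                      data_directory="data/",
                                      model_directory="model_checkpoints",
                                      results_directory="results_inference",
                                      model="RNN",
                                      device="cpu")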
    def test_do_inference(self):
        # Given
        data_preprocessor = DataPreprocessor()
        device = "cpu"
        inferencer = Inferencer(data_preprocessor, device)
        data_dimensions = {
            "number_of_nodes": 4,
            "number_of_node_features": 4,
            "fully_connected_layer_input_size": 16,
            "fully_connected_layer_output_size": 8
        }
        model = load_model("RNN")
        model = model(
            time_steps=1,
            number_of_nodes=data_dimensions["number_of_nodes"],
            number_of_node_features=data_dimensions["number_of_node_features"],
            fully_connected_layer_input_size=data_dimensions[
                "fully_connected_layer_input_size"],
            fully_connected_layer_output_size=data_dimensions[
                "fully_connected_layer_output_size"])
        self.postgres_connector = PostgresConnector()
        self._insert_test_data(1)
        dataset = GraphDataset(self.postgres_connector)
        inference_data, _, _ = data_preprocessor.train_validation_test_split(
            dataset, 1, 0.0, 0.0)
        output_label_pairs_expected = [
            to.tensor((0, 1, 0, 2, 1, 2, 0, 0)),
            to.tensor((0, 1, 0, 2, 1, 2, 0, 0))
        ]

        # When
        output_label_pairs = inferencer.do_inference(model, inference_data)

        # Then
        self.assertEqual(output_label_pairs[0][0].squeeze().size(),
                         output_label_pairs_expected[0].size())
        self.assertEqual(output_label_pairs[0][1].squeeze().size(),
                         output_label_pairs_expected[1].size())
    def test_do_evaluate(self):
        # Given
        data_dimensions = {"number_of_nodes": 4,
                           "number_of_node_features": 4,
                           "fully_connected_layer_input_size": 16,
                           "fully_connected_layer_output_size": 8}
        self.model_trainer.build(data_dimensions, self.configuration_dictionary)
        self._insert_test_data(1)
        dataset = GraphDataset(self.postgres_connector)
        training_data, _, _ = DataPreprocessor().train_validation_test_split(dataset, 1, 0.0, 0.0)

        # When
        validation_loss = self.model_trainer.do_evaluate_step(evaluation_data=training_data, epoch=1)

        # Then
        self.assertTrue(validation_loss > 0.0)
        self._truncate_table()
Example #13
    def setUp(self) -> None:
        self.features = BASE_GRAPH_NODE_FEATURES
        self.adjacency_matrix = BASE_GRAPH
        self.labels = BASE_GRAPH.view(-1)
        self.dataset_name = 'training-test-data'
        self.tests_data_directory = os.path.join('tests', 'test_data')
        tests_model_directory = os.path.join('tests', 'model_checkpoints')
        tests_results_directory = os.path.join('tests', 'grid_search_results')
        device = "cpu"
        self.data_path = os.path.join("./", self.tests_data_directory,
                                      self.dataset_name)
        self.repository = FileSystemRepository(self.tests_data_directory,
                                               self.dataset_name)
        self.data_preprocessor = DataPreprocessor()
        self.postgres_connector = PostgresConnector()
        self.model_trainer = Trainer(self.data_preprocessor, device,
                                     self.postgres_connector)
        self.saver = Saver(tests_model_directory, tests_results_directory)
Example #14
    def test_do_evaluate(self):
        # Given
        data_dimensions = (BASE_GRAPH_NODE_FEATURES.size(),
                           BASE_GRAPH.view(-1).size())
        self.model_trainer.instantiate_attributes(
            data_dimensions, self.configuration_dictionary)
        all_neighbors = to.tensor([[1, 2, -1, -1], [0, 2, -1, -1],
                                   [0, 1, 3, -1], [2, -1, -1, -1]])
        dataset = GraphDataset("")
        dataset.enable_test_mode()
        tag = 'tag'
        dataset.dataset = [(BASE_GRAPH_NODE_FEATURES, all_neighbors,
                            BASE_GRAPH.view(-1), tag)]
        training_data, _, _ = DataPreprocessor().train_validation_test_split(
            dataset, 1, 0.0, 0.0)

        # When
        validation_loss = self.model_trainer.do_evaluate(
            evaluation_data=training_data, epoch=1)

        # Then
        self.assertTrue(validation_loss > 0.0)
    def setUp(self) -> None:
        self.data_preprocessor = DataPreprocessor()
Example #16
    def setUp(self) -> None:
        self.data_preprocessor = DataPreprocessor()
        self.postgres_connector = PostgresConnector()
Example #17
class TestTraining(TestCase):
    def setUp(self) -> None:
        self.features = BASE_GRAPH_NODE_FEATURES
        self.adjacency_matrix = BASE_GRAPH
        self.labels = BASE_GRAPH.view(-1)
        self.dataset = 'training-test-data'
        self.tests_data_directory = 'tests/test_data/'
        tests_model_directory = 'tests/model_checkpoints'
        tests_results_directory = 'tests/grid_search_results'
        device = "cpu"
        self.data_path = self.tests_data_directory + self.dataset + "/"
        self.repository = FileSystemRepository(self.tests_data_directory,
                                               self.dataset)
        self.data_preprocessor = DataPreprocessor()
        self.data_preprocessor.enable_test_mode()
        self.model_trainer = Trainer(self.data_preprocessor, device)
        self.saver = Saver(tests_model_directory, tests_results_directory)

    def test_start_for_multiple_batches_of_the_same_size(self):
        # Given
        dataset_size = 6
        grid_search_dictionary = {
            "model": ["RNN"],
            "epochs": [10],
            "batch_size": [3],
            "validation_split": [0.2],
            "test_split": [0.1],
            "loss_function": ["MSE"],
            "optimizer": ["SGD"],
            "time_steps": [1],
            "validation_period": [5]
        }
        grid_search = GridSearch(self.data_path,
                                 self.data_preprocessor,
                                 self.model_trainer,
                                 grid_search_dictionary,
                                 self.saver,
                                 test_mode=True)

        adjacency_matrix_filenames, features_filenames, labels_filenames = self._save_test_data(
            dataset_size)

        # When
        losses = grid_search.start()
        configuration_id = list(losses["training_loss"].keys())[0]

        # Then
        self.assertTrue(losses["training_loss"][configuration_id][
            grid_search_dictionary["epochs"][0]] > 0.0)
        self.assertTrue(losses["validation_loss"][configuration_id][
            grid_search_dictionary["validation_period"][0]] > 0.0)
        self.assertTrue(
            losses["test_loss"][configuration_id]["final_epoch"] > 0.0)

        # Tear down
        self._remove_files(dataset_size, features_filenames,
                           adjacency_matrix_filenames, labels_filenames)

    def test_start_for_multiple_batches_of_differing_size(self):
        # Given
        dataset_size = 5
        grid_search_dictionary = {
            "model": ["RNN"],
            "epochs": [10],
            "batch_size": [3],
            "validation_split": [0.2],
            "test_split": [0.1],
            "loss_function": ["MSE"],
            "optimizer": ["SGD"],
            "time_steps": [1],
            "validation_period": [5]
        }
        grid_search = GridSearch(self.data_path,
                                 self.data_preprocessor,
                                 self.model_trainer,
                                 grid_search_dictionary,
                                 self.saver,
                                 test_mode=True)

        adjacency_matrix_filenames, features_filenames, labels_filenames = self._save_test_data(
            dataset_size)

        # When
        losses = grid_search.start()
        configuration_id = list(losses["training_loss"].keys())[0]

        # Then
        self.assertTrue(losses["training_loss"][configuration_id][
            grid_search_dictionary["epochs"][0]] > 0.0)
        self.assertTrue(losses["validation_loss"][configuration_id][
            grid_search_dictionary["validation_period"][0]] > 0.0)
        self.assertTrue(
            losses["test_loss"][configuration_id]["final_epoch"] > 0.0)

        # Tear down
        self._remove_files(dataset_size, features_filenames,
                           adjacency_matrix_filenames, labels_filenames)

    def test_start_a_grid_search(self):
        # Given
        dataset_size = 6
        grid_search_dictionary = {
            "model": ["RNN"],
            "epochs": [10, 15],
            "batch_size": [3, 4],
            "validation_split": [0.2],
            "test_split": [0.1],
            "loss_function": ["MSE"],
            "optimizer": ["SGD"],
            "time_steps": [1],
            "validation_period": [5]
        }
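        # Two epochs values x two batch sizes -> four configurations to search.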
        grid_search = GridSearch(self.data_path,
                                 self.data_preprocessor,
                                 self.model_trainer,
                                 grid_search_dictionary,
                                 self.saver,
                                 test_mode=True)

        adjacency_matrix_filenames, features_filenames, labels_filenames = self._save_test_data(
            dataset_size)

        # When
        losses = grid_search.start()
        configuration_id = list(losses["training_loss"].keys())[0]

        # Then
        self.assertTrue(losses["training_loss"][configuration_id][
            grid_search_dictionary["epochs"][0]] > 0.0)
        self.assertTrue(losses["validation_loss"][configuration_id][
            grid_search_dictionary["validation_period"][0]] > 0.0)
        self.assertTrue(
            losses["test_loss"][configuration_id]["final_epoch"] > 0.0)

        # Tear down
        self._remove_files(dataset_size, features_filenames,
                           adjacency_matrix_filenames, labels_filenames)

    def _save_test_data(self, dataset_size):
        features_filenames = [
            f'{i}_training_features.pickle' for i in range(dataset_size)
        ]
        adjacency_matrix_filenames = [
            f'{i}_training_adjacency-matrix.pickle'
            for i in range(dataset_size)
        ]
        labels_filenames = [
            f'{i}_training_labels.pickle' for i in range(dataset_size)
        ]
        for i in range(dataset_size):
            self.repository.save(features_filenames[i], self.features)
            self.repository.save(adjacency_matrix_filenames[i],
                                 self.adjacency_matrix)
            self.repository.save(labels_filenames[i], self.labels)
        return adjacency_matrix_filenames, features_filenames, labels_filenames

    def _remove_files(self, dataset_size: int, features_filenames: List[str],
                      adjacency_matrix_filenames: List[str],
                      labels_filenames: List[str]) -> None:
        data_path = os.path.join(self.tests_data_directory, self.dataset)
        for i in range(dataset_size):
            os.remove(os.path.join(data_path, features_filenames[i]))
            os.remove(os.path.join(data_path, adjacency_matrix_filenames[i]))
            os.remove(os.path.join(data_path, labels_filenames[i]))
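
The manual filename bookkeeping in _save_test_data and _remove_files could instead use a throwaway directory; a sketch, assuming FileSystemRepository accepts an arbitrary base directory:

    def setUp(self) -> None:
        import tempfile  # stdlib; a local import keeps the sketch self-contained
        self._tmp_directory = tempfile.TemporaryDirectory()
        # The cleanup runs even when a test fails, deleting every file
        # the repository wrote during the test.
        self.addCleanup(self._tmp_directory.cleanup)
        self.repository = FileSystemRepository(self._tmp_directory.name,
                                               self.dataset)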
class TestGraphPreprocessor(TestCase):
    def setUp(self) -> None:
        self.data_preprocessor = DataPreprocessor()

    def test_train_validation_test_split(self):
        # Given
        dataset_length = 10
        features = BASE_GRAPH_NODE_FEATURES
        all_neighbors = to.tensor([[1, 2, -1, -1], [0, 2, -1, -1],
                                   [0, 1, 3, -1], [2, -1, -1, -1]])
        labels = BASE_GRAPH.view(-1)
        dataset = GraphDataset("")
        dataset.enable_test_mode()
        dataset.dataset = [(features, all_neighbors, labels, i)
                           for i in range(dataset_length)]
        train_validation_test_split_expected = [7, 2, 1]

        # When
        train_validation_test_split = self.data_preprocessor.train_validation_test_split(
            dataset, batch_size=1, validation_split=0.2, test_split=0.1)
        train_validation_test_split = [
            len(split) for split in train_validation_test_split
        ]

        # Then
        self.assertEqual(train_validation_test_split_expected,
                         train_validation_test_split)

    def test_extract_data_dimensions(self):
        # Given
        dataset_length = 1
        features = BASE_GRAPH_NODE_FEATURES
        all_neighbors = to.tensor([[1, 2, -1, -1], [0, 2, -1, -1],
                                   [0, 1, 3, -1], [2, -1, -1, -1]])
        labels = BASE_GRAPH.view(-1)
        dataset = GraphDataset("")
        dataset.enable_test_mode()
        dataset.dataset = [(features, all_neighbors, labels, i)
                           for i in range(dataset_length)]
        data_dimensions_expected = (features.size(), labels.size())

        # When
        data_dimensions = self.data_preprocessor.extract_data_dimensions(
            dataset)

        # Then
        self.assertEqual(data_dimensions_expected, data_dimensions)

    def test_flatten_when_sizes_match(self):
        # Given
        dataset_length = 2
        labels = BASE_GRAPH.view(-1)
        tensors = to.cat((labels, labels))
        tensors_flattened_expected = tensors.view(-1)

        # When
        tensors_flattened = self.data_preprocessor.flatten(
            tensors, desired_size=dataset_length * len(labels))

        # Then
        self.assertTrue(
            to.allclose(tensors_flattened_expected, tensors_flattened))

    def test_flatten_when_sizes_do_not_match(self):
        # Given
        dataset_length = 3
        labels = BASE_GRAPH.view(-1)
        tensors = to.cat((labels, labels))
        tensors_flattened_expected = to.cat(
            (tensors.view(-1), to.zeros_like(labels)))

        # When
        tensors_flattened = self.data_preprocessor.flatten(
            tensors, desired_size=dataset_length * len(labels))

        # Then
        self.assertTrue(
            to.allclose(tensors_flattened_expected, tensors_flattened))
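
For reference, the two flatten tests above pin down its observable behavior: reshape to one dimension, then zero-pad up to desired_size. A minimal sketch consistent with those tests, assuming this is all flatten does (not necessarily the project's actual implementation):

import torch as to

def flatten(tensors: to.Tensor, desired_size: int) -> to.Tensor:
    flattened = tensors.view(-1)
    if flattened.numel() < desired_size:
        # Zero-pad on the right so the result has exactly desired_size
        # elements, as test_flatten_when_sizes_do_not_match expects.
        padding = to.zeros(desired_size - flattened.numel(),
                           dtype=flattened.dtype)
        flattened = to.cat((flattened, padding))
    return flattened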