def setUp(self) -> None:
    time_steps = 1
    loss_function = "MSE"
    optimizer = "SGD"
    model = "RNN"
    device = "cpu"
    self.configuration_dictionary = {"model": model,
                                     "loss_function": loss_function,
                                     "optimizer": optimizer,
                                     "time_steps": time_steps}
    self.postgres_connector = PostgresConnector()
    data_preprocessor = DataPreprocessor()
    self.model_trainer = Trainer(data_preprocessor, device)
def create_grid_search(dataset_name: str,
                       data_directory: str,
                       model_directory: str,
                       results_directory: str,
                       model: str,
                       device: str,
                       epochs: str,
                       loss_function_selection: str,
                       optimizer_selection: str,
                       batch_size: str,
                       validation_split: str,
                       test_split: str,
                       time_steps: str,
                       validation_period: str) -> MessagePassingNN:
    grid_search_dictionary = GridSearchParametersParser().get_grid_search_dictionary(
        model,
        epochs,
        loss_function_selection,
        optimizer_selection,
        batch_size,
        validation_split,
        test_split,
        time_steps,
        validation_period)
    data_path = _get_data_path(data_directory, dataset_name)
    data_preprocessor = DataPreprocessor()
    trainer = Trainer(data_preprocessor, device)
    saver = Saver(model_directory, results_directory)
    grid_search = GridSearch(data_path,
                             data_preprocessor,
                             trainer,
                             grid_search_dictionary,
                             saver)
    return MessagePassingNN(grid_search)
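# A hypothetical usage sketch of the grid-search entry point above. All
# hyperparameters are passed as strings because GridSearchParametersParser is
# responsible for parsing them; the exact string format it accepts (single
# values vs. ranges) and the start() call are assumptions, not confirmed by
# the source.
message_passing_nn = create_grid_search(dataset_name='training-test-data',
                                        data_directory='tests/test_data/',
                                        model_directory='tests/model_checkpoints',
                                        results_directory='tests/grid_search_results',
                                        model='RNN',
                                        device='cpu',
                                        epochs='10',
                                        loss_function_selection='MSE',
                                        optimizer_selection='SGD',
                                        batch_size='1',
                                        validation_split='0.2',
                                        test_split='0.1',
                                        time_steps='1',
                                        validation_period='5')
message_passing_nn.start()  # assumed entry-point name, not confirmed by the source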
def _build_grid_search(self, grid_search_parameters) -> GridSearch:
    data_preprocessor = DataPreprocessor()
    postgres_connector = PostgresConnector()
    dataset = GraphDataset(postgres_connector)
    model_trainer = Trainer(data_preprocessor, self.device, postgres_connector)
    saver = Saver(self.model_directory, self.results_directory)
    grid_search_configurations = self._get_all_grid_search_configurations(grid_search_parameters)
    return GridSearch(dataset,
                      data_preprocessor,
                      model_trainer,
                      grid_search_configurations,
                      saver)
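# _get_all_grid_search_configurations is not shown in this snippet. A minimal
# sketch of what such a helper could do, assuming grid_search_parameters maps
# each parameter name to a list of candidate values (an assumption about the
# data shape, not the repository's implementation):
from itertools import product


def _get_all_grid_search_configurations(grid_search_parameters: dict) -> list:
    # Expand {parameter: [values]} into one dictionary per point of the
    # Cartesian product of all candidate values.
    keys = sorted(grid_search_parameters.keys())
    return [dict(zip(keys, values))
            for values in product(*(grid_search_parameters[key] for key in keys))]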
def setUp(self) -> None:
    self.features = BASE_GRAPH_NODE_FEATURES
    self.adjacency_matrix = BASE_GRAPH
    self.labels = BASE_GRAPH.view(-1)
    self.dataset = 'training-test-data'
    self.tests_data_directory = 'tests/test_data/'
    tests_model_directory = 'tests/model_checkpoints'
    tests_results_directory = 'tests/grid_search_results'
    device = "cpu"
    self.data_path = self.tests_data_directory + self.dataset + "/"
    self.repository = FileSystemRepository(self.tests_data_directory, self.dataset)
    self.data_preprocessor = DataPreprocessor()
    self.data_preprocessor.enable_test_mode()
    self.model_trainer = Trainer(self.data_preprocessor, device)
    self.saver = Saver(tests_model_directory, tests_results_directory)
def setUp(self) -> None:
    self.features = BASE_GRAPH_NODE_FEATURES
    self.adjacency_matrix = BASE_GRAPH
    self.labels = BASE_GRAPH.view(-1)
    self.dataset_name = 'training-test-data'
    self.tests_data_directory = os.path.join('tests', 'test_data')
    tests_model_directory = os.path.join('tests', 'model_checkpoints')
    tests_results_directory = os.path.join('tests', 'grid_search_results')
    device = "cpu"
    self.data_path = os.path.join("./", self.tests_data_directory, self.dataset_name)
    self.repository = FileSystemRepository(self.tests_data_directory, self.dataset_name)
    self.data_preprocessor = DataPreprocessor()
    self.postgres_connector = PostgresConnector()
    self.model_trainer = Trainer(self.data_preprocessor, device, self.postgres_connector)
    self.saver = Saver(tests_model_directory, tests_results_directory)
class TestTrainer(TestCase):
    def setUp(self) -> None:
        time_steps = 1
        loss_function = "MSE"
        optimizer = "SGD"
        model = "RNN"
        device = "cpu"
        self.configuration_dictionary = {
            "model": model,
            "loss_function": loss_function,
            "optimizer": optimizer,
            "time_steps": time_steps
        }
        data_preprocessor = DataPreprocessor()
        self.model_trainer = Trainer(data_preprocessor, device, normalize=True)

    def test_instantiate_attributes(self):
        # Given
        number_of_nodes = BASE_GRAPH.size()[0]
        number_of_node_features = BASE_GRAPH_NODE_FEATURES.size()[1]
        data_dimensions = (BASE_GRAPH_NODE_FEATURES.size(), BASE_GRAPH.view(-1).size())

        # When
        self.model_trainer.instantiate_attributes(data_dimensions,
                                                  self.configuration_dictionary)

        # Then
        self.assertTrue(self.model_trainer.model.number_of_nodes == number_of_nodes)
        self.assertTrue(self.model_trainer.model.number_of_node_features == number_of_node_features)
        self.assertTrue(self.model_trainer.optimizer.param_groups)

    def test_do_train(self):
        # Given
        data_dimensions = (BASE_GRAPH_NODE_FEATURES.size(), BASE_GRAPH.view(-1).size())
        self.model_trainer.instantiate_attributes(data_dimensions,
                                                  self.configuration_dictionary)
        # Each row holds one node's neighbor indices, right-padded with -1.
        all_neighbors = to.tensor([[1, 2, -1, -1],
                                   [0, 2, -1, -1],
                                   [0, 1, 3, -1],
                                   [2, -1, -1, -1]])
        dataset = GraphDataset("")
        dataset.enable_test_mode()
        tag = 'tag'
        dataset.dataset = [(BASE_GRAPH_NODE_FEATURES, all_neighbors, BASE_GRAPH.view(-1), tag)]
        training_data, _, _ = DataPreprocessor().train_validation_test_split(dataset, 1, 0.0, 0.0)

        # When
        training_loss = self.model_trainer.do_train(training_data=training_data, epoch=1)

        # Then
        self.assertTrue(training_loss > 0.0)

    def test_do_evaluate(self):
        # Given
        data_dimensions = (BASE_GRAPH_NODE_FEATURES.size(), BASE_GRAPH.view(-1).size())
        self.model_trainer.instantiate_attributes(data_dimensions,
                                                  self.configuration_dictionary)
        all_neighbors = to.tensor([[1, 2, -1, -1],
                                   [0, 2, -1, -1],
                                   [0, 1, 3, -1],
                                   [2, -1, -1, -1]])
        dataset = GraphDataset("")
        dataset.enable_test_mode()
        tag = 'tag'
        dataset.dataset = [(BASE_GRAPH_NODE_FEATURES, all_neighbors, BASE_GRAPH.view(-1), tag)]
        training_data, _, _ = DataPreprocessor().train_validation_test_split(dataset, 1, 0.0, 0.0)

        # When
        validation_loss = self.model_trainer.do_evaluate(evaluation_data=training_data, epoch=1)

        # Then
        self.assertTrue(validation_loss > 0.0)
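# The padded neighbor-list tensor used above can be derived from an adjacency
# matrix. A minimal sketch, assuming a symmetric 0/1 adjacency matrix and
# padding every row to the number of nodes (this helper is illustrative, not
# part of the repository):
import torch as to


def to_padded_neighbors(adjacency_matrix: to.Tensor) -> to.Tensor:
    # Collect each node's neighbor indices and right-pad with -1 so that
    # every row has number_of_nodes entries.
    number_of_nodes = adjacency_matrix.size(0)
    neighbors = [row.nonzero().squeeze(1).tolist() for row in adjacency_matrix]
    return to.tensor([n + [-1] * (number_of_nodes - len(n)) for n in neighbors])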
class TestTrainer(TestCase):
    def setUp(self) -> None:
        time_steps = 1
        loss_function = "MSE"
        optimizer = "SGD"
        model = "RNN"
        device = "cpu"
        self.configuration_dictionary = {"model": model,
                                         "loss_function": loss_function,
                                         "optimizer": optimizer,
                                         "time_steps": time_steps}
        self.postgres_connector = PostgresConnector()
        data_preprocessor = DataPreprocessor()
        self.model_trainer = Trainer(data_preprocessor, device)

    def test_instantiate_attributes(self):
        # Given
        data_dimensions = {"number_of_nodes": 4,
                           "number_of_node_features": 4,
                           "fully_connected_layer_input_size": 16,
                           "fully_connected_layer_output_size": 8}

        # When
        self.model_trainer.build(data_dimensions, self.configuration_dictionary)

        # Then
        self.assertTrue(self.model_trainer.model.number_of_nodes == data_dimensions["number_of_nodes"])
        self.assertTrue(
            self.model_trainer.model.number_of_node_features == data_dimensions["number_of_node_features"])
        self.assertTrue(self.model_trainer.optimizer.param_groups)

    def test_do_train(self):
        # Given
        data_dimensions = {"number_of_nodes": 4,
                           "number_of_node_features": 4,
                           "fully_connected_layer_input_size": 16,
                           "fully_connected_layer_output_size": 8}
        self.model_trainer.build(data_dimensions, self.configuration_dictionary)
        self._insert_test_data(1)
        dataset = GraphDataset(self.postgres_connector)
        training_data, _, _ = DataPreprocessor().train_validation_test_split(dataset, 1, 0.0, 0.0)

        # When
        training_loss = self.model_trainer.do_train_step(training_data=training_data, epoch=1)

        # Then
        self.assertTrue(training_loss > 0.0)
        self._truncate_table()

    def test_do_evaluate(self):
        # Given
        data_dimensions = {"number_of_nodes": 4,
                           "number_of_node_features": 4,
                           "fully_connected_layer_input_size": 16,
                           "fully_connected_layer_output_size": 8}
        self.model_trainer.build(data_dimensions, self.configuration_dictionary)
        self._insert_test_data(1)
        dataset = GraphDataset(self.postgres_connector)
        training_data, _, _ = DataPreprocessor().train_validation_test_split(dataset, 1, 0.0, 0.0)

        # When
        validation_loss = self.model_trainer.do_evaluate_step(evaluation_data=training_data, epoch=1)

        # Then
        self.assertTrue(validation_loss > 0.0)
        self._truncate_table()

    def _insert_test_data(self, dataset_size):
        self.postgres_connector.open_connection()
        dataset_values = """(pdb_code varchar primary key, features float[][], neighbors float[][], labels float[])"""
        penalty_values = """(residue varchar primary key, matrix float[][], penalty float[][])"""
        self.postgres_connector.create_table(TEST_DATASET, dataset_values)
        self.postgres_connector.create_table(TEST_PENALTY, penalty_values)
        features = FEATURES_SERIALIZED
        neighbors = NEIGHBORS_SERIALIZED
        labels = LABELS_SERIALIZED
        penalty = PENALTY_SERIALIZED
        for index in range(dataset_size):
            self.postgres_connector.execute_insert_dataset(str(index), features, neighbors, labels)
        for residue in map_amino_acid_codes:
            self.postgres_connector.execute_insert_penalty(residue, penalty)
        self.postgres_connector.close_connection()

    def _truncate_table(self) -> None:
        self.postgres_connector.open_connection()
        self.postgres_connector.truncate_table(TEST_DATASET)
        self.postgres_connector.close_connection()
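# A standard unittest entry point for running the test classes above. Note
# that the Postgres-backed tests assume a reachable database in which
# TEST_DATASET and TEST_PENALTY can be created and truncated.
import unittest

if __name__ == "__main__":
    unittest.main()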