def test_do_inference(self):
    """do_inference over a one-graph dataset yields output/label pairs sized like the labels."""
    # Given
    data_preprocessor = DataPreprocessor()
    inferencer = Inferencer(data_preprocessor, "cpu")
    # (feature size, graph size, flattened-label size) drive the model dimensions.
    node_features_size, graph_size, labels_size = (BASE_GRAPH_NODE_FEATURES.size(),
                                                   BASE_GRAPH.size(),
                                                   BASE_GRAPH.view(-1).size())
    model_type = ModelSelector.load_model("RNN")
    model = model_type(
        time_steps=1,
        number_of_nodes=graph_size[0],
        number_of_node_features=node_features_size[1],
        fully_connected_layer_input_size=graph_size[0] * node_features_size[1],
        fully_connected_layer_output_size=labels_size[0])
    all_neighbors = to.tensor([[1, 2, -1, -1],
                               [0, 2, -1, -1],
                               [0, 1, 3, -1],
                               [2, -1, -1, -1]])
    dataset = GraphDataset("")
    dataset.enable_test_mode()
    dataset.dataset = [(BASE_GRAPH_NODE_FEATURES, all_neighbors,
                        BASE_GRAPH.view(-1), 'tag')]
    inference_data, _, _ = DataPreprocessor().train_validation_test_split(
        dataset, 1, 0.0, 0.0)
    output_label_pairs_expected = [BASE_GRAPH.view(-1), BASE_GRAPH.view(-1)]

    # When
    output_label_pairs = inferencer.do_inference(model, inference_data)

    # Then
    self.assertEqual(output_label_pairs[0][0].squeeze().size(),
                     output_label_pairs_expected[0].size())
    self.assertEqual(output_label_pairs[0][1].squeeze().size(),
                     output_label_pairs_expected[1].size())
def test_save(self):
    """save() writes each tensor as a pickle file under the dataset directory.

    Saves features, adjacency matrix and labels, then asserts the expected
    files exist on disk and removes them afterwards.
    """
    # Given
    features = BASE_GRAPH_NODE_FEATURES
    adjacency_matrix = BASE_GRAPH
    labels = BASE_GRAPH.view(-1)
    filenames_to_save = [
        'code_features.pickle',
        'code_adjacency-matrix.pickle',
        'code_labels.pickle'
    ]
    filenames_expected = [
        self.tests_data_directory + self.dataset + '/code_features.pickle',
        self.tests_data_directory + self.dataset + '/code_adjacency-matrix.pickle',
        self.tests_data_directory + self.dataset + '/code_labels.pickle'
    ]

    # When
    self.file_system_repository.save(filenames_to_save[0], features)
    self.file_system_repository.save(filenames_to_save[1], adjacency_matrix)
    self.file_system_repository.save(filenames_to_save[2], labels)

    # Then
    try:
        # Bug fix: the original called path.exists() without asserting its
        # result, so a failed save could never make this test fail.
        for filename in filenames_expected:
            self.assertTrue(path.exists(filename))
    finally:
        # Clean up even when an assertion fails so later runs start fresh.
        for filename in filenames_expected:
            if path.exists(filename):
                os.remove(filename)
def test_instantiate_attributes(self):
    """instantiate_attributes wires up the model dimensions and a non-empty optimizer."""
    # Given
    expected_nodes = BASE_GRAPH.size()[0]
    expected_node_features = BASE_GRAPH_NODE_FEATURES.size()[1]
    data_dimensions = (BASE_GRAPH_NODE_FEATURES.size(), BASE_GRAPH.view(-1).size())

    # When
    self.model_trainer.instantiate_attributes(data_dimensions,
                                              self.configuration_dictionary)

    # Then
    model = self.model_trainer.model
    self.assertTrue(model.number_of_nodes == expected_nodes)
    self.assertTrue(model.number_of_node_features == expected_node_features)
    # The optimizer must have been built with at least one parameter group.
    self.assertTrue(self.model_trainer.optimizer.param_groups)
def test_do_evaluate(self):
    """do_evaluate over a one-graph dataset produces a strictly positive loss."""
    # Given
    data_dimensions = (BASE_GRAPH_NODE_FEATURES.size(), BASE_GRAPH.view(-1).size())
    self.model_trainer.instantiate_attributes(data_dimensions,
                                              self.configuration_dictionary)
    all_neighbors = to.tensor([[1, 2, -1, -1],
                               [0, 2, -1, -1],
                               [0, 1, 3, -1],
                               [2, -1, -1, -1]])
    dataset = GraphDataset("")
    dataset.enable_test_mode()
    dataset.dataset = [(BASE_GRAPH_NODE_FEATURES, all_neighbors,
                        BASE_GRAPH.view(-1), 'tag')]
    # No validation/test split: everything goes into the evaluation data.
    training_data, _, _ = DataPreprocessor().train_validation_test_split(
        dataset, 1, 0.0, 0.0)

    # When
    validation_loss = self.model_trainer.do_evaluate(evaluation_data=training_data,
                                                     epoch=1)

    # Then
    self.assertTrue(validation_loss > 0.0)
def test_flatten_when_sizes_match(self):
    """flatten() leaves values untouched when the tensor already has the desired size."""
    # Given
    dataset_length = 2
    labels = BASE_GRAPH.view(-1)
    stacked = to.cat((labels, labels))
    flattened_expected = stacked.view(-1)

    # When
    flattened = self.data_preprocessor.flatten(
        stacked, desired_size=dataset_length * len(labels))

    # Then
    self.assertTrue(to.allclose(flattened_expected, flattened))
def setUp(self) -> None:
    """Build the shared fixtures: graph tensors, paths, repository, preprocessor, trainer, saver."""
    # Graph tensors shared by the tests.
    self.features = BASE_GRAPH_NODE_FEATURES
    self.adjacency_matrix = BASE_GRAPH
    self.labels = BASE_GRAPH.view(-1)
    # Filesystem layout for test artifacts.
    self.dataset = 'training-test-data'
    self.tests_data_directory = 'tests/test_data/'
    self.data_path = self.tests_data_directory + self.dataset + "/"
    self.repository = FileSystemRepository(self.tests_data_directory, self.dataset)
    # Preprocessing and training collaborators (CPU only).
    self.data_preprocessor = DataPreprocessor()
    self.data_preprocessor.enable_test_mode()
    self.model_trainer = Trainer(self.data_preprocessor, "cpu")
    self.saver = Saver('tests/model_checkpoints', 'tests/grid_search_results')
def test_extract_data_dimensions(self):
    """extract_data_dimensions reports the feature and label tensor sizes of the dataset."""
    # Given
    dataset_length = 1
    features = BASE_GRAPH_NODE_FEATURES
    labels = BASE_GRAPH.view(-1)
    all_neighbors = to.tensor([[1, 2, -1, -1],
                               [0, 2, -1, -1],
                               [0, 1, 3, -1],
                               [2, -1, -1, -1]])
    dataset = GraphDataset("")
    dataset.enable_test_mode()
    dataset.dataset = [(features, all_neighbors, labels, tag)
                       for tag in range(dataset_length)]
    data_dimensions_expected = (features.size(), labels.size())

    # When
    data_dimensions = self.data_preprocessor.extract_data_dimensions(dataset)

    # Then
    self.assertEqual(data_dimensions_expected, data_dimensions)
def setUp(self) -> None:
    """Create an RNNEncoder whose weights are pinned to known constants for reproducible outputs."""
    self.number_of_nodes = BASE_GRAPH.size()[0]
    self.number_of_node_features = BASE_GRAPH_NODE_FEATURES.size()[1]
    self.fully_connected_layer_input_size = (self.number_of_nodes *
                                             self.number_of_node_features)
    self.fully_connected_layer_output_size = self.number_of_nodes**2
    self.device = "cpu"
    self.time_steps = 2
    self.graph_encoder = RNNEncoder(
        time_steps=self.time_steps,
        number_of_nodes=self.number_of_nodes,
        number_of_node_features=self.number_of_node_features,
        fully_connected_layer_input_size=self.fully_connected_layer_input_size,
        fully_connected_layer_output_size=self.fully_connected_layer_output_size)

    def frozen(shape):
        # Constant matrix of MULTIPLICATION_FACTOR with gradients disabled.
        return nn.Parameter(MULTIPLICATION_FACTOR * to.ones(shape),
                            requires_grad=False)

    nodes = self.number_of_nodes
    node_features = self.number_of_node_features
    self.graph_encoder.w_graph_node_features = frozen((nodes, nodes))
    self.graph_encoder.w_graph_neighbor_messages = frozen((nodes, nodes))
    self.graph_encoder.u_graph_node_features = frozen((nodes, nodes))
    self.graph_encoder.u_graph_neighbor_messages = frozen((node_features,
                                                           node_features))
    self.graph_encoder.linear_weight = to.nn.Parameter(
        MULTIPLICATION_FACTOR * to.ones(self.fully_connected_layer_output_size,
                                        self.fully_connected_layer_input_size),
        requires_grad=False).float()
    self.graph_encoder.linear_bias = to.nn.Parameter(
        MULTIPLICATION_FACTOR * to.arange(self.fully_connected_layer_output_size),
        requires_grad=False).float()
def test_train_validation_test_split(self):
    """A 10-item dataset with 0.2/0.1 splits divides into 7 train, 2 validation, 1 test."""
    # Given
    dataset_length = 10
    features = BASE_GRAPH_NODE_FEATURES
    labels = BASE_GRAPH.view(-1)
    all_neighbors = to.tensor([[1, 2, -1, -1],
                               [0, 2, -1, -1],
                               [0, 1, 3, -1],
                               [2, -1, -1, -1]])
    dataset = GraphDataset("")
    dataset.enable_test_mode()
    dataset.dataset = [(features, all_neighbors, labels, tag)
                       for tag in range(dataset_length)]
    split_lengths_expected = [7, 2, 1]

    # When
    splits = self.data_preprocessor.train_validation_test_split(
        dataset, batch_size=1, validation_split=0.2, test_split=0.1)
    split_lengths = [len(split) for split in splits]

    # Then
    self.assertEqual(split_lengths_expected, split_lengths)