class TestDataSet(TestCase):
    """Tests for the galaxy image data set loading strategy."""

    def setUp(self):
        """Load the galaxy image data set shared by every test."""
        # Batch size used by the batch-related tests.
        self.batch_size = 64
        # Ground-truth CSV shipped inside the virtualenv data directory.
        self.path = os.environ["VIRTUAL_ENV"] + "/data/csv/galaxy/galaxy.csv"
        # Build the strategy/context pair and load the data set.
        strategy = GalaxyDataSetImageStrategy()
        self.context = Context(strategy)
        self.dataset = self.context.load_dataset(
            csv_file=self.path,
            one_hot=True,
            validation_size=np.float32(0.2))

    def test_load_images(self):
        """An image held by the data set must equal the same image re-read
        from disk and preprocessed the same way (float32, scaled to [0, 1])."""
        x_batch, _ = self.dataset.train.next_image_batch(self.batch_size)
        self.dataset.train.load_images(x_batch)
        # First image ID of the training split and the image the data set
        # associates with it.
        first_img_id = self.dataset.train._img_names[0][0]
        dataset_image = self.dataset.train._images[0]
        # Re-read that image straight from disk.
        img_path = (os.environ["VIRTUAL_ENV"] + "/data/images/"
                    + str(first_img_id) + ".jpg")
        disk_image = cv2.imread(img_path)
        # Apply the same preprocessing the data set applies.
        disk_image = np.multiply(disk_image.astype(np.float32), 1.0 / 255.0)
        np.testing.assert_array_equal(dataset_image, disk_image)

    def test_next_batch(self):
        """Two consecutive batches must not contain identical data."""
        x_batch1, _ = self.dataset.train.next_image_batch(self.batch_size)
        x_batch2, _ = self.dataset.train.next_image_batch(self.batch_size)
        np.testing.assert_raises(AssertionError,
                                 np.testing.assert_array_equal,
                                 x_batch1, x_batch2)

    def test_validation_size(self):
        """A zero validation size keeps every example in the training split;
        a negative validation size must raise."""
        dataset = self.context.load_dataset(csv_file=self.path,
                                            one_hot=True,
                                            validation_size=np.float32(0))
        self.assertTrue(dataset.train._num_examples == 31364)
        self.assertRaises(
            Exception,
            lambda: self.context.load_dataset(
                csv_file=self.path,
                one_hot=True,
                validation_size=np.float32(-0.1)))
def test_load_dataset_with_oneHot(self): galaxy_data_set_strategy = MusicGenreJMIRMFCCsStrategy() context = Context(galaxy_data_set_strategy) dataset = context.load_dataset(csv_file=self.path, one_hot=True, validation_size=np.float32(0.2)) self.assertTrue(dataset.train._num_examples == 52400)
def test_load_dataset_with_oneHot(self): galaxy_data_set_strategy = GalaxyDataSetLabelStrategy() context = Context(galaxy_data_set_strategy) dataset = context.load_dataset(csv_file=self.path, one_hot=True, validation_size=np.float32(0.2)) self.assertTrue(dataset.train._num_examples == 25091)
def test_load_dataset_with_oneHot(self): galaxy_data_set_strategy = GalaxyDataSetImageStrategy() context = Context(galaxy_data_set_strategy) dataset = context.load_dataset(csv_file=self.path, one_hot=True, validation_size=np.float32(0.2)) self.assertTrue( dataset.train._num_examples == int(np.round(31364 * 0.8))) self.assertTrue( dataset.valid._num_examples == int(np.round(31364 * 0.2)))
def setUp(self): validation_size = 0.2 # Get the ground truth CSV file from script's parameters. self.galaxy_csv_file = os.environ["VIRTUAL_ENV"] + "/data/csv/galaxy/galaxy.csv" self.galaxy_images_path = os.environ["VIRTUAL_ENV"] + "/data/images/" # Create instance of data set loading strategies. galaxy_label_data_set_strategy = GalaxyDataSetLabelStrategy() # Set the context to galaxy label data set loading strategy. context = Context(galaxy_label_data_set_strategy) context.set_strategy(galaxy_label_data_set_strategy) self.label_dataset = context.load_dataset(csv_file=self.galaxy_csv_file, one_hot=False, validation_size=np.float32(validation_size))
def test_load_dataset_no_oneHot(self): galaxy_data_set_strategy = GalaxyDataSetFeatureStrategy() context = Context(galaxy_data_set_strategy) dataset = context.load_dataset(csv_file=self.path, one_hot=False, validation_size=self.validation_size) self.assertTrue(dataset.train._num_examples == 25091)
def main():
    """Program's entry point.

    Loads the galaxy and spam data sets through their respective loading
    strategies, discretizes the galaxy feature data set with both the
    supervised and unsupervised strategies, extracts a feature vector per
    labelled galaxy image, and saves the extracted features to a CSV file.
    """
    # The desired validation size (fraction of examples held out).
    validation_size = 0.2

    # Input CSV files and image directory inside the virtualenv.
    galaxy_csv_file = os.environ["VIRTUAL_ENV"] + "/data/csv/galaxy/galaxy.csv"
    galaxy_feature_csv_file = os.environ[
        "VIRTUAL_ENV"] + "/data/csv/galaxy/galaxy_feature_vectors.csv"
    spam_feature_csv_file = os.environ[
        "VIRTUAL_ENV"] + "/data/csv/spam/spam.csv"
    galaxy_images_path = os.environ["VIRTUAL_ENV"] + "/data/images/"
    # Output paths.
    galaxy_feature_vector_export_path = os.environ[
        "VIRTUAL_ENV"] + "/data/csv/galaxy/exported_personal_galaxy_feature_vectors.csv"
    galaxy_mlp_export_path = os.environ[
        "VIRTUAL_ENV"] + "/data/models/exports/MLP/my_mlp"

    # Create instance of data set loading strategies.
    galaxy_image_data_set_strategy = GalaxyDataSetImageStrategy()
    galaxy_feature_data_set_strategy = GalaxyDataSetFeatureStrategy()
    galaxy_label_data_set_strategy = GalaxyDataSetLabelStrategy()
    spam_feature_dataset_strategy = SpamDataSetFeatureStrategy()

    # Galaxy images with one-hot labels.
    context = Context(galaxy_image_data_set_strategy)
    img_dataset = context.load_dataset(
        csv_file=galaxy_csv_file,
        one_hot=True,
        validation_size=np.float32(validation_size))

    # Galaxy feature vectors, with and without one-hot labels.
    # Consistency fix: use the validation_size variable everywhere instead of
    # hard-coding 0.2 in these two calls (same value, single source of truth).
    context.set_strategy(galaxy_feature_data_set_strategy)
    feature_oneHot_dataset = context.load_dataset(
        csv_file=galaxy_feature_csv_file,
        one_hot=True,
        validation_size=np.float32(validation_size))
    feature_dataset = context.load_dataset(
        csv_file=galaxy_feature_csv_file,
        one_hot=False,
        validation_size=np.float32(validation_size))

    # Galaxy labels, no one-hot encoding.
    context.set_strategy(galaxy_label_data_set_strategy)
    label_dataset = context.load_dataset(
        csv_file=galaxy_csv_file,
        one_hot=False,
        validation_size=np.float32(validation_size))

    # Spam feature vectors, no one-hot encoding.
    context.set_strategy(spam_feature_dataset_strategy)
    spam_feature_dataset = context.load_dataset(
        csv_file=spam_feature_csv_file,
        one_hot=False,
        validation_size=np.float32(validation_size))

    # For TP02, set the discretization strategy and discretize data.
    preprocessor_context = DiscretizerContext(
        SupervisedDiscretizationStrategy())
    supervised_discretised_dataset = preprocessor_context.discretize(
        data_set=feature_dataset,
        validation_size=np.float32(validation_size))
    preprocessor_context.set_strategy(UnsupervisedDiscretizationStrategy())
    unsupervised_discretised_dataset = preprocessor_context.discretize(
        data_set=feature_dataset,
        validation_size=np.float32(validation_size))

    # Process galaxies: extract one feature vector per labelled image.
    galaxy_processor = GalaxyProcessor(galaxy_images_path)
    features = galaxy_processor.process_galaxy(label_dataset)

    # Save extracted features to file.
    np.savetxt(galaxy_feature_vector_export_path, features, delimiter=",")
    print("File saved in directory " + galaxy_feature_vector_export_path)
    # NOTE(review): img_dataset, feature_oneHot_dataset, both discretised
    # data sets, spam_feature_dataset and galaxy_mlp_export_path are computed
    # but never used here — confirm whether they are needed (e.g. as smoke
    # checks) before removing them.