def test_download_and_prepare_dataset(self):
    """Run the OpenOMR downloader and image preparer and check the image count.

    The downloader extracts into ``temp/open_omr_raw2`` and the preparer
    writes into ``temp/open_omr_image2``; the test then counts every
    ``*.png`` file found anywhere below the image directory and expects
    exactly 503 of them.

    Cleanup runs in a ``finally`` clause so that a failed assertion or an
    exception during download/preparation does not leave the archive or the
    ``temp`` tree behind for subsequent test runs.
    """
    # Arrange
    dataset_downloader = OpenOmrDatasetDownloader()
    expected_number_of_images = 503
    raw_directory = "temp/open_omr_raw2"
    image_directory = "temp/open_omr_image2"

    try:
        # Act
        dataset_downloader.download_and_extract_dataset(raw_directory)
        image_preparer = OpenOmrImagePreparer()
        image_preparer.prepare_dataset(raw_directory, image_directory)

        # Collect the PNG files of every directory below the image directory.
        all_image_files = [
            image_path
            for directory, _, _ in os.walk(image_directory)
            for image_path in glob(os.path.join(directory, '*.png'))
        ]
        actual_number_of_images = len(all_image_files)

        # Assert
        self.assertEqual(expected_number_of_images, actual_number_of_images)
    finally:
        # Cleanup
        # NOTE(review): get_dataset_filename() presumably names the downloaded
        # archive - confirm against OpenOmrDatasetDownloader.
        dataset_filename = dataset_downloader.get_dataset_filename()
        # The file may not exist if the download itself failed; do not let
        # the cleanup hide the original error in that case.
        if os.path.exists(dataset_filename):
            os.remove(dataset_filename)
        shutil.rmtree("temp", ignore_errors=True)
# NOTE(review): the next three statements are the tail of a definition that
# starts before this excerpt. They are reproduced unchanged; restore their
# original indentation when merging.
destination_folder = os.path.join(image_dataset_directory, destination_class_name)
os.makedirs(destination_folder, exist_ok=True)
dir_util.copy_tree(source_folder, destination_folder)


# Script entry point: parse the two directory options, then run the
# downloader followed by the image preparer.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--raw_dataset_directory",
        type=str,
        default="../data/open_omr_raw",
        help="The directory, where the raw OpenOMR dataset can be found")
    parser.add_argument(
        "--image_dataset_directory",
        type=str,
        default="../data/images",
        help="The directory, where the images will be copied to")

    # parse_known_args tolerates unrecognised command-line arguments; the
    # leftovers are intentionally ignored.
    flags, _ = parser.parse_known_args()

    # Download the dataset
    dataset_downloader = OpenOmrDatasetDownloader()
    dataset_downloader.download_and_extract_dataset(
        flags.raw_dataset_directory)

    # Actually prepare our dataset
    dataset_preparer = OpenOmrImagePreparer()
    dataset_preparer.prepare_dataset(flags.raw_dataset_directory,
                                     flags.image_dataset_directory)