def test_download_and_extract_fornes_symbols_dataset_expect_folder_to_be_created(self):
    """Run the shared download-and-verify helper for the Fornes music symbols dataset.

    The helper is called with the destination directory "FornesMusicSymbols",
    an expected count of 4094 samples, the file pattern "*.bmp", the archive
    name reported by the downloader, and the downloader itself.
    """
    fornes_downloader = FornesMusicSymbolsDatasetDownloader()
    archive_name = fornes_downloader.get_dataset_filename()

    target_directory = "FornesMusicSymbols"
    expected_sample_count = 4094
    file_pattern = "*.bmp"

    self.download_dataset_and_verify_correct_extraction(
        target_directory,
        expected_sample_count,
        file_pattern,
        archive_name,
        fornes_downloader,
    )
def __download_and_extract_datasets(self, datasets, width, height, use_fixed_canvas, staff_line_spacing,
                                    staff_line_vertical_offsets, stroke_thicknesses_for_generated_symbols,
                                    random_position_on_canvas: bool):
    """Download, extract and prepare every dataset whose name appears in ``datasets``.

    Recognised names are 'homus', 'rebelo1', 'rebelo2', 'printed', 'fornes',
    'audiveris', 'muscima_pp' and 'openomr'. Each selected dataset is
    processed in that order; names that are not listed are ignored.

    :param datasets: Container that is tested with ``in`` for the names above.
    :param width: Image width handed to the HOMUS image generator when
        ``use_fixed_canvas`` is truthy.
    :param height: Image height handed to the HOMUS image generator when
        ``use_fixed_canvas`` is truthy.
    :param use_fixed_canvas: When falsy, ``None`` is passed for both width
        and height to the HOMUS image generator.
    :param staff_line_spacing: Forwarded to ``HomusImageGenerator.create_images``.
    :param staff_line_vertical_offsets: Forwarded to ``HomusImageGenerator.create_images``.
    :param stroke_thicknesses_for_generated_symbols: Forwarded to
        ``HomusImageGenerator.create_images``.
    :param random_position_on_canvas: Forwarded to ``HomusImageGenerator.create_images``.

    For 'homus', the value returned by the image generator is pickled into
    "bounding_boxes.txt" inside ``self.dataset_directory``.
    """
    if 'homus' in datasets:
        homus_raw_directory = os.path.join(self.dataset_directory, "homus_raw")
        HomusDatasetDownloader(homus_raw_directory).download_and_extract_dataset()

        # Without a fixed canvas the size arguments are dropped, which allows
        # the symbols to be drawn at their original shapes.
        if use_fixed_canvas:
            canvas_width, canvas_height = width, height
        else:
            canvas_width, canvas_height = None, None

        symbol_bounding_boxes = HomusImageGenerator.create_images(
            homus_raw_directory,
            self.image_dataset_directory,
            stroke_thicknesses_for_generated_symbols,
            canvas_width,
            canvas_height,
            staff_line_spacing,
            staff_line_vertical_offsets,
            random_position_on_canvas)

        cache_path = os.path.join(self.dataset_directory, "bounding_boxes.txt")
        with open(cache_path, "wb") as cache_file:
            pickle.dump(symbol_bounding_boxes, cache_file)

    # These three datasets are extracted straight into the image directory.
    if 'rebelo1' in datasets:
        RebeloMusicSymbolDataset1Downloader(self.image_dataset_directory).download_and_extract_dataset()

    if 'rebelo2' in datasets:
        RebeloMusicSymbolDataset2Downloader(self.image_dataset_directory).download_and_extract_dataset()

    if 'printed' in datasets:
        PrintedMusicSymbolsDatasetDownloader(self.image_dataset_directory).download_and_extract_dataset()

    if 'fornes' in datasets:
        fornes_raw_directory = os.path.join(self.dataset_directory, "fornes_raw")
        FornesMusicSymbolsDatasetDownloader(fornes_raw_directory).download_and_extract_dataset()
        FornesMusicSymbolsImagePreparer().prepare_dataset(fornes_raw_directory, self.image_dataset_directory)

    if 'audiveris' in datasets:
        # Audiveris goes through an intermediate image directory before the
        # final preparation step.
        audiveris_raw_directory = os.path.join(self.dataset_directory, "audiveris_omr_raw")
        audiveris_image_directory = os.path.join(self.dataset_directory, "audiveris_omr_images")
        AudiverisOmrDatasetDownloader(audiveris_raw_directory).download_and_extract_dataset()
        AudiverisOmrImageGenerator().extract_symbols(audiveris_raw_directory, audiveris_image_directory)
        AudiverisOmrImageExtractor().prepare_dataset(audiveris_image_directory, self.image_dataset_directory)

    if 'muscima_pp' in datasets:
        muscima_raw_directory = os.path.join(self.dataset_directory, "muscima_pp_raw")
        MuscimaPlusPlusDatasetDownloader(muscima_raw_directory).download_and_extract_dataset()
        MuscimaPlusPlusImageGenerator2().extract_symbols_for_training(muscima_raw_directory,
                                                                      self.image_dataset_directory)

    if 'openomr' in datasets:
        open_omr_raw_directory = os.path.join(self.dataset_directory, "open_omr_raw")
        OpenOmrDatasetDownloader(open_omr_raw_directory).download_and_extract_dataset()
        OpenOmrImagePreparer().prepare_dataset(open_omr_raw_directory, self.image_dataset_directory)
def test_download_extract_and_invert_bitmaps(self):
    """Download the Fornes dataset, invert its images and count the resulting PNG files.

    The test expects 4094 ``*.png`` files below ``temp/fornes_raw`` after
    ``ImageInverter.invert_images`` has run. The downloaded archive and the
    temporary directory are removed afterwards, even if the test fails.
    """
    # Arrange
    temp_path = "temp/fornes_raw"
    try:
        dataset_downloader = FornesMusicSymbolsDatasetDownloader(temp_path)
        dataset_downloader.download_and_extract_dataset()

        # Act
        image_inverter = ImageInverter()
        image_inverter.invert_images(temp_path)
        all_image_files = [
            image_file
            for directory, _, _ in os.walk(temp_path)
            for image_file in glob(os.path.join(directory, '*.png'))
        ]
        actual_number_of_files = len(all_image_files)

        # Assert
        self.assertEqual(4094, actual_number_of_files)
    finally:
        # Cleanup lives in ``finally`` so a failing download or assertion does
        # not leave the archive and the temp tree behind for later runs.
        # Both steps tolerate missing files so that a cleanup error cannot
        # mask the original failure.
        if os.path.exists("Music_Symbols.zip"):
            os.remove("Music_Symbols.zip")
        shutil.rmtree("temp", ignore_errors=True)
i for i in os.listdir(source_folder) if i.endswith(".png") and i not in broken_symbols ] for image in all_png_images: shutil.copy(os.path.join(source_folder, image), destination_folder) if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( "--raw_dataset_directory", type=str, default="../data/fornes_raw", help="The directory, where the raw Muscima++ dataset can be found") parser.add_argument( "--image_dataset_directory", type=str, default="../data/images", help="The directory, where the images will be copied to") flags, unparsed = parser.parse_known_args() dataset_downloader = FornesMusicSymbolsDatasetDownloader() dataset_downloader.download_and_extract_dataset( flags.raw_dataset_directory) dataset_preparer = FornesMusicSymbolsImagePreparer() dataset_preparer.prepare_dataset(flags.raw_dataset_directory, flags.image_dataset_directory)