def test_download_and_extract_openomr_dataset_expect_folder_to_be_created(self):
    """Run the shared download-and-extraction check for the OpenOMR dataset.

    Delegates to ``download_dataset_and_verify_correct_extraction`` with the
    target folder ``"OpenOMR"``, an expected count of 706 samples, the file
    pattern ``"*.png"``, the archive name reported by the downloader, and
    the downloader itself.
    """
    expected_sample_count = 706
    file_pattern = "*.png"
    target_folder = "OpenOMR"

    openomr_downloader = OpenOmrDatasetDownloader()
    archive_name = openomr_downloader.get_dataset_filename()

    self.download_dataset_and_verify_correct_extraction(
        target_folder,
        expected_sample_count,
        file_pattern,
        archive_name,
        openomr_downloader,
    )
def __download_and_extract_datasets(self, datasets, width, height, use_fixed_canvas, staff_line_spacing,
                                    staff_line_vertical_offsets, stroke_thicknesses_for_generated_symbols,
                                    random_position_on_canvas: bool):
    """Download every requested dataset and place its images in the image directory.

    ``datasets`` is checked with ``in`` for the names 'homus', 'rebelo1',
    'rebelo2', 'printed', 'fornes', 'audiveris', 'muscima_pp' and 'openomr';
    each name that is present triggers the matching download (and, where
    needed, an image generation/preparation step). Raw data is stored below
    ``self.dataset_directory``; images end up in
    ``self.image_dataset_directory``.

    ``width``, ``height``, ``use_fixed_canvas``, ``staff_line_spacing``,
    ``staff_line_vertical_offsets``,
    ``stroke_thicknesses_for_generated_symbols`` and
    ``random_position_on_canvas`` only affect the HOMUS image generation.
    """
    if 'homus' in datasets:
        homus_raw = os.path.join(self.dataset_directory, "homus_raw")
        HomusDatasetDownloader(homus_raw).download_and_extract_dataset()

        # Only a fixed canvas gets explicit dimensions; otherwise None is
        # passed so the symbols are drawn at their original shapes.
        if use_fixed_canvas:
            canvas_width, canvas_height = width, height
        else:
            canvas_width, canvas_height = None, None

        boxes = HomusImageGenerator.create_images(
            homus_raw,
            self.image_dataset_directory,
            stroke_thicknesses_for_generated_symbols,
            canvas_width,
            canvas_height,
            staff_line_spacing,
            staff_line_vertical_offsets,
            random_position_on_canvas)

        # The bounding boxes are pickled (binary), despite the .txt file name.
        boxes_path = os.path.join(self.dataset_directory, "bounding_boxes.txt")
        with open(boxes_path, "wb") as boxes_file:
            pickle.dump(boxes, boxes_file)

    # These three datasets are extracted straight into the image directory.
    if 'rebelo1' in datasets:
        RebeloMusicSymbolDataset1Downloader(self.image_dataset_directory).download_and_extract_dataset()

    if 'rebelo2' in datasets:
        RebeloMusicSymbolDataset2Downloader(self.image_dataset_directory).download_and_extract_dataset()

    if 'printed' in datasets:
        PrintedMusicSymbolsDatasetDownloader(self.image_dataset_directory).download_and_extract_dataset()

    if 'fornes' in datasets:
        fornes_raw = os.path.join(self.dataset_directory, "fornes_raw")
        FornesMusicSymbolsDatasetDownloader(fornes_raw).download_and_extract_dataset()
        FornesMusicSymbolsImagePreparer().prepare_dataset(fornes_raw, self.image_dataset_directory)

    if 'audiveris' in datasets:
        audiveris_raw = os.path.join(self.dataset_directory, "audiveris_omr_raw")
        audiveris_images = os.path.join(self.dataset_directory, "audiveris_omr_images")
        AudiverisOmrDatasetDownloader(audiveris_raw).download_and_extract_dataset()
        # Two steps: extract symbols into an intermediate folder, then
        # prepare the final images from that folder.
        AudiverisOmrImageGenerator().extract_symbols(audiveris_raw, audiveris_images)
        AudiverisOmrImageExtractor().prepare_dataset(audiveris_images, self.image_dataset_directory)

    if 'muscima_pp' in datasets:
        muscima_raw = os.path.join(self.dataset_directory, "muscima_pp_raw")
        MuscimaPlusPlusDatasetDownloader(muscima_raw).download_and_extract_dataset()
        MuscimaPlusPlusImageGenerator2().extract_symbols_for_training(muscima_raw, self.image_dataset_directory)

    if 'openomr' in datasets:
        openomr_raw = os.path.join(self.dataset_directory, "open_omr_raw")
        OpenOmrDatasetDownloader(openomr_raw).download_and_extract_dataset()
        OpenOmrImagePreparer().prepare_dataset(openomr_raw, self.image_dataset_directory)
def test_download_and_prepare_dataset(self):
    """Download OpenOMR, prepare its images and check that 503 PNG files result.

    The raw dataset is extracted to ``temp/open_omr_raw2`` and the prepared
    images are written to ``temp/open_omr_image2``. Cleanup runs in a
    ``finally`` block so that a failed download, preparation or assertion
    does not leave the archive or the ``temp`` folder behind for later runs.
    """
    # Arrange
    dataset_downloader = OpenOmrDatasetDownloader()
    expected_number_of_images = 503
    raw_directory = "temp/open_omr_raw2"
    image_directory = "temp/open_omr_image2"

    try:
        # Act
        dataset_downloader.download_and_extract_dataset(raw_directory)
        image_preparer = OpenOmrImagePreparer()
        image_preparer.prepare_dataset(raw_directory, image_directory)

        # Collect PNG files from every folder below the image directory.
        all_image_files = [
            image_file
            for directory, _, _ in os.walk(image_directory)
            for image_file in glob(os.path.join(directory, '*.png'))
        ]
        actual_number_of_images = len(all_image_files)

        # Assert
        self.assertEqual(expected_number_of_images, actual_number_of_images)
    finally:
        # Cleanup: guarded so that a missing archive or folder (e.g. after a
        # failed download) does not hide the error that caused the failure.
        dataset_archive = dataset_downloader.get_dataset_filename()
        if os.path.exists(dataset_archive):
            os.remove(dataset_archive)
        shutil.rmtree("temp", ignore_errors=True)
# NOTE(review): the next four statements use names bound outside this excerpt
# (raw_dataset_directory, symbol_class, image_dataset_directory,
# destination_class_name); they look like the tail of a definition that starts
# earlier -- confirm the enclosing indentation before applying.
source_folder = os.path.join(raw_dataset_directory, symbol_class)
destination_folder = os.path.join(image_dataset_directory, destination_class_name)
# Make sure the target folder exists, then copy the source tree into it.
os.makedirs(destination_folder, exist_ok=True)
dir_util.copy_tree(source_folder, destination_folder)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--raw_dataset_directory",
        type=str,
        default="../data/open_omr_raw",
        # This script handles the OpenOMR dataset; the help text used to name
        # Muscima++ by mistake.
        help="The directory, where the raw OpenOMR dataset can be found")
    parser.add_argument(
        "--image_dataset_directory",
        type=str,
        default="../data/images",
        help="The directory, where the images will be copied to")

    # Unknown command-line arguments are tolerated and ignored.
    flags, _ = parser.parse_known_args()

    # Download the dataset
    # NOTE(review): the download call is disabled here, so the raw dataset
    # presumably has to be present already -- confirm this is intended.
    dataset_downloader = OpenOmrDatasetDownloader(flags.raw_dataset_directory)
    #dataset_downloader.download_and_extract_dataset()

    # Actually prepare our dataset
    dataset_preparer = OpenOmrImagePreparer()
    dataset_preparer.prepare_dataset(flags.raw_dataset_directory, flags.image_dataset_directory)
# NOTE(review): the next three statements use names bound outside this excerpt
# (image_dataset_directory, destination_class_name, source_folder); they look
# like the tail of a definition that starts earlier -- confirm the enclosing
# indentation before applying.
destination_folder = os.path.join(image_dataset_directory, destination_class_name)
# Make sure the target folder exists, then copy the source tree into it.
os.makedirs(destination_folder, exist_ok=True)
dir_util.copy_tree(source_folder, destination_folder)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--raw_dataset_directory",
        type=str,
        default="../data/open_omr_raw",
        # This script handles the OpenOMR dataset; the help text used to name
        # Muscima++ by mistake.
        help="The directory, where the raw OpenOMR dataset can be found")
    parser.add_argument(
        "--image_dataset_directory",
        type=str,
        default="../data/images",
        help="The directory, where the images will be copied to")

    # Unknown command-line arguments are tolerated and ignored.
    flags, _ = parser.parse_known_args()

    # Download the dataset
    dataset_downloader = OpenOmrDatasetDownloader()
    dataset_downloader.download_and_extract_dataset(flags.raw_dataset_directory)

    # Actually prepare our dataset
    dataset_preparer = OpenOmrImagePreparer()
    dataset_preparer.prepare_dataset(flags.raw_dataset_directory, flags.image_dataset_directory)
#%% Audiveris
# Each cell carries its own imports so that it can be run independently.
from omrdatasettools.downloaders.AudiverisOmrDatasetDownloader import AudiverisOmrDatasetDownloader

audiveris_directory = "./data/audiveris"
audiveris_downloader = AudiverisOmrDatasetDownloader(audiveris_directory)
audiveris_downloader.download_and_extract_dataset()

from omrdatasettools.image_generators.AudiverisOmrImageGenerator import (
    AudiverisOmrImageGenerator,
    AudiverisOmrSymbol,
)

# Extract the symbols from the downloaded raw data into an images sub-folder.
audiveris_image_generator = AudiverisOmrImageGenerator()
audiveris_image_generator.extract_symbols(
    raw_data_directory=audiveris_directory,
    destination_directory="./data/audiveris/images",
)

#%% Open OMR
from omrdatasettools.downloaders.OpenOmrDatasetDownloader import OpenOmrDatasetDownloader

openomr_downloader = OpenOmrDatasetDownloader("./data/openomr")
openomr_downloader.download_and_extract_dataset()

#%% Capitan
# This cell is disabled: its code sits inside a string literal and never runs.
'''
from omrdatasettools.downloaders.CapitanDatasetDownloader import CapitanDatasetDownloader
dataset_downloader = CapitanDatasetDownloader("./data/capitan")
dataset_downloader.download_and_extract_dataset()
'''

#%% MUSCIMA
from omrdatasettools.downloaders.MuscimaPlusPlusDatasetDownloader import MuscimaPlusPlusDatasetDownloader

muscima_downloader = MuscimaPlusPlusDatasetDownloader("./data/muscima")
muscima_downloader.download_and_extract_dataset()