def test_download_extract_and_crop_bitmaps(self):
    """Download the Audiveris OMR dataset, extract its symbols and count the PNGs.

    The test downloads and extracts the raw dataset into
    ``temp/audiveris_omr_raw``, lets ``AudiverisOmrImageGenerator`` write the
    symbol images into ``temp/audiveris_omr_img`` and then asserts that exactly
    1056 ``*.png`` files exist anywhere below that directory.

    Cleanup runs in a ``finally`` clause so that the downloaded archive and the
    ``temp`` directory are removed even when the download, the extraction or
    the assertion fails; otherwise stale files would leak into later runs.
    """
    raw_directory = "temp/audiveris_omr_raw"
    image_directory = "temp/audiveris_omr_img"

    try:
        # Arrange
        dataset_downloader = AudiverisOmrDatasetDownloader(raw_directory)

        # Act
        dataset_downloader.download_and_extract_dataset()
        image_generator = AudiverisOmrImageGenerator()
        image_generator.extract_symbols(raw_directory, image_directory)

        # Collect every PNG in the image directory and all of its sub-directories
        all_image_files = [
            image_file
            for directory, _, _ in os.walk(image_directory)
            for image_file in glob(os.path.join(directory, '*.png'))
        ]
        actual_number_of_files = len(all_image_files)

        # Assert
        self.assertEqual(1056, actual_number_of_files)
    finally:
        # Cleanup - tolerate missing artifacts, so that a failure in the steps
        # above is reported instead of being masked by a cleanup error
        if os.path.exists("AudiverisOmrDataset.zip"):
            os.remove("AudiverisOmrDataset.zip")
        shutil.rmtree("temp", ignore_errors=True)
def __download_and_extract_datasets(self, datasets, width, height, use_fixed_canvas, staff_line_spacing,
                                    staff_line_vertical_offsets, stroke_thicknesses_for_generated_symbols,
                                    random_position_on_canvas: bool):
    """Download every requested dataset and convert it into images.

    Each dataset name found in ``datasets`` ('homus', 'rebelo1', 'rebelo2',
    'printed', 'fornes', 'audiveris', 'muscima_pp', 'openomr') triggers the
    matching downloader. Datasets that need conversion are first downloaded
    into a ``*_raw`` directory below ``self.dataset_directory`` and then
    processed into ``self.image_dataset_directory``; the others are downloaded
    into ``self.image_dataset_directory`` directly.

    For 'homus', the value returned by ``HomusImageGenerator.create_images`` is
    pickled to ``bounding_boxes.txt`` inside ``self.dataset_directory``.

    :param datasets: Container of dataset names to process
    :param width: Canvas width, only forwarded when ``use_fixed_canvas`` is truthy
    :param height: Canvas height, only forwarded when ``use_fixed_canvas`` is truthy
    :param use_fixed_canvas: If falsy, ``None`` is passed for width and height
    :param staff_line_spacing: Forwarded to the HOMUS image generator
    :param staff_line_vertical_offsets: Forwarded to the HOMUS image generator
    :param stroke_thicknesses_for_generated_symbols: Forwarded to the HOMUS image generator
    :param random_position_on_canvas: Forwarded to the HOMUS image generator
    """
    if 'homus' in datasets:
        homus_raw_directory = os.path.join(self.dataset_directory, "homus_raw")
        HomusDatasetDownloader(homus_raw_directory).download_and_extract_dataset()

        # Without a fixed canvas the size arguments are dropped, which allows
        # symbols being drawn at their original shapes
        canvas_width, canvas_height = (width, height) if use_fixed_canvas else (None, None)

        homus_bounding_boxes = HomusImageGenerator.create_images(
            homus_raw_directory,
            self.image_dataset_directory,
            stroke_thicknesses_for_generated_symbols,
            canvas_width,
            canvas_height,
            staff_line_spacing,
            staff_line_vertical_offsets,
            random_position_on_canvas)

        cache_path = os.path.join(self.dataset_directory, "bounding_boxes.txt")
        with open(cache_path, "wb") as cache_file:
            pickle.dump(homus_bounding_boxes, cache_file)

    if 'rebelo1' in datasets:
        RebeloMusicSymbolDataset1Downloader(self.image_dataset_directory).download_and_extract_dataset()

    if 'rebelo2' in datasets:
        RebeloMusicSymbolDataset2Downloader(self.image_dataset_directory).download_and_extract_dataset()

    if 'printed' in datasets:
        PrintedMusicSymbolsDatasetDownloader(self.image_dataset_directory).download_and_extract_dataset()

    if 'fornes' in datasets:
        fornes_raw_directory = os.path.join(self.dataset_directory, "fornes_raw")
        FornesMusicSymbolsDatasetDownloader(fornes_raw_directory).download_and_extract_dataset()
        FornesMusicSymbolsImagePreparer().prepare_dataset(fornes_raw_directory, self.image_dataset_directory)

    if 'audiveris' in datasets:
        audiveris_raw_directory = os.path.join(self.dataset_directory, "audiveris_omr_raw")
        audiveris_image_directory = os.path.join(self.dataset_directory, "audiveris_omr_images")
        AudiverisOmrDatasetDownloader(audiveris_raw_directory).download_and_extract_dataset()
        # Two-step conversion: raw data -> intermediate images -> final image dataset
        AudiverisOmrImageGenerator().extract_symbols(audiveris_raw_directory, audiveris_image_directory)
        AudiverisOmrImageExtractor().prepare_dataset(audiveris_image_directory, self.image_dataset_directory)

    if 'muscima_pp' in datasets:
        muscima_raw_directory = os.path.join(self.dataset_directory, "muscima_pp_raw")
        MuscimaPlusPlusDatasetDownloader(muscima_raw_directory).download_and_extract_dataset()
        MuscimaPlusPlusImageGenerator2().extract_symbols_for_training(muscima_raw_directory,
                                                                      self.image_dataset_directory)

    if 'openomr' in datasets:
        open_omr_raw_directory = os.path.join(self.dataset_directory, "open_omr_raw")
        OpenOmrDatasetDownloader(open_omr_raw_directory).download_and_extract_dataset()
        OpenOmrImagePreparer().prepare_dataset(open_omr_raw_directory, self.image_dataset_directory)
if __name__ == "__main__":
    # Command-line entry point: download the Audiveris OMR dataset, render its
    # symbols into an intermediate directory and finally prepare the image
    # dataset from those intermediate images.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--raw_dataset_directory",
        type=str,
        default="../data/audiveris_omr_raw",
        # Help text previously referred to Muscima++, which this script does not handle
        help="The directory, where the raw Audiveris OMR dataset can be found")
    parser.add_argument(
        "--intermediate_image_directory",
        type=str,
        default="../data/audiveris_omr",
        help="The directory, where the raw bitmaps will be generated")
    parser.add_argument(
        "--image_dataset_directory",
        type=str,
        default="../data/images",
        help="The directory, where the processed bitmaps will be copied to after filtering and renaming classes")

    # parse_known_args tolerates extra command-line arguments; they end up in `unparsed`
    flags, unparsed = parser.parse_known_args()

    # NOTE(review): the target directory is passed to download_and_extract_dataset()
    # rather than to the constructor - confirm this matches the signature of the
    # AudiverisOmrDatasetDownloader version in use.
    dataset_downloader = AudiverisOmrDatasetDownloader()
    dataset_downloader.download_and_extract_dataset(flags.raw_dataset_directory)

    # Convert the raw data into images
    image_generator = AudiverisOmrImageGenerator()
    image_generator.extract_symbols(flags.raw_dataset_directory, flags.intermediate_image_directory)

    # Actually prepare our dataset
    dataset_preparer = AudiverisOmrImageExtractor()
    dataset_preparer.prepare_dataset(flags.intermediate_image_directory, flags.image_dataset_directory)
staff_line_spacing=14, staff_line_vertical_offsets=[24])

#%% Printed MusicSymbols
# Download and extract the printed music symbols dataset. "./data/printed" is
# handed to the downloader - presumably the destination directory (confirm).
from omrdatasettools.downloaders.PrintedMusicSymbolsDatasetDownloader import PrintedMusicSymbolsDatasetDownloader
dataset_downloader = PrintedMusicSymbolsDatasetDownloader("./data/printed")
dataset_downloader.download_and_extract_dataset()

#%% Audiveris
# Download and extract the Audiveris OMR dataset, then let the image generator
# read the raw data from ./data/audiveris and write its output to
# ./data/audiveris/images.
from omrdatasettools.downloaders.AudiverisOmrDatasetDownloader import AudiverisOmrDatasetDownloader
dataset_downloader = AudiverisOmrDatasetDownloader("./data/audiveris")
dataset_downloader.download_and_extract_dataset()
# NOTE(review): AudiverisOmrSymbol is imported but not used in this cell.
from omrdatasettools.image_generators.AudiverisOmrImageGenerator import AudiverisOmrImageGenerator, AudiverisOmrSymbol
imgen = AudiverisOmrImageGenerator()
imgen.extract_symbols(raw_data_directory='./data/audiveris', destination_directory='./data/audiveris/images')

#%% Open OMR
# Download and extract the OpenOMR dataset, using "./data/openomr" as the
# downloader's directory argument.
from omrdatasettools.downloaders.OpenOmrDatasetDownloader import OpenOmrDatasetDownloader
dataset_downloader = OpenOmrDatasetDownloader("./data/openomr")
dataset_downloader.download_and_extract_dataset()

#%% Capitan
# This cell is disabled: its code is wrapped in a string literal and therefore
# never executed.
''' from omrdatasettools.downloaders.CapitanDatasetDownloader import CapitanDatasetDownloader dataset_downloader = CapitanDatasetDownloader("./data/capitan") dataset_downloader.download_and_extract_dataset() '''