Beispiel #1
0
    def test_download_extract_and_crop_bitmaps(self):
        """Download the Audiveris OMR dataset, extract its symbols and count the PNGs.

        The downloaded archive and the ``temp`` working directory are removed
        in a ``finally`` clause, so a failed download, extraction or assertion
        does not leave artifacts behind for later test runs.
        """
        raw_directory = "temp/audiveris_omr_raw"
        image_directory = "temp/audiveris_omr_img"

        try:
            # Arrange
            dataset_downloader = AudiverisOmrDatasetDownloader(raw_directory)

            # Act
            dataset_downloader.download_and_extract_dataset()
            image_generator = AudiverisOmrImageGenerator()
            image_generator.extract_symbols(raw_directory, image_directory)
            # Collect the PNG files of every directory below the image directory.
            all_image_files = [
                image_file
                for directory, _, _ in os.walk(image_directory)
                for image_file in glob(os.path.join(directory, '*.png'))
            ]
            actual_number_of_files = len(all_image_files)

            # Assert
            self.assertEqual(1056, actual_number_of_files)
        finally:
            # Cleanup is best-effort: if an earlier step failed, the archive or
            # the directory may not exist, and a cleanup error must not hide
            # the original failure.
            if os.path.exists("AudiverisOmrDataset.zip"):
                os.remove("AudiverisOmrDataset.zip")
            shutil.rmtree("temp", ignore_errors=True)
    def __download_and_extract_datasets(self, datasets, width, height, use_fixed_canvas, staff_line_spacing,
                                        staff_line_vertical_offsets, stroke_thicknesses_for_generated_symbols,
                                        random_position_on_canvas: bool):
        """Download every requested dataset and place its images in the image dataset directory.

        Each recognised name contained in ``datasets`` ('homus', 'rebelo1',
        'rebelo2', 'printed', 'fornes', 'audiveris', 'muscima_pp', 'openomr')
        triggers the matching downloader and, where one exists, the matching
        image generator / preparer. Names that are not recognised are ignored.

        :param datasets: container that is checked with ``in`` for dataset names.
        :param width: canvas width handed to the HOMUS image generator when
            ``use_fixed_canvas`` is truthy.
        :param height: canvas height handed to the HOMUS image generator when
            ``use_fixed_canvas`` is truthy.
        :param use_fixed_canvas: if falsy, ``None`` is passed as width and
            height so symbols are drawn at their original shapes.
        :param staff_line_spacing: forwarded to ``HomusImageGenerator.create_images``.
        :param staff_line_vertical_offsets: forwarded to ``HomusImageGenerator.create_images``.
        :param stroke_thicknesses_for_generated_symbols: forwarded to
            ``HomusImageGenerator.create_images``.
        :param random_position_on_canvas: forwarded to ``HomusImageGenerator.create_images``.
        """

        def in_dataset_directory(name):
            # Raw downloads and caches are all placed below self.dataset_directory.
            return os.path.join(self.dataset_directory, name)

        if 'homus' in datasets:
            homus_raw_directory = in_dataset_directory("homus_raw")
            HomusDatasetDownloader(homus_raw_directory).download_and_extract_dataset()

            # Without a fixed canvas no size is handed over, which allows the
            # symbols to be drawn at their original shapes.
            canvas_width, canvas_height = (width, height) if use_fixed_canvas else (None, None)
            symbol_bounding_boxes = HomusImageGenerator.create_images(
                homus_raw_directory, self.image_dataset_directory,
                stroke_thicknesses_for_generated_symbols,
                canvas_width, canvas_height,
                staff_line_spacing, staff_line_vertical_offsets,
                random_position_on_canvas)

            # The bounding boxes are pickled (binary) despite the .txt extension.
            with open(in_dataset_directory("bounding_boxes.txt"), "wb") as cache_file:
                pickle.dump(symbol_bounding_boxes, cache_file)

        # These three datasets are extracted straight into the image directory.
        if 'rebelo1' in datasets:
            RebeloMusicSymbolDataset1Downloader(self.image_dataset_directory).download_and_extract_dataset()

        if 'rebelo2' in datasets:
            RebeloMusicSymbolDataset2Downloader(self.image_dataset_directory).download_and_extract_dataset()

        if 'printed' in datasets:
            PrintedMusicSymbolsDatasetDownloader(self.image_dataset_directory).download_and_extract_dataset()

        if 'fornes' in datasets:
            fornes_raw_directory = in_dataset_directory("fornes_raw")
            FornesMusicSymbolsDatasetDownloader(fornes_raw_directory).download_and_extract_dataset()
            FornesMusicSymbolsImagePreparer().prepare_dataset(fornes_raw_directory, self.image_dataset_directory)

        if 'audiveris' in datasets:
            # Two-stage conversion: raw data -> intermediate bitmaps -> image directory.
            audiveris_raw_directory = in_dataset_directory("audiveris_omr_raw")
            audiveris_bitmap_directory = in_dataset_directory("audiveris_omr_images")
            AudiverisOmrDatasetDownloader(audiveris_raw_directory).download_and_extract_dataset()
            AudiverisOmrImageGenerator().extract_symbols(audiveris_raw_directory, audiveris_bitmap_directory)
            AudiverisOmrImageExtractor().prepare_dataset(audiveris_bitmap_directory, self.image_dataset_directory)

        if 'muscima_pp' in datasets:
            muscima_raw_directory = in_dataset_directory("muscima_pp_raw")
            MuscimaPlusPlusDatasetDownloader(muscima_raw_directory).download_and_extract_dataset()
            MuscimaPlusPlusImageGenerator2().extract_symbols_for_training(muscima_raw_directory,
                                                                          self.image_dataset_directory)

        if 'openomr' in datasets:
            open_omr_raw_directory = in_dataset_directory("open_omr_raw")
            OpenOmrDatasetDownloader(open_omr_raw_directory).download_and_extract_dataset()
            OpenOmrImagePreparer().prepare_dataset(open_omr_raw_directory, self.image_dataset_directory)
if __name__ == "__main__":
    # Command-line entry point: download the Audiveris OMR dataset, render its
    # symbols into bitmaps and then prepare the final image dataset.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--raw_dataset_directory",
        type=str,
        default="../data/audiveris_omr_raw",
        help="The directory, where the raw Audiveris OMR dataset can be found")
    parser.add_argument(
        "--intermediate_image_directory",
        type=str,
        default="../data/audiveris_omr",
        help="The directory, where the raw bitmaps will be generated")
    parser.add_argument(
        "--image_dataset_directory",
        type=str,
        default="../data/images",
        help="The directory, where the processed bitmaps will be copied to after filtering and renaming classes")

    # parse_known_args tolerates unrecognised arguments; they end up in
    # `unparsed` and are not used any further.
    flags, unparsed = parser.parse_known_args()

    # NOTE(review): the target directory is passed to
    # download_and_extract_dataset() here rather than to the constructor -
    # confirm this matches the installed downloader API.
    dataset_downloader = AudiverisOmrDatasetDownloader()
    dataset_downloader.download_and_extract_dataset(flags.raw_dataset_directory)

    # Convert the raw data into images
    image_generator = AudiverisOmrImageGenerator()
    image_generator.extract_symbols(flags.raw_dataset_directory, flags.intermediate_image_directory)

    # Actually prepare our dataset
    dataset_preparer = AudiverisOmrImageExtractor()
    dataset_preparer.prepare_dataset(flags.intermediate_image_directory, flags.image_dataset_directory)
                                  staff_line_spacing=14,
                                  staff_line_vertical_offsets=[24])

#%% Printed MusicSymbols
# Download and extract the Printed Music Symbols dataset; "./data/printed" is
# presumably the destination directory - confirm against the downloader.
from omrdatasettools.downloaders.PrintedMusicSymbolsDatasetDownloader import PrintedMusicSymbolsDatasetDownloader
dataset_downloader = PrintedMusicSymbolsDatasetDownloader("./data/printed")
dataset_downloader.download_and_extract_dataset()

#%% Audiveris
# Download and extract the Audiveris OMR dataset, then extract its symbols
# into images.
from omrdatasettools.downloaders.AudiverisOmrDatasetDownloader import AudiverisOmrDatasetDownloader
dataset_downloader = AudiverisOmrDatasetDownloader("./data/audiveris")
dataset_downloader.download_and_extract_dataset()

# NOTE(review): AudiverisOmrSymbol is imported but not used in the visible
# part of this script.
from omrdatasettools.image_generators.AudiverisOmrImageGenerator import AudiverisOmrImageGenerator, AudiverisOmrSymbol

# The images are written to a sub-directory of the raw data directory.
imgen = AudiverisOmrImageGenerator()
imgen.extract_symbols(raw_data_directory='./data/audiveris',
                      destination_directory='./data/audiveris/images')

#%% Open OMR
# Download and extract the OpenOMR dataset; no image conversion step follows
# in this cell.
from omrdatasettools.downloaders.OpenOmrDatasetDownloader import OpenOmrDatasetDownloader

dataset_downloader = OpenOmrDatasetDownloader("./data/openomr")
dataset_downloader.download_and_extract_dataset()

#%% Capitan
# The Capitan download is disabled: the code below is wrapped in a string
# literal and therefore never executed.
'''
from omrdatasettools.downloaders.CapitanDatasetDownloader import CapitanDatasetDownloader
dataset_downloader = CapitanDatasetDownloader("./data/capitan")
dataset_downloader.download_and_extract_dataset()
'''