def evaluate_result(mung_reference_file, predicted_mung_file):
    print("Computing statistics for {0}".format(predicted_mung_file))
    # Read crop objects list
    reference_objects = parse_cropobject_list(mung_reference_file)
    predicted_objects = parse_cropobject_list(predicted_mung_file)

    precision, recall, f1_score, true_positives, false_positives, false_negatives = \
        compute_statistics_on_crop_objects(reference_objects, predicted_objects)
    print(
        'Precision: {0:.3f}, Recall: {1:.3f}, F1-Score: {2:.3f}, True positives: {3}, False positives: {4}, '
        'False Negatives: {5}'.format(precision, recall, f1_score,
                                      true_positives, false_positives,
                                      false_negatives))
    return precision, recall, f1_score, true_positives, false_positives, false_negatives
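A minimal usage sketch; the two XML paths are hypothetical MuNG annotation files:

precision, recall, f1_score, tp, fp, fn = evaluate_result(
    'reference_annotations.xml',   # hypothetical ground-truth MuNG file
    'predicted_annotations.xml')   # hypothetical model-output MuNG file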
Example #2
def main(args):
    logging.info('Starting main...')
    _start_time = time.clock()

    mungs = parse_cropobject_list(args.mung)

    img = imread(args.image, mode='L')

    output_mungs = []
    for m in mungs:
        if (args.classes is not None) and (m.clsname not in args.classes):
            output_mungs.append(m)
            continue
        if (not args.force_binarize) and (m.mask.nonzero()[0].shape[0] == 0):
            output_mungs.append(m)
            continue
        output_mungs.append(binarize_mask(m, img, inplace=True))

    xml = export_cropobject_list(output_mungs)
    with open(args.output_mung, 'w') as hdl:
        hdl.write(xml)
        hdl.write('\n')

    _end_time = time.clock()
    logging.info('binarize_masks.py done in {0:.3f} s'.format(_end_time - _start_time))
Example #3
def __load_mung(filename: str, exclude_classes: List[str]):
    mungos = parse_cropobject_list(filename)
    mung = NotationGraph(mungos)
    objects_to_exclude = [m for m in mungos if m.clsname in exclude_classes]
    for m in objects_to_exclude:
        mung.remove_vertex(m.objid)
    return mung
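A hedged call sketch; the excluded class names are assumptions about the annotation's label set:

mung_graph = __load_mung('CVC-MUSCIMA_W-01_N-10_D-ideal.xml',
                         exclude_classes=['staff_line', 'staff_space'])  # assumed clsnames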
Example #4
def count_cropobjects_and_relationships(annot_file):
    cropobjects = parse_cropobject_list(annot_file)
    n_inlinks = 0
    for c in cropobjects:
        if c.inlinks is not None:
            n_inlinks += len(c.inlinks)
    return len(cropobjects), n_inlinks
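A sketch that aggregates these counts over a directory of annotation files (the directory name is an assumption):

annot_dir = 'annotations'  # hypothetical directory of MuNG XML files
total_objects, total_inlinks = 0, 0
for fname in os.listdir(annot_dir):
    n_objects, n_inlinks = count_cropobjects_and_relationships(
        os.path.join(annot_dir, fname))
    total_objects += n_objects
    total_inlinks += n_inlinks
print('{0} objects, {1} relationships'.format(total_objects, total_inlinks))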
Example #5
def main(args):
    logging.info('Starting main...')
    _start_time = time.clock()

    ##########################################################################
    logging.info('Import the CropObject list')
    if not os.path.isfile(args.annot):
        raise ValueError('Annotation file {0} not found!'
                         ''.format(args.annot))
    cropobjects = parse_cropobject_list(args.annot)

    output_cropobjects = add_staff_relationships(
        cropobjects,
        notehead_staffspace_threshold=args.notehead_staffspace_threshold)

    ##########################################################################
    logging.info('Export the combined list.')
    cropobject_string = export_cropobject_list(output_cropobjects)

    if args.export is not None:
        with open(args.export, 'w') as hdl:
            hdl.write(cropobject_string)
    else:
        print(cropobject_string)

    _end_time = time.clock()
    logging.info('add_staff_relationships.py done in {0:.3f} s'
                 ''.format(_end_time - _start_time))
Example #6
    def treinar_knn(self, tipo, nome, caminho, data_source, parametros,
                    ie_dump):

        self.NOME_MODELO = nome
        self.TIPO_MODELO = tipo
        self.caminho = caminho

        if tipo == 'MANUSCRITO':
            if ie_dump == "S":
                print('Loading the MUSCIMA++ model')
                self.data_source = self.carregar_objeto()
                print('MUSCIMA++ model loaded')
            else:
                print('Creating the MUSCIMA++ model')
                self.CROPOBJECT_DIR = os.path.join(self.caminho, data_source)
                self.cropobject_fnames = [
                    os.path.join(self.CROPOBJECT_DIR, f)
                    for f in os.listdir(self.CROPOBJECT_DIR)
                ]
                self.data_source = [
                    parse_cropobject_list(f) for f in self.cropobject_fnames
                ]
                self.salvar_objeto(self.data_source)
                print('MUSCIMA++ model loaded and saved')
        elif tipo == 'DIGITAL':
            print('Loading the ScoreReader model')
            self.dataSourceScoreReader = self.loadDataSourceScoreReader()
            print('Model loaded')
        else:
            # The original code had a second, unreachable `elif tipo == 'MANUSCRITO'`
            # branch here; treat any other type as not implemented.
            print('Not implemented yet.....')

        if tipo == 'MANUSCRITO':
            print('Extracting figures from the MUSCIMA++ model')
            self.figuras = [
                self.extrair_elementos_muscima(self.modelo, parametros)
                for self.modelo in self.data_source
            ]
            print('MUSCIMA++ model figures extracted successfully...')
        elif tipo == 'DIGITAL':
            print('Extracting figures from the ScoreReader model')
            self.figuras = [
                self.extrair_elementos_score_reader(self.dataSourceScoreReader)
            ]
            print('ScoreReader model figures extracted successfully...')

        self.treinar(tipo, self.figuras)

        print("Saving model...")
        nome = self.salvar_modelo(nome, tipo)
        print("Model saved...")
        print("success...")
        return nome
Example #7
def main(debug=False):
    # TODO: figure out the difference between the two folders containing the ground truth in XML format
    CROPOBJECT_DIR = 'data/CVCMUSCIMA/MUSCIMA++/v1.0/data/cropobjects_manual'
    # CROPOBJECT_DIR = 'data/CVCMUSCIMA/MUSCIMA++/v1.0/data/cropobjects_withstaff'

    print()
    print("Reading list of xml annotations files...")
    cropobject_fnames = [
        os.path.join(CROPOBJECT_DIR, f)
        for f in tqdm(os.listdir(CROPOBJECT_DIR))
    ]
    # for debugging
    if debug:
        cropobject_fnames = cropobject_fnames[70:71]
    cropobject_fnames = sorted(
        cropobject_fnames)  # sort to ensure reproducibility

    print()
    print("Reading xml annotations...")
    docs = [parse_cropobject_list(f) for f in tqdm(cropobject_fnames)]

    # randomly shuffle the documents (seeded for reproducibility)
    np.random.seed(seed)
    np.random.shuffle(docs)
    random.seed(seed)  # this seed is used for the random patches

    # train/validation/test split
    beginValidationIndex = int(len(docs) * splitPoints[0] /
                               100)  # splitPoints[0] = % of items in train
    beginTestIndex = int(
        len(docs) * splitPoints[1] /
        100) + beginValidationIndex  # splitPoints[1] = % of items in val

    trainDocs = docs[:beginValidationIndex]
    valDocs = docs[beginValidationIndex:beginTestIndex]
    testDocs = docs[beginTestIndex:]

    if not checkAndClearDirectories(directories):
        return

    # for each set, generate the corresponding JSON file with the ground truth in COCO format

    with open(cAnnotationsDir + "/instances_train2019.json", "w") as f:
        f.write(getJSONfromDocs(trainDocs, cTrainImagesDir))

    with open(cAnnotationsDir + "/instances_val2019.json", "w") as f:
        f.write(getJSONfromDocs(valDocs, cValidationImagesDir))

    with open(cAnnotationsDir + "/instances_test2019.json", "w") as f:
        f.write(getJSONfromDocs(testDocs, cTestImagesDir))
Example #8
def main(args):
    logging.info('Starting main...')
    _start_time = time.clock()

    # Parse individual CropObject lists.
    cropobject_lists = []
    _n_parsed_cropobjects = 0
    for i, f in enumerate(args.input):
        cs = parse_cropobject_list(f)
        cropobject_lists.append(cs)

        # Logging progress
        _n_parsed_cropobjects += len(cs)
        if i % 10 == 0 and i > 0:
            _time_parsing = time.clock() - _start_time
            _cropobjects_per_second = old_div(_n_parsed_cropobjects,
                                              _time_parsing)
            logging.info('Parsed {0} cropobjects in {1:.2f} s ({2:.2f} objs/s)'
                         ''.format(_n_parsed_cropobjects, _time_parsing,
                                   _cropobjects_per_second))

    # Merge the CropObject lists into one.
    # This is done so that the resulting object graph can be manipulated
    # at once, without objid clashes.
    cropobjects = merge_cropobject_lists(*cropobject_lists)

    edges = export_cropobject_graph(cropobjects)

    _parse_end_time = time.clock()
    logging.info('Parsing took {0:.2f} s'.format(_parse_end_time -
                                                 _start_time))

    ##########################################################################
    # Analysis

    # Here's where the results are stored, for export into various
    # formats. (Currently, we only print them.)
    stats = compute_cropobject_stats(cropobjects, edges=edges)

    ##########################################################################
    # Export
    if args.emit == 'print':
        emit_stats_pprint(stats)
    # More export options:
    #  - json
    #  - latex table

    _end_time = time.clock()
    logging.info('analyze_annotations.py done in {0:.3f} s'
                 ''.format(_end_time - _start_time))
Example #9
def main(args):
    logging.info('Starting main...')
    _start_time = time.clock()

    cropobjects = parse_cropobject_list(args.input_mung)

    output_cropobjects = build_SEILS_composites(cropobjects)

    xml = export_cropobject_list(output_cropobjects)
    with open(args.output_mung, 'w') as hdl:
        hdl.write(xml)
        hdl.write('\n')

    _end_time = time.clock()
    logging.info('build_composites.py done in {0:.3f} s'.format(_end_time - _start_time))
def main(args):
    logging.info('Starting main...')
    _start_time = time.clock()

    logging.warning('Merging CropObject lists is now very dangerous,'
                    ' because of the uid situation.')

    inputs = [parse_cropobject_list(f) for f in args.inputs]
    merged = merge_cropobject_lists(*inputs)
    with codecs.open(args.output, 'w', 'utf-8') as hdl:
        hdl.write(export_cropobject_list(merged))
        hdl.write('\n')

    _end_time = time.clock()
    logging.info('merge_cropobject_lists.py done in {0:.3f} s'.format(_end_time - _start_time))
def main():
    # TODO: figure out the difference between the two XMLs
    CROPOBJECT_DIR = 'data/CVCMUSCIMA/MUSCIMA++/v1.0/data/cropobjects_manual'
    # CROPOBJECT_DIR = 'data/CVCMUSCIMA/MUSCIMA++/v1.0/data/cropobjects_withstaff'

    cropobject_fnames = [
        os.path.join(CROPOBJECT_DIR, f) for f in os.listdir(CROPOBJECT_DIR)
    ]
    # for debugging
    cropobject_fnames = cropobject_fnames[:5]

    docs = [parse_cropobject_list(f) for f in cropobject_fnames]

    for docID in range(len(docs)):
        doc = docs[docID]

        # get the writer (w) and page (p) IDs from the XML uid
        w = doc[0].uid[31:33]
        p = doc[0].uid[36:38]

        imgPath = "data/CVCMUSCIMA/CvcMuscima-Distortions/ideal/w-" + w + "/image/p0" + p + ".png"
        imgStaffPath = "data/CVCMUSCIMA/CvcMuscima-Distortions/ideal/w-" + w + "/gt/p0" + p + ".png"

        imgStaff = mpimg.imread(imgStaffPath)
        img = mpimg.imread(imgPath)

        horizontalProjection = np.sum(imgStaff, axis=1)

        pentasSeparators = getPentasSeparators(horizontalProjection)

        notesAnnotations = getOrderedNotesAnnotations(doc, imgStaff)

        # to plot the horizontal projection, the vector must first be converted to a list
        plt.plot(horizontalProjection.tolist())
        plt.show()

        imgStaff = getPreprocessedStaffImage(imgStaff)
        imgStaffLedgers = getStaffImageWithLedgers(imgStaff, doc)

        notesPositions = getNotesPentasPositions(imgStaff, imgStaffLedgers,
                                                 notesAnnotations)
        print(notesPositions)
        if isGuiAvailable:
            plt.imshow(img, cmap="gray")
            plt.show()
Example #12
def draw_bounding_boxes_into_image(image_path: str, ground_truth_annotations_path: str, destination_path: str,
                                   classes_mapping):
    crop_objects = parse_cropobject_list(ground_truth_annotations_path)
    img = cv2.imread(image_path, cv2.IMREAD_COLOR)  # the original passed True, which coerces to this flag

    for index, crop_object in enumerate(crop_objects):
        # Bounding-box corners of the crop object
        x1 = crop_object.left
        y1 = crop_object.top
        x2 = crop_object.right
        y2 = crop_object.bottom

        color_name = STANDARD_COLORS[classes_mapping[crop_object.clsname] % len(STANDARD_COLORS)]
        color = ImageColor.getrgb(color_name)
        cv2.rectangle(img, (x1, y1), (x2, y2), color, 3)
        # cv2.putText(img, crop_object.clsname + '/' + str(index + 1), (x1, y1), cv2.FONT_HERSHEY_PLAIN, 1, (255, 0, 0), 1)

    cv2.imwrite(destination_path, img)
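A usage sketch; the paths and the class-to-index mapping are assumptions (any mapping from clsname to an integer works, since it is only used to pick a color):

classes_mapping = {'notehead-full': 0, 'stem': 1, 'beam': 2}  # hypothetical mapping
draw_bounding_boxes_into_image('page.png',              # hypothetical input image
                               'page_annotations.xml',  # hypothetical MuNG file
                               'page_with_boxes.png',
                               classes_mapping)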
Example #13
def main(args):
    logging.info('Starting main...')
    _start_time = time.clock()

    # Your code goes here
    ##########################################################################
    logging.info('Import the CropObject list')
    if not os.path.isfile(args.annot):
        raise ValueError('Annotation file {0} not found!'
                         ''.format(args.annot))
    cropobjects = parse_cropobject_list(args.annot)

    _cropobjects_dict = {c.objid: c for c in cropobjects}

    ##########################################################################
    staff_cropobjects_dict = {c.objid: c for c in cropobjects
                              if c.clsname in STAFF_CLSNAMES}

    output_cropobjects = []
    for c in cropobjects:
        if c.objid in staff_cropobjects_dict:
            continue
        new_c = copy.deepcopy(c)
        new_c.inlinks = [i for i in c.inlinks
                         if i not in staff_cropobjects_dict]
        new_c.outlinks = [o for o in c.outlinks
                          if o not in staff_cropobjects_dict]
        output_cropobjects.append(new_c)

    ##########################################################################
    logging.info('Export the stripped list.')
    cropobject_string = export_cropobject_list(output_cropobjects)

    if args.export is not None:
        with open(args.export, 'w') as hdl:
            hdl.write(cropobject_string)
    else:
        print(cropobject_string)



    _end_time = time.clock()
    logging.info('[XXXX] done in {0:.3f} s'.format(_end_time - _start_time))
Example #14
    def load_mungos(self, classes=None, by_page=False):
        """Loads all the available MuNG objects as a list. You need to make
        sure the objids don't clash across pages!"""
        self.update()
        if 'mung' not in self.views:
            raise MSMDDBError('Score {0}: mung view not available!'
                              ''.format(self.name))
        mung_files = self.view_files('mung')

        mungos = []
        for f in mung_files:
            ms = parse_cropobject_list(f)
            if by_page:
                mungos.append(ms)
            else:
                mungos.extend(ms)

        if classes is not None:
            mungos = [m for m in mungos if m.clsname in classes]

        return mungos
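A sketch, assuming ``score`` is an instance with a 'mung' view available; the class names are assumptions:

noteheads = score.load_mungos(classes=['notehead-full', 'notehead-empty'])
pages = score.load_mungos(by_page=True)  # list of per-page MuNG object lists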
Example #15
def create_parsing_model(mung_dir,
                         output_dir,
                         output_name,
                         do_eval=False,
                         strategy=PairwiseParsingStrategy()):
    """Creates the vectorizer and parsing classifier and pickles
    them into the given directory.

    :param mung_dir: Input MuNG directory, from which the parser/vectorizer
        will be trained.

    :param output_dir: Output directory, into which the vectorizer
        and classifier will be pickled.

    :param output_name: The root name of the vectorizer and classifier.
        The final names will be created by adding ``.vectorizer.pkl``
        and ``.classifier.pkl``.

    :param strategy: Specify the (hyper)parameters of the parser through
        a ``PairwiseParsingStrategy`` object.
    """
    mungs = []
    for f in os.listdir(mung_dir):
        if not f.endswith('.xml'):
            continue
        mung = parse_cropobject_list(os.path.join(mung_dir, f))
        mungs.append(mung)

    vectorizer, clf = train_clf(mungs, strategy=strategy, do_eval=do_eval)

    if not os.path.isdir(output_dir):
        os.mkdir(output_dir)

    vec_name = os.path.join(output_dir, output_name + '.vectorizer.pkl')
    with open(vec_name, 'wb') as hdl:
        pickle.dump(vectorizer, hdl, protocol=pickle.HIGHEST_PROTOCOL)

    clf_name = os.path.join(output_dir, output_name + '.classifier.pkl')
    with open(clf_name, 'wb') as hdl:
        pickle.dump(clf, hdl, protocol=pickle.HIGHEST_PROTOCOL)
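A usage sketch under assumed paths; the pickle names follow the ``.vectorizer.pkl``/``.classifier.pkl`` convention from the docstring:

create_parsing_model(mung_dir='data/mungs',    # hypothetical input directory
                     output_dir='models',      # hypothetical output directory
                     output_name='pairwise_parser',
                     do_eval=True)
# -> models/pairwise_parser.vectorizer.pkl and models/pairwise_parser.classifier.pkl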
Example #16
    def normalize_annotations(self, muscima_pp_directory: str,
                              output_directory: str) -> None:

        destination_annotation_file = os.path.join(output_directory,
                                                   "annotations.csv")

        raw_data_directory = os.path.join(muscima_pp_directory, "v1.0", "data",
                                          "cropobjects_withstaff")
        xml_file_paths = [
            y for x in os.walk(raw_data_directory)
            for y in glob(os.path.join(x[0], '*.xml'))
        ]
        all_crop_objects = []  # type: List[CropObject]
        for xml_file_path in tqdm(
                xml_file_paths,
                desc="Loading annotations from MUSCIMA++ dataset"):
            all_crop_objects.extend(parse_cropobject_list(xml_file_path))
            break  # NOTE: stops after the first XML file -- probably a debugging leftover

        data = []
        for crop_object in tqdm(all_crop_objects, "Converting annotations"):
            writer = re.search("W-..", crop_object.doc).group()
            page = int(re.search("N-..", crop_object.doc).group()[2:])
            filename = "images/CVC-MUSCIMA_{0}_{1}_D-ideal.png".format(
                writer, page)
            class_name = crop_object.clsname
            top = crop_object.top
            left = crop_object.left
            bottom = crop_object.bottom
            right = crop_object.right
            data.append((filename, top, left, bottom, right, class_name))

        all_annotations = pd.DataFrame(data=data,
                                       columns=[
                                           "path_to_image", "top", "left",
                                           "bottom", "right", "class_name"
                                       ])
        all_annotations.to_csv(destination_annotation_file,
                               index=False,
                               float_format="%.0f")
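A hedged call sketch; ``normalizer`` is a hypothetical instance of the surrounding class, and both directories are assumptions:

normalizer.normalize_annotations('data/muscima_pp',  # hypothetical MUSCIMA++ root
                                 'data/normalized')  # annotations.csv is written here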
def main(debug=False):
    CROPOBJECT_DIR = 'data/CVCMUSCIMA/MUSCIMA++/v1.0/data/cropobjects_manual'
    # CROPOBJECT_DIR = 'data/CVCMUSCIMA/MUSCIMA++/v1.0/data/cropobjects_withstaff'

    print("Reading files from MUSCIMA...")
    print()
    cropobject_fnames = [
        os.path.join(CROPOBJECT_DIR, f)
        for f in tqdm(os.listdir(CROPOBJECT_DIR))
    ]
    # for debugging
    if debug:
        cropobject_fnames = cropobject_fnames[70:71]

    print("Reading documents from files...")
    print()
    docs = [parse_cropobject_list(f) for f in tqdm(cropobject_fnames)]

    if not checkAndClearDirectories():
        return

    for docID in tqdm(range(len(docs))):
        doc = docs[docID]
        convert(128, 128, doc)
Example #18
        checkpoint = torch.load(args.params)
        mung_linker_network.load_state_dict(checkpoint['model_state_dict'])
        model = PyTorchNetwork(net=mung_linker_network)
        print(
            "Loaded model which has trained {0} epochs and achieved validation loss of {1:.3f}"
            "".format(checkpoint["epoch"], checkpoint["best_validation_loss"]))

    runner = MunglinkerRunner(model=model,
                              config=config,
                              runtime_batch_iterator=runtime_batch_iterator)

    for i, (image_file, input_mung_file, output_mung_file,
            ground_truth_mung_file) in enumerate(
                zip(image_files, input_mung_files, output_mung_files,
                    ground_truth_mung_files)):
        input_mungos = parse_cropobject_list(input_mung_file)
        input_mung = NotationGraph(input_mungos)

        print('Running Munglinker: {} / {}'.format(i, len(image_files)))
        output_mung = runner.run(image_file, input_mung)
        with open(output_mung_file, 'w') as file:
            file.write(export_cropobject_list(output_mung.cropobjects))

        precision, recall, f1_score, true_positives, false_positives, false_negatives = \
            evaluate_result(ground_truth_mung_file, output_mung_file)

        results.append((input_mung_file, precision, recall, f1_score,
                        true_positives, false_positives, false_negatives))

        if args.play:
            mf = build_midi(nodes=output_mung.cropobjects)
Example #19
def main(args):
    logging.info('Starting main...')
    _start_time = time.clock()

    ##########################################################################
    logging.info('Converting to absolute paths...')
    root = None
    if args.root is not None:
        root = os.path.abspath(args.root)
    output_dir = os.path.abspath(args.output_dir)
    input_files = [os.path.abspath(f) for f in args.input_files]
    logging.info('Root: {0}'.format(root))
    logging.info('Output: {0}'.format(output_dir))
    logging.info('Example input: {0}'.format(input_files[0]))

    ##########################################################################
    # Get output filenames,
    # fail on non-corresponding input file and root.
    logging.info('Inferring output pathnames...')
    if args.inplace:
        output_files = input_files
    else:
        if args.root is None:
            relative_files = input_files
        else:
            len_root = len(root)
            relative_files = []
            for f in input_files:
                if not os.path.samefile(os.path.commonpath([f, root]), root):
                    raise ValueError('Input file {0} does not seem to'
                                     ' come from the root directory {1}.'
                                     ''.format(f, root))
                relative_files.append(f[len_root + 1:])

        # Ensure output dir exists
        if not os.path.isdir(output_dir):
            os.mkdir(output_dir)

        logging.debug(
            'Making output file names. Output dir: {0}'.format(output_dir))
        logging.debug('Example rel file: {0}'.format(relative_files[0]))
        logging.debug('Ex. output: {0}'.format(
            os.path.join(output_dir, relative_files[0])))
        output_files = [os.path.join(output_dir, f) for f in relative_files]
        logging.debug('Local Example output file: {0}'.format(output_files[0]))

    logging.info('Example output file: {0}'.format(output_files[0]))

    ##########################################################################
    # Parse cropobjects
    logging.info('Parsing cropobject files ({0} total)...'.format(
        len(input_files)))
    cropobjects_for_files = []
    for i, f in enumerate(input_files):
        cropobjects_for_files.append(parse_cropobject_list(f))
        if (i > 0) and (i % 10 == 0):
            logging.info('Parsed {0} files.'.format(i))

        if args.recode_uids:
            dataset_namespace = args.uid_dataset_namespace
            document_namespace = get_document_namespace(filename=f,
                                                        root=root,
                                                        output_dir=output_dir)
            recoded_cropobjects = recode_ids(
                cropobjects_for_files[-1],
                document_namespace=document_namespace,
                dataset_namespace=dataset_namespace)
            cropobjects_for_files[-1] = recoded_cropobjects

    ##########################################################################
    logging.info('Exporting cropobjects...')
    _i = 0
    for output_file, c in zip(output_files, cropobjects_for_files):
        s = export_cropobject_list(c)
        with open(output_file, 'w') as hdl:
            hdl.write(s)
            hdl.write('\n')

        _i += 1
        if (_i % 10) == 0:
            logging.info('Done: {0} files'.format(_i))

    _end_time = time.clock()
    logging.info(
        'recode_xy_to_topleft.py done in {0:.3f} s'.format(_end_time -
                                                           _start_time))
Example #20
def count_cropobjects(annot_file):
    return len(parse_cropobject_list(annot_file))
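A sketch that sums the counts over a dataset directory (the path is an assumption):

n_total = sum(count_cropobjects(os.path.join('annotations', f))
              for f in os.listdir('annotations'))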
import matplotlib.image as mpimg
# import matplotlib.pyplot as plt
from muscima.io import parse_cropobject_list
import os
import xml.etree.cElementTree as ET
import xml.dom.minidom as minidom
from tqdm import tqdm

import random
import cv2

from conversion.utils import *
''' better to use convert_to_coco '''

docDiProva = parse_cropobject_list(
    'data/CVCMUSCIMA/MUSCIMA++/v1.0/data/cropobjects_manual/'
    'CVC-MUSCIMA_W-01_N-10_D-ideal.xml')
dataDir = 'data/MNR2019'
imagesDir = dataDir + '/JPEGImages'
annotationsDir = dataDir + '/Annotations'
globalPatchesCounter = 1


# Returns a list of images containing only the staves, with their respective y OFFSETS
# The input is the source image
def getStaffsFromImage(img, imgStaff):
    horizontalProjection = np.sum(imgStaff, axis=1)
    pentasSeparators = getPentasSeparators(horizontalProjection)
    staffsAndOffsetsY = []

    # cut the source image (number of separators - 1) times and save each slice as a staff
Example #22
def main(args):
    logging.info('Starting main...')
    _start_time = time.clock()

    ########################################################
    # Load gt image.
    logging.info('Loading staffline image.')
    #  - Initialize Dataset. This checks for the root.

    if args.staff_imfile is None:
        cvc_dataset = CVC_MUSCIMA(root=args.root)
        args.staff_imfile = cvc_dataset.imfile(page=args.number,
                                               writer=args.writer,
                                               distortion='ideal',
                                               mode='staff_only')

    # - Load the image.
    gt = (imread(args.staff_imfile, as_grey=True) * 255).astype('uint8')

    # - Cast as binary mask.
    gt[gt > 0] = 1

    ########################################################
    # Locate stafflines in gt image.
    logging.info('Getting staffline connected components.')

    #  - Get connected components in gt image.
    cc, labels, bboxes = compute_connected_components(gt)

    #  - Use vertical dimension of CCs to determine which ones belong together
    #    to form stafflines. (Criterion: row overlap.)
    n_rows, n_cols = gt.shape
    intervals = [[] for _ in range(n_rows)] # For each row: which CCs have pxs on that row?
    for label, (t, l, b, r) in list(bboxes.items()):
        if label == 0:
            continue
        # Ignore very short staffline segments that can easily be artifacts
        # and should not affect the vertical range of the staffline anyway.
        if (r - l) < 8:
            continue
        for row in range(t, b):
            intervals[row].append(label)

    logging.info('Grouping staffline connected components into stafflines.')
    staffline_components = []   # For each staffline, we collect the CCs that it is made of
    _in_staffline = False
    _current_staffline_components = []
    for r_labels in intervals:
        if not _in_staffline:
            # Last row did not contain staffline components.
            if len(r_labels) == 0:
                # No staffline component on current row
                continue
            else:
                _in_staffline = True
                _current_staffline_components += r_labels
        else:
            # Last row contained staffline components.
            if len(r_labels) == 0:
                # Current staffline has no more rows.
                staffline_components.append(set(_current_staffline_components))
                _current_staffline_components = []
                _in_staffline = False
                continue
            else:
                # Current row contains staffline components: the current
                # staffline continues.
                _current_staffline_components += r_labels

    logging.info('No. of stafflines, with component groups: {0}'
                 ''.format(len(staffline_components)))

    # Now: merge the staffline components into one bbox/mask.
    logging.info('Merging staffline components into staffline bboxes and masks.')
    staffline_bboxes = []
    staffline_masks = []
    for sc in sorted(staffline_components,
                     key=lambda c: min([bboxes[cc][0]
                                        for cc in c])):  # Sorted top-down
        st, sl, sb, sr = n_rows, n_cols, 0, 0
        for component in sc:
            t, l, b, r = bboxes[component]
            st, sl, sb, sr = min(t, st), min(l, sl), max(b, sb), max(r, sr)
        _sm = gt[st:sb, sl:sr]
        staffline_bboxes.append((st, sl, sb, sr))
        staffline_masks.append(_sm)

    # Check if n. of stafflines is divisible by 5
    n_stafflines = len(staffline_bboxes)
    logging.info('\tTotal stafflines: {0}'.format(n_stafflines))
    if n_stafflines % 5 != 0:
        import matplotlib.pyplot as plt
        stafflines_mask_image = numpy.zeros(gt.shape)
        for i, (_sb, _sm) in enumerate(zip(staffline_bboxes, staffline_masks)):
            t, l, b, r = _sb
            stafflines_mask_image[t:b, l:r] = min(255, (i * 333) % 255 + 40)
        plt.imshow(stafflines_mask_image, cmap='jet', interpolation='nearest')
        plt.show()
        raise ValueError('No. of stafflines is not divisible by 5!')

    logging.info('Creating staff bboxes and masks.')

    #  - Go top-down and group the stafflines by five to get staves.
    #    (The staffline bboxes are already sorted top-down.)
    staff_bboxes = []
    staff_masks = []

    for i in range(n_stafflines // 5):
        _sbb = staffline_bboxes[5*i:5*(i+1)]
        _st = min([bb[0] for bb in _sbb])
        _sl = min([bb[1] for bb in _sbb])
        _sb = max([bb[2] for bb in _sbb])
        _sr = max([bb[3] for bb in _sbb])
        staff_bboxes.append((_st, _sl, _sb, _sr))
        staff_masks.append(gt[_st:_sb, _sl:_sr])

    logging.info('Total staffs: {0}'.format(len(staff_bboxes)))

    ##################################################################
    # (Optionally fill in missing pixels, based on full image.)
    logging.info('SKIP: fill in missing pixels based on full image.')
    #  - Load full image
    #  - Find gap regions
    #  - Obtain gap region masks from full image
    #  - Add gap region mask to staffline mask.

    # Create the CropObjects for stafflines and staffs:
    #  - Load corresponding annotation, to which the stafflines and
    #    staves should be added. (This is needed to correctly set docname
    #    and objids.)
    if not args.annot:
        cropobjects = []
        next_objid = 0
        dataset_namespace = 'FCNOMR'
        docname = os.path.splitext(os.path.basename(args.staff_imfile))[0]
    else:
        if not os.path.isfile(args.annot):
            raise ValueError('Annotation file {0} does not exist!'.format(args.annot))

        logging.info('Creating cropobjects...')
        cropobjects = parse_cropobject_list(args.annot)
        logging.info('Non-staffline cropobjects: {0}'.format(len(cropobjects)))
        next_objid = max([c.objid for c in cropobjects]) + 1
        dataset_namespace = cropobjects[0].dataset
        docname = cropobjects[0].doc

    #  - Create the staffline CropObjects
    staffline_cropobjects = []
    for sl_bb, sl_m in zip(staffline_bboxes, staffline_masks):
        uid = CropObject.build_uid(dataset_namespace, docname, next_objid)
        t, l, b, r = sl_bb
        c = CropObject(objid=next_objid,
                       clsname=STAFFLINE_CLSNAME,
                       top=t, left=l, height=b - t, width=r - l,
                       mask=sl_m,
                       uid=uid)
        staffline_cropobjects.append(c)
        next_objid += 1

    if not args.stafflines_only:

        #  - Create the staff CropObjects
        staff_cropobjects = []
        for s_bb, s_m in zip(staff_bboxes, staff_masks):
            uid = CropObject.build_uid(dataset_namespace, docname, next_objid)
            t, l, b, r = s_bb
            c = CropObject(objid=next_objid,
                           clsname=STAFF_CLSNAME,
                           top=t, left=l, height=b - t, width=r - l,
                           mask=s_m,
                           uid=uid)
            staff_cropobjects.append(c)
            next_objid += 1

        #  - Add the inlinks/outlinks
        for i, sc in enumerate(staff_cropobjects):
            sl_from = 5 * i
            sl_to = 5 * (i + 1)
            for sl in staffline_cropobjects[sl_from:sl_to]:
                sl.inlinks.append(sc.objid)
                sc.outlinks.append(sl.objid)

        # Add the staffspaces.
        staffspace_cropobjects = []
        for i, staff in enumerate(staff_cropobjects):
            current_stafflines = [sc for sc in staffline_cropobjects if sc.objid in staff.outlinks]
            sorted_stafflines = sorted(current_stafflines, key=lambda x: x.top)

            current_staffspace_cropobjects = []

            # Percussion single-line staves do not have staffspaces.
            if len(sorted_stafflines) == 1:
                continue

            # Internal staffspace
            for s1, s2 in zip(sorted_stafflines[:-1], sorted_stafflines[1:]):
                # s1 is the UPPER staffline, s2 is the LOWER staffline
                # Left and right limits: to simplify things, we take the column
                # *intersection* of (s1, s2). This gives the invariant that
                # the staffspace is limited from top and bottom in each of its columns.
                l = max(s1.left, s2.left)
                r = min(s1.right, s2.right)

                # Shift s1, s2 to the right by this much to have the cols. align
                # All of these are non-negative.
                dl1, dl2 = l - s1.left, l - s2.left
                dr1, dr2 = s1.right - r, s2.right - r

                # The stafflines are not necessarily straight,
                # so top is given for the *topmost bottom edge* of the top staffline + 1

                # First create mask
                canvas = numpy.zeros((s2.bottom - s1.top, r - l), dtype='uint8')

                # Paste masks into canvas.
                # This assumes that the top of the bottom staffline is below
                # the top of the top staffline... and that the bottom
                # of the top staffline is above the bottom of the bottom
                # staffline. This may not hold in very weird situations,
                # but it's good for now.
                logging.debug('{0} {1}'.format(s1.bounding_box, s1.mask.shape))
                logging.debug('{0} {1}'.format(s2.bounding_box, s2.mask.shape))
                logging.debug(canvas.shape)
                logging.debug('l={0}, dl1={1}, dl2={2}, r={3}, dr1={4}, dr2={5}'
                              ''.format(l, dl1, dl2, r, dr1, dr2))
                #canvas[:s1.height, :] += s1.mask[:, dl1:s1.width-dr1]
                #canvas[-s2.height:, :] += s2.mask[:, dl2:s2.width-dr2]

                # We have to deal with staffline interruptions.
                # One way to do this
                # is watershed fill: put markers along the bottom and top
                # edge, use mask * 10000 as elevation

                s1_above, s1_below = staffline_surroundings_mask(s1)
                s2_above, s2_below = staffline_surroundings_mask(s2)

                # Get bounding boxes of the individual stafflines' masks
                # that intersect with the staffspace bounding box, in terms
                # of the staffline bounding box.
                s1_t, s1_l, s1_b, s1_r = 0, dl1, \
                                         s1.height, s1.width - dr1
                s1_h, s1_w = s1_b - s1_t, s1_r - s1_l
                s2_t, s2_l, s2_b, s2_r = canvas.shape[0] - s2.height, dl2, \
                                         canvas.shape[0], s2.width - dr2
                s2_h, s2_w = s2_b - s2_t, s2_r - s2_l

                logging.debug('{0} {1} {2} {3} {4}'.format(s1_t, s1_l, s1_b, s1_r, (s1_h, s1_w)))

                # We now take the intersection of s1_below and s2_above.
                # If there is empty space in the middle, we fill it in.
                staffspace_mask = numpy.ones(canvas.shape)
                staffspace_mask[s1_t:s1_b, :] -= (1 - s1_below[:, dl1:s1.width-dr1])
                staffspace_mask[s2_t:s2_b, :] -= (1 - s2_above[:, dl2:s2.width-dr2])

                ss_top = s1.top
                ss_bottom = s2.bottom
                ss_left = l
                ss_right = r

                uid = CropObject.build_uid(dataset_namespace, docname, next_objid)

                staffspace = CropObject(next_objid, STAFFSPACE_CLSNAME,
                                        top=ss_top, left=ss_left,
                                        height=ss_bottom - ss_top,
                                        width=ss_right - ss_left,
                                        mask=staffspace_mask,
                                        uid=uid)

                staffspace.inlinks.append(staff.objid)
                staff.outlinks.append(staffspace.objid)

                current_staffspace_cropobjects.append(staffspace)

                next_objid += 1

            # Add top and bottom staffspace.
            # These outer staffspaces will have the width
            # of their bottom neighbor, and height derived
            # from its mask columns.
            # This is quite approximate, but it should do.

            # Upper staffspace
            tsl = sorted_stafflines[0]
            tsl_heights = tsl.mask.sum(axis=0)
            tss = current_staffspace_cropobjects[0]
            tss_heights = tss.mask.sum(axis=0)

            uss_top = max(0, tss.top - max(tss_heights))
            uss_left = tss.left
            uss_width = tss.width
            # We scale the height down (divide by 1.2), so that large
            # noteheads do not "hang out" of the staffspace.
            uss_height = int(tss.height / 1.2)
            # Shift because of height downscaling:
            uss_top += tss.height - uss_height
            uss_mask = tss.mask[:uss_height, :] * 1

            uid = CropObject.build_uid(dataset_namespace, docname, next_objid)
            staffspace = CropObject(next_objid, STAFFSPACE_CLSNAME,
                                    top=uss_top, left=uss_left,
                                    height=uss_height,
                                    width=uss_width,
                                    mask=uss_mask,
                                    uid=uid)
            current_staffspace_cropobjects.append(staffspace)
            staff.outlinks.append(staffspace.objid)
            staffspace.inlinks.append(staff.objid)
            next_objid += 1

            # Lower staffspace
            bss = current_staffspace_cropobjects[-1]
            bss_heights = bss.mask.sum(axis=0)
            bsl = sorted_stafflines[-1]
            bsl_heights = bsl.mask.sum(axis=0)

            lss_top = bss.bottom # + max(bsl_heights)
            lss_left = bss.left
            lss_width = bss.width
            lss_height = int(bss.height / 1.2)
            lss_mask = bss.mask[:lss_height, :] * 1

            uid = CropObject.build_uid(dataset_namespace, docname, next_objid)
            staffspace = CropObject(next_objid, STAFFSPACE_CLSNAME,
                                    top=lss_top, left=lss_left,
                                    height=lss_height,
                                    width=lss_width,
                                    mask=lss_mask,
                                    uid=uid)
            current_staffspace_cropobjects.append(staffspace)
            staff.outlinks.append(staffspace.objid)
            staffspace.inlinks.append(staff.objid)
            next_objid += 1

            # ################ End of dealing with upper/lower staffspace ######

            # Add to current list
            staffspace_cropobjects += current_staffspace_cropobjects

        # - Join the lists together
        cropobjects_with_staffs = cropobjects \
                                  + staffline_cropobjects \
                                  + staffspace_cropobjects \
                                  + staff_cropobjects

    else:
        cropobjects_with_staffs = cropobjects + staffline_cropobjects

    logging.info('Exporting the new cropobject list: {0} objects'
                    ''.format(len(cropobjects_with_staffs)))
    # - Export the combined list.
    cropobject_string = export_cropobject_list(cropobjects_with_staffs)
    if args.export is not None:
        with open(args.export, 'w') as hdl:
            hdl.write(cropobject_string)
    else:
        print(cropobject_string)

    _end_time = time.clock()
    logging.info('add_staffline_symbols.py done in {0:.3f} s'
                    ''.format(_end_time - _start_time))
Example #23

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Converts MUSCIMA++ v1.0 to MUSCIMA++ v2.0')
    parser.add_argument('--source_directory', type=str, default="v1.0",
                        help='Directory of the MUSCIMA++ dataset v1.0')
    parser.add_argument("--destination_directory", type=str, default="v2.0",
                        help="Directory, where the upgraded MUSCIMA++ v2.0 dataset should be written to.")

    flags = parser.parse_args()

    source_directory = flags.source_directory
    destination_directory = flags.destination_directory

    directory_mapping = {"data/cropobjects_withstaff": "data/annotations"}

    for source_subdirectory, destination_subdirectory in directory_mapping.items():
        source = os.path.join(source_directory, source_subdirectory)
        destination = os.path.join(destination_directory, destination_subdirectory)

        for annotation_file in tqdm(os.listdir(source), "Converting annotations"):
            annotation_file_path = os.path.join(source, annotation_file)
            crop_objects = parse_cropobject_list(annotation_file_path)
            tree = parse(annotation_file_path)
            document = os.path.splitext(annotation_file)[0]
            upgraded_tree = upgrade_xml_file(tree, crop_objects, "MUSCIMA-pp_2.0", document)

            upgraded_tree.write(os.path.join(destination, annotation_file), encoding="utf-8",
                                xml_declaration=True)
            prettify_xml_file(os.path.join(destination, annotation_file))
Example #24
    def get_crop_objects_from_xml_file(self,
                                       xml_file: str) -> List[CropObject]:
        # e.g., xml_file = 'data/muscima_pp/v0.9/data/cropobjects/CVC-MUSCIMA_W-01_N-10_D-ideal.xml'
        crop_objects = parse_cropobject_list(xml_file)
        return crop_objects

    def load_crop_objects(self, xml_file):
        crop_objects = parse_cropobject_list(xml_file)
        for crop_object in crop_objects:
            # Some classes have special characters in their class name that we have to remove
            crop_object.clsname = crop_object.clsname.replace('"', '').replace('/', '').replace('.', '')
        return crop_objects
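A sketch showing the sanitized class names; ``loader`` is a hypothetical instance of the surrounding class:

crop_objects = loader.load_crop_objects('CVC-MUSCIMA_W-01_N-10_D-ideal.xml')
print(sorted({c.clsname for c in crop_objects}))  # no quotes, slashes, or dots remain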
Example #26
    def call(self, request):

        logging.info(
            'ObjectDetectionHandler: Calling with input bounding box {0}'
            ''.format(self.input_bounding_box))

        self.current_request = request

        # Format request for client
        #  (=pickle it, plus pickle-within-pickle for image array)
        f_request = self._format_request(request)

        _rstring = str(uuid.uuid4())
        temp_basename = 'MUSCIMarker.omrapp-request.' + _rstring + '.pkl'
        request_fname = os.path.join(self.tmp_dir, temp_basename)
        with open(request_fname, 'wb') as fh:  # binary mode: Python 3 pickle needs a bytes stream
            pickle.dump(f_request, fh, protocol=0)

        # Send to ObjectDetectionOMRAppClient
        # We didn't want to introduce "mhr" as a dependency,
        # so we wrote our own client for omrapp.

        response_basename = 'MUSCIMarker.omrapp-response.' + _rstring + '.xml'
        response_fname = os.path.join(self.tmp_dir, response_basename)

        client = ObjectDetectionOMRAppClient(host=HOST,
                                             port=self.port,
                                             request_file=request_fname,
                                             response_file=response_fname)
        client.call()
        #   ...this happens in ObjectDetectionOMRAppClient...
        # Open socket according to conf
        # Send request to server
        # Collect raw result
        # Close connection

        # Convert raw result (XML) to output representation (CropObjects)
        if not os.path.isfile(response_fname):
            raise OSError('ObjectDetectionHandler: Did not receive'
                          ' response file {0}'.format(response_fname))

        try:
            cropobjects = parse_cropobject_list(response_fname)
            print(export_cropobject_list(cropobjects))
            # Verify that result is valid (re-request on failure?)
            if os.path.isfile(response_fname):
                os.unlink(response_fname)

        except Exception:
            logging.warning('ObjectDetectionHandler: Could not parse'
                            ' response file {0}'.format(response_fname))
            cropobjects = []
        # finally:
        #     # Cleanup files.
        #     logging.info('Cleaning up files.')
        #     if os.path.isfile(request_fname):
        #         os.unlink(request_fname)

        # Bind output representation to self.result to fire bindings
        #  - Subsequent processing means adding the CropObjects
        #    into the current annotation, in this case.
        #  - This can also trigger auto-parse.
        self.response_cropobjects = cropobjects
Example #27
def main(args):
    logging.info('Starting main...')
    _start_time = time.clock()

    # The algorithm:
    #  - build the cost function(s) for a pair of CropObjects
    #  - align the objects, using the cost function

    # First alignment: try just matching a predicted object to the nearest
    # true object.
    # First distance function: proportion of shared pixels.
    # Rule: if two objects don't share a pixel, they cannot be considered related.
    # Object classes do not factor into this so far.

    truth = parse_cropobject_list(args.true)
    prediction = parse_cropobject_list(args.prediction)

    _parse_time = time.clock()
    logging.info(
        'Parsing {0} true and {1} prediction cropobjects took {2:.2f} s'
        ''.format(len(truth), len(prediction), _parse_time - _start_time))

    r, p, f = cropobjects_rpf(truth, prediction)

    _rpf_time = time.clock()
    logging.info('Computing {0} entries of r/p/f matrices took {1:.2f} s'
                 ''.format(
                     len(truth) * len(prediction), _rpf_time - _parse_time))

    alignment = align_cropobjects(truth, prediction, fscore=f)

    _aln_time = time.clock()
    logging.info('Computing alignment took {0:.2f} s'
                 ''.format(_aln_time - _rpf_time))

    # Now compute agreement: precision and recall on pixels
    # of the aligned CropObjects.

    # We apply strict clsnames only here, after the CropObjects have been
    # aligned to each other using pixel metrics.
    _strict_clsnames = (not args.no_strict_clsnames)
    total_r, total_p, total_f = rpf_given_alignment(
        alignment,
        r,
        p,
        strict_clsnames=_strict_clsnames,
        truths=truth,
        predictions=prediction)

    print('Truth objs.:\t{0}'.format(len(truth)))
    print('Pred. objs.:\t{0}'.format(len(prediction)))
    print('==============================================')
    print('Recall:\t\t{0:.3f}\nPrecision:\t{1:.3f}\nF-score:\t{2:.3f}'
          ''.format(total_r, total_p, total_f))

    ##########################################################################
    # Check if the alignment is a pairing -- find truth objects
    # with more than one prediction aligned to them.
    if args.analyze_alignment:
        t_aln_dict = collections.defaultdict(list)
        for i, j in alignment:
            t_aln_dict[i].append(prediction[j])

        multiple_truths = [
            truth[i] for i in t_aln_dict if len(t_aln_dict[i]) > 1
        ]
        multiple_truths_aln_dict = {
            t: t_aln_dict[t]
            for t in t_aln_dict if len(t_aln_dict[t]) > 1
        }

        print('Truth multi-aligned CropObject classes:\n{0}'
              ''.format(
                  pprint.pformat({(truth[t].objid, truth[t].clsname):
                                  [(p.objid, p.clsname) for p in t_aln_dict[t]]
                                  for t in multiple_truths_aln_dict})))

    ##########################################################################
    # Check if the aligned objects have the same classes
    if args.analyze_clsnames:
        different_clsnames_pairs = []
        for i, j in alignment:
            if truth[i].clsname != prediction[j].clsname:
                different_clsnames_pairs.append((truth[i], prediction[j]))
        print('Aligned pairs with different clsnames:\n{0}'
              ''.format('\n'.join([
                  '{0}.{1}\t{2}.{3}'
                  ''.format(t.objid, t.clsname, p.objid, p.clsname)
                  for t, p in different_clsnames_pairs
              ])))

    _end_time = time.clock()
    logging.info('analyze_agreement.py done in {0:.3f} s'.format(_end_time -
                                                                 _start_time))
Example #28
def main(args):
    logging.info('Starting main...')
    _start_time = time.clock()

    # Get list of MuNGs
    available_mung_names = [
        os.path.splitext(f)[0] for f in os.listdir(args.mung_dir)
        if f.endswith('.xml')
    ]
    mungs = {
        f: parse_cropobject_list(os.path.join(args.mung_dir, f + '.xml'))
        for f in available_mung_names
    }

    # Get union of all labels. (In some images, a label might not exist, but
    #  we still want to export at least the black screen.)
    available_clsnames = set(
        [c.clsname for c in itertools.chain.from_iterable(mungs.values())])

    # Create output directories
    if not os.path.isdir(args.output_masks):
        os.mkdir(args.output_masks)
    if not os.path.isdir(args.output_labels):
        os.mkdir(args.output_labels)
    for c in available_clsnames:
        c_mask_dir = os.path.join(args.output_masks, c)
        if not os.path.isdir(c_mask_dir):
            os.mkdir(c_mask_dir)
        c_labels_dir = os.path.join(args.output_labels, c)
        if not os.path.isdir(c_labels_dir):
            os.mkdir(c_labels_dir)

    if args.export_fulls:
        m_fulls_dir = os.path.join(args.output_masks, 'fulls')
        if not os.path.isdir(m_fulls_dir):
            os.mkdir(m_fulls_dir)
        l_fulls_dir = os.path.join(args.output_labels, 'fulls')
        if not os.path.isdir(l_fulls_dir):
            os.mkdir(l_fulls_dir)

    # Get list of images
    available_img_names = [
        os.path.splitext(f)[0] for f in os.listdir(args.img_dir)
        if (f.lower().endswith('jpg') or (f.lower().endswith('png')))
    ]

    # Get their intersection: both available MuNG and image.
    available_names = [
        f for f in available_mung_names if f in available_img_names
    ]

    # For each available MuNG/image pair:
    for f in available_names:
        print('Processing image: {}'.format(f))
        img_fpath = os.path.join(args.img_dir, f + '.png')
        if not os.path.isfile(img_fpath):
            img_fpath = img_fpath[:-3] + 'jpg'
        img = imread(img_fpath, mode='L')

        if args.export_fulls:
            m_full_file = os.path.join(m_fulls_dir,
                                       os.path.basename(img_fpath))
            imsave(m_full_file, img)
            l_full_file = os.path.join(l_fulls_dir,
                                       os.path.basename(img_fpath))
            imsave(l_full_file, img)

        img_h, img_w = img.shape
        mung = mungs[f]
        mung_dict = collections.defaultdict(list)
        for m in mung:
            mung_dict[m.clsname].append(m)

        # For each label:
        for c in available_clsnames:
            labels = numpy.zeros((img_h, img_w), dtype='uint16')
            c_mungs = mung_dict[c]
            for i, m in enumerate(c_mungs):
                label = i + 1
                labels[m.top:m.bottom, m.left:m.right] = label * m.mask

            # Export labels image
            output_labels_file = os.path.join(args.output_labels, c,
                                              f + '.png')
            imsave(output_labels_file, labels)

            # Export masks image
            output_mask_file = os.path.join(args.output_masks, c, f + '.png')
            mask = labels * 1
            mask[mask != 0] = 1
            imsave(output_mask_file, mask)

    _end_time = time.clock()
    logging.info('SEILS export_masks.py done in {0:.3f} s'.format(_end_time -
                                                                  _start_time))
Example #29
                _o_obj = _cropobj_dict[o]
                if _o_obj.clsname == 'stem':
                    _has_stem = True
                    stem_obj = _o_obj
                elif _o_obj.clsname == 'beam':
                    _has_beam_or_flag = True
                elif _o_obj.clsname.endswith('flag'):
                    _has_beam_or_flag = True
            if _has_stem and (not _has_beam_or_flag):
                # We also need to check against quarter-note chords.
                # Stems only have inlinks from noteheads, so checking
                # for multiple inlinks will do the trick.
                if len(stem_obj.inlinks) == 1:
                    notes.append((c, stem_obj))

    quarter_notes = [(n, s) for n, s in notes if n.clsname == 'notehead-full']
    half_notes = [(n, s) for n, s in notes if n.clsname == 'notehead-empty']
    return quarter_notes, half_notes


if __name__ == '__main__':
    # Change this to reflect wherever your MUSCIMA++ data lives
    CROPOBJECT_DIR = '../DataSets/MUSCIMA-pp_v1.0/v1.0/data/cropobjects_manual'

    cropobject_fnames = [
        os.path.join(CROPOBJECT_DIR, f) for f in os.listdir(CROPOBJECT_DIR)
    ]
    docs = [parse_cropobject_list(f) for f in cropobject_fnames]

    qns_and_hns = [extract_notes_from_doc(cropobjects) for cropobjects in docs]
Example #30
    def get_ordered_notes(self,
                          filter_tied=False,
                          reverse_columns=False,
                          return_columns=False):
        """Returns the MuNG objects corresponding to notes in the canonical
        ordering: by page, system, left-to-right, and top-down within
        simultaneities (e.g. chords).

        :param filter_tied: If set, removes noteheads that are marked as tied
            (``m.data['tied'] == 1``) from the output.

        :param reverse_columns: If set, will order the columns bottom-up
            instead of top-down. Use this for events alignment, not for score
            inference.

        :param return_columns: If set, returns the ordered columns
            (simultaneities) instead of the flattened list of notes.
        """
        self.update()
        if 'mung' not in self.views:
            raise MSMDDBError('Score {0}: mung view not available!'
                              ''.format(self.name))
        mung_files = self.view_files('mung')

        # Algorithm:
        #  - Create hard ordering constraints:
        #     - pages (already done: mungos_per_page)
        #     - systems

        notes_per_page = []

        for f in mung_files:
            mungos = parse_cropobject_list(f)
            mgraph = NotationGraph(mungos)
            _CONST = InferenceEngineConstants()

            note_mungos = [c for c in mungos if 'midi_pitch_code' in c.data]
            system_mungos = [c for c in mungos if c.clsname == 'staff']
            system_mungos = sorted(system_mungos, key=lambda m: m.top)

            notes_per_system = []

            for s in system_mungos:
                system_notes = mgraph.ancestors(
                    s, classes=_CONST.NOTEHEAD_CLSNAMES)
                for c in system_notes:
                    if 'midi_pitch_code' not in c.data:
                        print('Notehead without pitch: {0}' ''.format(str(c)))
                        continue
                    if c.data['midi_pitch_code'] is None:
                        print('Notehead with pitch=None: {0}'
                              ''.format(str(c)))

                system_notes = [
                    c for c in system_notes if 'midi_pitch_code' in c.data
                ]

                # print('Ancestors of system {0}: {1}'.format(s, system_notes))
                # Process simultaneities. We use a very small overlap ratio,
                # because we want to catch also chords that have noteheads
                # on both sides of the stem. Sorted top-down.
                # Remove all tied notes.
                if filter_tied:
                    system_notes = [
                        m for m in system_notes if ('tied' not in m.data) or (
                            ('tied' in m.data) and (m.data['tied'] != 1))
                    ]

                system_note_columns = group_mungos_by_column(
                    system_notes,
                    MIN_OVERLAP_RATIO=0.05,
                    reverse_columns=reverse_columns)
                # print('System {0}: n_columns = {1}'
                #       ''.format(s.objid, len(system_note_columns)))
                ltr_sorted_columns = sorted(system_note_columns.items(),
                                            key=lambda kv: kv[0])
                # print('ltr_sorted_columns[0] = {0}'.format(ltr_sorted_columns[0]))
                system_ordered_simultaneities = [
                    c[1] for c in ltr_sorted_columns
                ]
                # print('system_ordered_sims[0] = {0}'.format(system_ordered_simultaneities[0]))

                notes_per_system.append(system_ordered_simultaneities)

            # print('Total entries in notes_per_system = {0}'.format(len(notes_per_system)))
            notes_per_page.append(notes_per_system)

        # Data structure
        # --------------
        # notes_per_page = [
        #   notes_per_system_1 = [
        #       ordered_simultaneities = [
        #           simultaneity1 = [ a'', f'', c'', a' ],
        #           simultaneity2 = [ g'', e'', c'', bes' ],
        #           ...
        #       ]
        #   ],
        #   notes_per_system_2 = [
        #       simultaneity1 = [ ... ]
        #       ...
        #   ]
        # ]

        # Unroll simultaneities notes according to this data structure

        # DEBUG
        # print('notes_per_page: {0}'.format(pprint.pformat(notes_per_page)))

        ordered_simultaneities = []
        for page in notes_per_page:
            for system in page:
                ordered_simultaneities.extend(system)

        if return_columns:
            return ordered_simultaneities

        ordered_notes = []
        for sim in ordered_simultaneities:
            ordered_notes.extend(list(reversed(sim)))

        return ordered_notes
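A sketch, assuming ``score`` is an instance whose noteheads carry ``midi_pitch_code`` in their data:

notes = score.get_ordered_notes(filter_tied=True)
pitch_sequence = [m.data['midi_pitch_code'] for m in notes]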