Ejemplo n.º 1
0
def read_all_page_stats(books: List[str], ignore_page: List[str] = None):
    """Collect the page statistics of every page of the given books.

    Args:
        books: names of books; each is opened via DatabaseBook(name).
        ignore_page: optional substrings; a page is skipped when its
            identifier contains any of them. Defaults to no filtering.
            (A None default replaces the original mutable-list default.)

    Returns:
        A flat list with the result of page.page_statistics() for every
        non-ignored page of every book.
    """
    if ignore_page is None:
        ignore_page = []
    all_stats = []
    for book_name in books:
        # open the book by name; avoid shadowing the loop element
        db_book = DatabaseBook(book_name)
        for page in db_book.pages():
            # skip pages whose identifier matches any ignore pattern
            if any(s in page.page for s in ignore_page):
                continue
            all_stats.append(page.page_statistics())

    return all_stats
Ejemplo n.º 2
0
 def post(self, request, book, operation):
     """Return the pages of *book* still unprocessed by *operation*.

     The request body is a JSON-encoded PageSelectionParams dict; the
     response lists the selected page names plus selection metadata.
     """
     # FIX: the `encoding` kwarg of json.loads was removed in Python 3.9
     # (TypeError there); loads auto-detects the encoding of a bytes
     # payload since Python 3.6, so it can simply be dropped.
     body = json.loads(request.body)
     book = DatabaseBook(book)
     algorithm = Step.predictor(AlgorithmTypes(operation))
     page_selection = PageSelection.from_params(PageSelectionParams.from_dict(body), book)
     # only pages the algorithm has not processed yet are returned
     pages = page_selection.get_pages(algorithm.unprocessed)
     return Response({
         'pages': [p.page for p in pages],
         'pageCount': page_selection.page_count.value,
         'singlePage': page_selection.single_page,
         'book': book.book,
         'totalPages': len(book.pages()),
     })
Ejemplo n.º 3
0
            x.value: x
            for x in MonodiXlsxConfig.default_config
        }
        self.entries = MonodiXlsxConfig.default_config


if __name__ == '__main__':

    from matplotlib import pyplot as plt
    from PIL import Image
    import numpy as np
    from database import DatabaseBook, DatabaseFile
    from database.file_formats.pcgts import PageScaleReference

    # Demo: load the first page of the square-notation annotation book
    # together with its normalized color image and its music lines.
    book = DatabaseBook('Annotation___Square_Notation')
    pages = book.pages()[0]  # NOTE: despite the plural name, a single page

    # Load (creating it if missing) the PcGts annotation of that page.
    pcgts = [
        DatabaseFile(page, 'pcgts', create_if_not_existing=True).page.pcgts()
        for page in [pages]
    ]
    file = pages.file('color_norm_x2').local_path()
    orig = Image.open(file)
    orig = np.array(orig)  # PIL image -> numpy array
    lines = pcgts[0].page.all_music_lines()
    page = pcgts[0].page
    for p in lines:
        # page = p.line.operation.page

        def p2i(l):
            return page.page_to_image_scale(l,
Ejemplo n.º 4
0
                    connection = connections[0]
                    if document is not None:
                        # Restrict note emission to the line span covered by
                        # the document (its start line .. end line).
                        line_id_start = document.start.line_id
                        line_id_end = document.end.line_id

                        line_ids = [line.id for line in connection.text_region.lines]
                        if page.p_id == document.end.page_id:
                            if line_id_end in line_ids:
                                # reached the document's last line: stop
                                break
                        if page.p_id == document.start.page_id or document_started:
                            if line_id_start in line_ids or document_started:
                                add_note(mb.lines)
                                # every following line now belongs to the document
                                document_started = True

                    else:
                        # no document filter: emit notes for every line
                        add_note(mb.lines)

            else:
                # loop-else: inner loop finished without break —
                # presumably continue with the next page (TODO confirm,
                # the enclosing loops are outside this view)
                continue
            break
        return {'notes': notes, 'totalTime': total_duration}


if __name__ == "__main__":
    # Demo: export the first page of book 'Pa_14819' as a MIDI file.
    from database import DatabaseBook

    demo_book = DatabaseBook('Pa_14819')
    all_pcgts = [demo_page.pcgts() for demo_page in demo_book.pages()]
    first_pcgts = all_pcgts[0]
    midi_exporter = SimpleMidiExporter([first_pcgts])
    midi_exporter.generate_midi("/tmp/test.mid")
Ejemplo n.º 5
0
 def test_page_selection(self):
     """PageCount.ALL must select every page of the book, in order."""
     demo_book = DatabaseBook('demo')
     params = PageSelectionParams(count=PageCount.ALL, )
     selection = PageSelection.from_params(params, demo_book)
     selected_paths = [page.local_path() for page in selection.get_pages()]
     expected_paths = [page.local_path() for page in demo_book.pages()]
     self.assertListEqual(selected_paths, expected_paths)
Ejemplo n.º 6
0
def extract_from_book(book: DatabaseBook):
    """Run extract_from_pcgts on every page of *book* with a progress bar."""
    page_progress = tqdm(book.pages(), desc="Processing {}".format(book.book))
    for current_page in page_progress:
        extract_from_pcgts(current_page.pcgts())
Ejemplo n.º 7
0
    if debug:
        # Visualize the result: the query line in red, the extracted
        # polygons in green, and the raw contours in blue.
        import matplotlib.pyplot as plt
        canvas = np.stack(((canvas).astype(np.uint8), ) * 3, -1)  # gray -> RGB
        cv2.polylines(canvas, [central_text_line.astype(np.int32)],
                      False, [255, 0, 0],
                      thickness=4)
        cv2.polylines(canvas, polys, True, [0, 255, 0])
        cv2.polylines(canvas, contours, True, [0, 0, 255])
        plt.imshow(canvas)
        plt.show()

    # wrap the raw polygon point arrays into Coords objects
    return [Coords(p) for p in polys]


if __name__ == '__main__':
    # Demo: run component extraction on the first page of the 'demo' book.
    from database import DatabaseBook
    import pickle

    demo_book = DatabaseBook('demo')
    first_page = demo_book.pages()[0]

    cc_path = first_page.file('connected_components_norm',
                              create_if_not_existing=True).local_path()
    # NOTE: pickle.load is acceptable here because the file is produced
    # locally by the pipeline, never from untrusted input.
    with open(cc_path, 'rb') as cc_file:
        cc = pickle.load(cc_file)

    probe_line = Coords(np.array([[100, 740], [900, 738]]))
    all_staff_lines = []
    pcgts_page = PcGts.from_file(first_page.file('pcgts')).page
    for music_region in pcgts_page.music_regions:
        for music_line in music_region.staffs:
            all_staff_lines.extend(music_line.staff_lines)

    extract_components(cc, probe_line, all_staff_lines, debug=True)
Ejemplo n.º 8
0
                            continue
                        # symbols between the previous connection and this one
                        line_symbols = symbols[current_symbol_index:neume_pos]
                        # add symbols until position of connection
                        add_line_symbols(line_symbols)
                        current_symbol_index = neume_pos

                        # add the syllable (its notes are filled in later)
                        self.get_or_create_current_line_container().children.append(
                            Syllable(sc.syllable.text, SpacedNotes([]))
                        )

                    # flush the remaining symbols after the last connection
                    add_line_symbols(symbols[current_symbol_index:])
                else:
                    # non-music element: treat it as paratext
                    tr = element
                    text = " ".join([tl.text() for tl in tr.lines])
                    if len(text) == 0:
                        # skip empty text regions entirely
                        continue
                    self.line_containers.append(
                        ParatextContainer(
                            text=text
                        )
                    )


if __name__ == "__main__":
    # Demo: convert the fifth page of book 'test2' to Monodi JSON and print it.
    from database import DatabaseBook

    source_book = DatabaseBook('test2')
    loaded_pcgts = [ns_pcgts.PcGts.from_file(page.file('pcgts'))
                    for page in source_book.pages()[4:5]]
    document_root = PcgtsToMonodiConverter(loaded_pcgts).root
    print(json.dumps(document_root.to_json(), indent=2))
Ejemplo n.º 9
0
    # fixed seed for reproducible page sampling / prediction order
    np.random.seed(1)
    # NOTE(review): dead code — this branch can never execute; consider
    # removing it or replacing `False` with a real flag.
    if False:
        train_pcgts, val_pcgts = dataset_by_locked_pages(
            0.8,
            [LockState(Locks.SYMBOLS, True),
             LockState(Locks.LAYOUT, True)],
            True,
            [
                # DatabaseBook('Graduel_Part_1'),
                # DatabaseBook('Graduel_Part_2'),
                # DatabaseBook('Graduel_Part_3'),
            ])
    book = DatabaseBook('Gothic_Test')
    meta = Step.meta(AlgorithmTypes.OCR_CALAMARI)
    # model = meta.newest_model_for_book(book)
    # Use a pretrained calamari model from internal storage instead of the
    # newest model trained for this particular book.
    model = Model(
        MetaId.from_custom_path(
            BASE_DIR +
            '/internal_storage/pretrained_models/text_calamari/fraktur_historical',
            meta.type()))
    settings = AlgorithmPredictorSettings(model=model, )
    pred = meta.create_predictor(settings)
    # predict only the first page of the book
    ps: List[PredictionResult] = list(pred.predict(book.pages()[0:1]))
    # draw every predicted text line onto a canvas of its page and show it
    for i, p in enumerate(ps):
        canvas = PcGtsCanvas(p.pcgts.page,
                             p.text_lines[0].line.operation.scale_reference)
        for j, s in enumerate(p.text_lines):
            canvas.draw(s)

        canvas.show()
Ejemplo n.º 10
0
        callback: Optional[PredictionCallback] = None
    ) -> AlgorithmPredictionResultGenerator:
        """Correct and hyphenate the text of every text line of the pages.

        Yields one PredictionResult per page containing a
        PredictionResultSingleLine (with the hyphenated sentence) for each
        text line of that page.
        """
        # assumes all pages belong to the same book — TODO confirm at caller
        book = pages[0].book

        # the dictionary corrector is loaded once per book, not per page
        self.dict_corrector.load_dict(book=book)

        for page in pages:
            pcgts = page.pcgts()
            text_lines: List[Line] = pcgts.page.all_text_lines()
            single_line_pred_result: List[PredictionResultSingleLine] = []
            for t_line in text_lines:
                text = t_line.text()
                # strip existing hyphenation marks before re-hyphenating
                text = text.replace("-", "")
                sentence = self.dict_corrector.segmentate_correct_and_hyphenate_text(
                    text)
                single_line_pred_result.append(
                    PredictionResultSingleLine(t_line, hyphenated=sentence))

            yield PredictionResult(pcgts, page, single_line_pred_result)


if __name__ == '__main__':
    from omr.dataset import DatasetParams, RegionLineMaskData
    from omr.dataset.datafiles import dataset_by_locked_pages, LockState

    # Demo: run the predictor with the best model of the 'demo' book on its
    # first page only.
    b = DatabaseBook('demo')
    val_pcgts = [PcGts.from_file(p.file('pcgts')) for p in b.pages()[0:1]]
    pred = Predictor(AlgorithmPredictorSettings(Meta.best_model_for_book(b)))
    ps = list(pred.predict([p.page.location for p in val_pcgts]))
    import matplotlib.pyplot as plt
Ejemplo n.º 11
0
                                break
                        if page.p_id == document.start.page_id or document_started:

                            if line_id_start in line_ids or document_started:
                                add_block(symbols)
                                # every following block belongs to the document
                                document_started = True

                    else:
                        # no document filter: add every block
                        add_block(symbols)
                else:
                    # non-music element: treat it as paratext
                    tr = element
                    text = " ".join([tl.text() for tl in tr.lines])
                    if len(text) == 0:
                        # skip empty text regions
                        continue
                    self.line_containers.append(ParatextContainer(text=text))

            else:
                # loop-else: inner loop finished without break — presumably
                # continue with the next page (enclosing loops not visible)
                continue
            break


if __name__ == "__main__":
    from database import DatabaseBook

    b = DatabaseBook('demo2')
    pcgts = [ns_pcgts.PcGts.from_file(x.file('pcgts')) for x in b.pages()]
    root = PcgtsToMonodiConverter(pcgts, document=True).root
    print(json.dumps(root.to_json(), indent=2))
    with open('data.json', 'w', encoding='utf-8') as f:
        json.dump(root.to_json(), f, ensure_ascii=False, indent=4)
Ejemplo n.º 12
0
    Size = 7


# accumulated counts over all datasets (one slot per Counts member)
global_counts = np.zeros(Counts.Size, dtype=np.uint32)

# one table column per Counts member; [7:] strips the "Counts." enum prefix
table = PrettyTable(["Dataset"] +
                    [str(Counts(i))[7:] for i in range(Counts.Size)])
for book_name in args.datasets:
    book = DatabaseBook(book_name)
    counts = np.zeros(Counts.Size, dtype=np.uint32)
    if not book.exists():
        raise ValueError("Dataset '{}' does not exist at '{}'".format(
            book.book, book.local_path()))

    for page in book.pages():
        pcgts = PcGts.from_file(page.file('pcgts'))
        counts[Counts.Pages] += 1

        for mr in pcgts.page.music_regions:
            for ml in mr.staffs:
                counts[Counts.Staves] += 1
                counts[Counts.StaffLines] += len(ml.staff_lines)

                for s in ml.symbols:
                    if isinstance(s, Neume):
                        n: Neume = s
                        # NOTE(review): Symbols is incremented by the number
                        # of notes, same as NoteComponents — verify whether a
                        # neume should count as 1 symbol instead.
                        counts[Counts.Symbols] += len(n.notes)
                        counts[Counts.NoteComponents] += len(n.notes)
                    else:
                        counts[Counts.Symbols] += 1
Ejemplo n.º 13
0
                     self.local_path()],
                    stdout=subprocess.PIPE)
                result, err = proc.communicate()
                # error code in the java script is to be ignored for now;
                # wait() just reaps the process — exit_code is unused
                exit_code = proc.wait()
            else:
                # unsupported file definition: nothing we know how to create
                raise Exception("Cannot create file for {}".format(
                    self.definition.id))


if __name__ == "__main__":
    from database import DatabaseBook
    import database.file_formats.pcgts as ns_pcgts

    # Demo: render the 'monodiplus' file of the first 'demo' page to SVG via
    # the external monodi_svg_render one-shot script.
    b = DatabaseBook('demo')
    page = b.pages()[0]
    path = DatabaseFile(page, 'monodiplus').local_path()
    from ommr4all.settings import BASE_DIR

    script_path = os.path.join(BASE_DIR, 'internal_storage', 'resources',
                               'monodi_svg_render', 'bin', 'one-shot')
    import subprocess

    proc = subprocess.Popen([script_path, path, "-w", "500"],
                            stdout=subprocess.PIPE)
    result, err = proc.communicate()
    # NOTE(review): str() on bytes yields the repr ("b'...'" with escaped
    # newlines). result.decode() would be cleaner, but the regex below relies
    # on the escaped single-line form (re.match's '.' does not cross newlines)
    # — confirm before changing.
    str_result = str(result)
    reg = re.match(r".*(<svg.*</svg>).*", str_result).group(1)
    #start_prefix = "<?xml"
    #print(str_result[str_result.find(start_prefix):])
    #print(result)
Ejemplo n.º 14
0
        # normalize errors
        total_diffs = total_diffs / total_diffs[-1]
        # transfer total / errors => acc = 1 - errors / total
        total_diffs[-2] = 1 - 1 / total_diffs[-2]
        # show the confusion matrix both raw and row-normalized
        cm.plot_confusion_matrix()
        cm.plot_confusion_matrix(normalize=True)
        # aggregate per-sample metrics into single summary values
        return f_metrics.mean(axis=0), counts.sum(
            axis=0), acc_counts, acc_acc, total_diffs


if __name__ == '__main__':
    from omr.symboldetection.predictor import SymbolDetectionPredictor, create_predictor, PredictorTypes, SymbolDetectionPredictorParameters
    from prettytable import PrettyTable
    from database import DatabaseBook

    # Evaluate the pixel-classifier symbol detector on page 13 of 'Graduel'.
    b = DatabaseBook('Graduel')
    eval_pcgts = [PcGts.from_file(p.file('pcgts')) for p in b.pages()[12:13]]
    print([e.page.location.local_path() for e in eval_pcgts])
    predictor = create_predictor(
        PredictorTypes.PIXEL_CLASSIFIER,
        SymbolDetectionPredictorParameters(
            [b.local_path(os.path.join('pc_symbol_detection', 'model'))]))
    # collect ground-truth and predicted symbols per music line
    gt_symbols, pred_symbols = [], []
    for p in predictor.predict(eval_pcgts):
        pred_symbols.append(p.symbols)
        gt_symbols.append(p.line.operation.music_line.symbols)

    evaluator = SymbolDetectionEvaluator()
    metrics, counts, acc_counts, acc_acc, diffs = evaluator.evaluate(
        gt_symbols, pred_symbols)

    at = PrettyTable()