コード例 #1
0
def on_predict(args):
    """Build the prediction stream for the "difference" service.

    An endless cycle of the arXiv export URL is wrapped with ``metaize`` and
    handed to ``elmo_difference_pipe``. The pipe's output is then passed
    through ``forget_except`` with the keys ``html_path``, ``css`` and
    ``html``.

    Args:
        args: Mapping that must contain ``'best_model_path'``.

    Returns:
        The object returned by ``forget_except``; it is not consumed here.
    """
    endless_sources = metaize(itertools.cycle(["http://export.arxiv.org/"]))
    predictions = elmo_difference_pipe(
        endless_sources,
        difference_model_path=args['best_model_path'],
        service_id="difference",
    )
    return forget_except(predictions, keys=['html_path', 'css', 'html'])
コード例 #2
0
def annotate_uploaded_file(file, service_id):
    """Run a single file through the single-document difference pipe.

    ``BEST_MODELS`` is pretty-printed first. The file is then wrapped with
    ``metaize`` and fed to ``elmo_difference_single_pipe`` using the model
    path stored under ``BEST_MODELS["difference"]['best_model_path']``.

    Args:
        file: The item to annotate; passed on as a one-element list.
        service_id: Forwarded unchanged to the pipe.

    Returns:
        The first item produced by the pipe, or ``None`` if it produces
        nothing.
    """
    pprint(BEST_MODELS)
    wrapped_file = metaize([file])
    annotations = elmo_difference_single_pipe(
        wrapped_file,
        difference_model_path=BEST_MODELS["difference"]['best_model_path'],
        service_id=service_id,
    )
    return next(annotations, None)
コード例 #3
0
def annotate_difference_elmo():
    """Start ``model_in_the_loop`` for the ELMo "difference" service.

    Training is delegated to ``elmo_difference_model_pipe`` and prediction to
    ``on_predict``. Model and collection locations are read from ``config``.
    """

    def _train(args):
        # Drain the training pipe completely so every sample file is processed.
        training_pipe = elmo_difference_model_pipe(
            metaize(args['samples_files']),
            collection_step=args['training_rate'],
        )
        return list(training_pipe)

    model_in_the_loop(
        model_dir=config.ELMO_DIFFERENCE_MODEL_PATH,
        collection_path=config.ELMO_DIFFERENCE_COLLECTION_PATH,
        on_train=_train,
        service_id="difference",
        on_predict=on_predict,
        training_rate_mode='size',
        training_rate_file=(
            config.ELMO_DIFFERENCE_COLLECTION_PATH + "/train_over.conll3"
        ),
    )
コード例 #4
0
    def make_box_layout_model(self):
        """Run the "annotation.collection" -> "model" pipe in training mode.

        Every file matching ``<COLLECTION_PATH>.*.pickle`` is fed to the pipe
        with ``collection_step="_testcase"``, and the fully consumed pipe
        output is printed.
        """
        train_pipe = self.ant("annotation.collection", "model")
        target_path = config.hidden_folder + config.layout_model_path + "/test"
        pickle_pattern = config.COLLECTION_PATH + ".*.pickle"

        # glob.glob already returns a list, so no extra copy is needed.
        outputs = train_pipe(
            metaize(glob.glob(pickle_pattern)),
            training=True,
            collection_step="_testcase",
            layout_model_path=target_path,
        )
        print(list(outputs))
コード例 #5
0
ファイル: backend.py プロジェクト: c0ntradicti0n/LayoutEagle
    def fill_library():
        """Pull up to 100 results through the filling pipe and discard them.

        The pipe is fed at most 100 metaized ``"pdfs"`` entries. Exactly one
        attempt is made: any ``Exception`` raised while building or draining
        the pipe is logged with its traceback and then swallowed.
        """
        try:
            first_hundred = itertools.islice(
                metaize(itertools.cycle(["pdfs"])), 100)
            results = forget_except(filling_pipe(first_hundred),
                                    keys=["html_path"])
            # Only the pipe's side effects matter; each result is dropped.
            for _ in range(100):
                next(results, None)
        except Exception:
            logging.exception("Getting first 100 threw")
コード例 #6
0
ファイル: Txt2Mp3.py プロジェクト: c0ntradicti0n/LayoutEagle
        # For each (text, meta) pair: write the text to a .txt file, turn it
        # into a wave file with text2wave, encode that as mp3 with lame, and
        # yield the mp3 path together with the updated meta.
        for text, meta in iterator:
            # Both output paths are derived from the document's html path and
            # recorded in meta.
            txt_path = meta['html_path'] + ".txt"
            meta['txt_path'] = txt_path
            meta['mp3_path'] = txt_path + '.mp3'
            # The incoming pieces are joined with blank lines between them;
            # presumably `text` is a sequence of strings — verify against the
            # upstream producer.
            text = "\n\n".join(text)
            with open(txt_path, "w") as f:
                f.write(text)

                # NOTE(review): text[100:] logs everything AFTER the first 100
                # characters; a short preview (text[:100]) may have been
                # intended — confirm. The log call also runs while the file is
                # still open.
                self.logger.info(
                    f"encoding \"{text[100:]}\" as mp3 on path {meta['mp3_path']}"
                )
            # text2wave writes to the fixed file name "out.wave" (relative to
            # the current working directory); .read() consumes the command's
            # output until EOF before continuing.
            # NOTE(review): the paths are interpolated into the shell command
            # unquoted, so paths with spaces or shell metacharacters would
            # break these commands — confirm inputs are safe.
            os.popen(
                f"text2wave -eval  '(nitech_us_bdl_arctic_hts)'  {meta['txt_path']} -o out.wave"
            ).read()
            # Encode the intermediate wave file to the final mp3 path.
            os.popen(f"lame out.wave {meta['mp3_path']}").read()
            yield meta['mp3_path'], meta


if __name__ == "__main__":
    # Manual smoke test: run the "pdf" -> "mp3" pipeline on a sample PDF and
    # play the first resulting mp3 with mplayer.
    import subprocess

    from core.pathant.PathAnt import PathAnt
    from layout.model_helpers import find_best_model
    from helpers.list_tools import metaize

    ant = PathAnt()
    print(ant.graph())
    # Only the first element of find_best_model()'s result is used.
    model_path = find_best_model()[0]
    pipe = ant("pdf", "mp3")
    res = list(pipe(metaize(["./../test/glue.pdf"]), model_path=model_path))
    pprint(res)
    if not res:
        # Exit with a readable message (still a non-zero status) instead of
        # failing with an IndexError on res[0].
        raise SystemExit("pdf -> mp3 pipeline produced no output")
    # Pass the path as a separate argument so spaces or shell metacharacters
    # in it cannot break (or alter) the command. stdout is discarded, as it
    # was when the output of os.popen(...).read() was thrown away.
    subprocess.run(["mplayer", res[0][0]],
                   stdout=subprocess.DEVNULL,
                   check=False)