Beispiel #1
0
def main(tags: list, files: list):
    """Classify each file in ``files`` and print the tag assigned to it.

    Args:
        tags: Values passed one by one to ``loadFromFile``; the loaded
            results are handed to ``Model``.
        files: Paths of the text files to classify.

    For every path the function prints the path followed by the ``tag``
    attribute of the classified ``Text`` object.
    """
    # Resolve every tag reference into a loaded tag before building the model.
    loaded_tags = [loadFromFile(source) for source in tags]
    classifier = Model(loaded_tags)

    for path in files:
        # Undecodable bytes are dropped rather than aborting the whole run.
        with open(path, 'r', errors='ignore') as handle:
            document = Text(handle.read())

        classifier.classify(document)
        print(path, document.tag)
Beispiel #2
0
def main(dataset_dir: str, output_dir: str, language: str):
    """Train on a directory of tagged texts and pickle the resulting tags.

    ``dataset_dir`` is scanned for sub-directories; each sub-directory name
    becomes a ``Tag`` and every regular file inside it is read and added to
    the trainer as a ``Text`` carrying that tag. After ``trainer.train()``,
    each entry of ``trainer.tags`` is pickled to
    ``<output_dir>/<tag.name>.tag``.

    All paths are built explicitly, so the process working directory is
    never changed. Navigating with ``os.chdir`` would leave the caller in a
    different directory afterwards, and ``chdir('..')`` does not return to
    the dataset directory when a tag directory is a symlink.

    Args:
        dataset_dir: Directory containing one sub-directory per tag.
        output_dir: Existing directory that receives the ``.tag`` files;
            a relative path is resolved against the current working
            directory.
        language: Passed through to ``Trainer``.

    Returns:
        None. If ``dataset_dir`` cannot be listed, the error is logged and
        the function returns without training.
    """
    try:
        entries = os.listdir(dataset_dir)
    except OSError as e:
        logging.error(e)
        return

    # Only sub-directories represent tags; stray files at the top level of
    # the dataset (README, .DS_Store, ...) are ignored instead of crashing.
    tags = [
        name for name in entries
        if os.path.isdir(os.path.join(dataset_dir, name))
    ]

    trainer = Trainer(language)

    for t in tags:
        tag_dir = os.path.join(dataset_dir, t)

        tag = Tag(t)
        trainer.addTag(tag)

        for file in os.listdir(tag_dir):
            path = os.path.join(tag_dir, file)
            # Nested directories cannot be opened as text; skip them.
            if not os.path.isfile(path):
                continue
            # Undecodable bytes are dropped rather than aborting training.
            with open(path, 'r', errors='ignore') as fd:
                trainer.addText(Text(fd.read(), tag))

    logging.info("Training \"{}\" using {} files.".format(tags, len(trainer.corpus)))
    trainer.train()

    # Save new classes sets
    for tag in trainer.tags:
        with open(os.path.join(output_dir, tag.name + '.tag'), 'bw') as dump_fd:
            pickle.dump(tag, dump_fd)

    # TODO: move old files to a folder

    logging.info("Model updated/saved.")