def main(tags: list, files: list):
    """Classify every file in ``files`` and print each path with its tag.

    Args:
        tags: Items handed one by one to ``loadFromFile``; the loaded results
            are used to build the ``Model``.
        files: Paths of the text files to classify.
    """
    # Load tags from file list
    classifier = Model([loadFromFile(tag_source) for tag_source in tags])

    for path in files:
        # errors='ignore' drops undecodable bytes instead of raising.
        with open(path, 'r', errors='ignore') as handle:
            document = Text(handle.read())
        classifier.classify(document)
        # ``document.tag`` is read only after ``classify`` has run on it.
        print(path, document.tag)
def main(dataset_dir: str, output_dir: str, language: str):
    """Train on a directory of tagged texts and pickle every resulting tag.

    Each sub-directory of ``dataset_dir`` is treated as one tag: its name is
    passed to ``Tag`` and every regular file inside it is read and added to
    the trainer as a ``Text`` carrying that tag. After ``trainer.train()``,
    each tag in ``trainer.tags`` is pickled to ``<output_dir>/<tag.name>.tag``.

    All paths are built explicitly rather than by calling ``os.chdir``, so
    the process working directory is never modified, even when reading or
    training fails part-way through.

    Args:
        dataset_dir: Directory containing one sub-directory per tag.
        output_dir: Existing directory that receives the ``*.tag`` files.
        language: Forwarded unchanged to ``Trainer``.

    Returns:
        None. If ``dataset_dir`` cannot be listed, the error is logged and
        the function returns without training.

    Raises:
        OSError: If a text file cannot be read or a tag file cannot be
            written (for example because ``output_dir`` does not exist).
    """
    try:
        entries = os.listdir(dataset_dir)
    except OSError as e:
        logging.error(e)
        return

    trainer = Trainer(language)
    tag_names = []
    for name in entries:
        tag_dir = os.path.join(dataset_dir, name)
        # Stray files next to the tag folders are not tags; skip them.
        if not os.path.isdir(tag_dir):
            continue
        tag = Tag(name)
        trainer.addTag(tag)
        tag_names.append(name)
        for file in os.listdir(tag_dir):
            path = os.path.join(tag_dir, file)
            # Nested directories cannot be opened as text; skip them.
            if not os.path.isfile(path):
                continue
            # errors='ignore' drops undecodable bytes instead of raising.
            with open(path, 'r', errors='ignore') as fd:
                trainer.addText(Text(fd.read(), tag))

    logging.info("Training \"{}\" using {} files.".format(tag_names, len(trainer.corpus)))
    trainer.train()

    # Save new classes sets
    for tag in trainer.tags:
        # NOTE(review): tag.name is used verbatim as a file name; it comes
        # from the dataset directory names above.
        with open(os.path.join(output_dir, tag.name + '.tag'), 'wb') as dump_fd:
            pickle.dump(tag, dump_fd)
    # TODO: move old files to a folder
    logging.info("Model updated/saved.")