def export(self, name, force=False, add_lemmes=False):
    """
    Export the tokens of every sample to a file.

    Each token is written as ``original/tag`` followed by a space, and
    each sample is terminated by a newline. When ``add_lemmes`` is true,
    ``/lemme`` is appended to the tokens whose lemme differs from their
    original form.

    The file is saved under ``self.PATH`` as ``name`` plus an extension:
    ``PENDING_EXT`` by default; when ``force`` is true, ``VALID_EXT``, or
    ``LEXICON_EXT`` if ``add_lemmes`` is also true.

    The generated text is also kept in ``self.content``.
    """
    # Collect the pieces and join them once, instead of growing a string
    # with ``+=`` inside the nested loops.
    chunks = []
    for sample in self.samples:
        for token in sample:
            lemme = u""
            # Add lemme only if different from original
            if add_lemmes and token.lemme != token.original:
                lemme = u"/%s" % token.lemme
            chunks.append(
                u"%s/%s%s " % (unicode(token.original), token.tag, lemme)
            )
        chunks.append(u"\n")  # Newline after each sample, for human reading
    self.content = u"".join(chunks)

    # Define extension
    ext = self.PENDING_EXT
    if force:
        ext = self.LEXICON_EXT if add_lemmes else self.VALID_EXT
    save_to_file(os.path.join(self.PATH, "%s%s" % (name, ext)), self.content)
def export(self, name, force=False, add_lemmes=False):
    """
    Export the tokens of every sample to a file.

    Each token is written as ``original/tag`` followed by a space, and
    each sample is terminated by a newline. When ``add_lemmes`` is true,
    ``/lemme`` is appended to the tokens whose lemme differs from their
    original form.

    The file is saved under ``self.PATH`` as ``name`` plus an extension:
    ``PENDING_EXT`` by default; when ``force`` is true, ``VALID_EXT``, or
    ``LEXICON_EXT`` if ``add_lemmes`` is also true.

    The generated text is also kept in ``self.content``.
    """
    # Collect the pieces and join them once, instead of growing a string
    # with ``+=`` inside the nested loops.
    chunks = []
    for sample in self.samples:
        for token in sample:
            lemme = u""
            # Add lemme only if different from original
            if add_lemmes and token.lemme != token.original:
                lemme = u"/%s" % token.lemme
            chunks.append(
                u"%s/%s%s " % (unicode(token.original), token.tag, lemme)
            )
        chunks.append(u"\n")  # Newline after each sample, for human reading
    self.content = u"".join(chunks)

    # Define extension
    ext = self.PENDING_EXT
    if force:
        ext = self.LEXICON_EXT if add_lemmes else self.VALID_EXT
    save_to_file(os.path.join(self.PATH, "%s%s" % (name, ext)), self.content)
def reset_triggers(self):
    """
    Discard the triggers left by a previous training.

    A full training must start without earlier triggers, so an empty
    string is saved to the triggers file.
    """
    triggers_file = "corpus/triggers.trg"
    empty_content = ""
    save_to_file(triggers_file, empty_content)