def fileStart(self, file_name):
    """Opens output files for the sentences kept, as well as one for each
    filter, where the sentences filtered by that particular filter are
    written.

    The kept sentences go to ``file_name + '.kept'``; one further file named
    ``file_name + '.f' + str(i)`` is opened for every position ``i`` in
    ``self.filters``. The opened handles are stored in ``self.kept_file``
    and ``self.filtered_files`` (same order as ``self.filters``).

    :param file_name: prefix used to build the output file names.
    """
    self.kept_file = FileWriter(file_name + '.kept').open()
    # enumerate() yields the same indices as xrange(len(...)) did, without
    # relying on the Python-2-only xrange builtin.
    self.filtered_files = [
        FileWriter(file_name + '.f' + str(i)).open()
        for i, _ in enumerate(self.filters)
    ]
def export_model(model_file, out_file):
    """Saves the model. The output will be utf-8 encoded.

    Output layout, as written below:

    * first line: ``numTerms<TAB>numTopics``;
    * then two lines per term id: the term's word with any trailing
      non-alphanumeric characters stripped, followed by the tab-separated
      values of that term's column in the transposed projection matrix
      (``model.projection.u.T[:, term]``).

    NOTE(review): the encoding is left to FileWriter, which is not visible
    here -- confirm that it really writes UTF-8.

    :param model_file: path handed to ``LsiModel.load``.
    :param out_file: path of the text file to write.
    """
    model = LsiModel.load(model_file)
    with FileWriter(out_file, 'w').open() as out:
        out.write(u"{0}\t{1}\n".format(model.numTerms, model.numTopics))
        # range (not xrange) so the loop does not depend on a Python-2-only
        # builtin.
        for term in range(model.numTerms):
            word = model.id2word.id2token[term]
            # Decode only byte strings. Calling .decode() on a token that is
            # already unicode makes Python 2 encode it as ASCII first, which
            # raises UnicodeEncodeError for any non-ASCII word.
            if isinstance(word, bytes):
                word = word.decode("utf-8")
            # Drop trailing punctuation / symbols; may leave an empty word.
            while word and not word[-1].isalnum():
                word = word[:-1]
            out.write(u"{0}\n".format(word))
            out.write(u"{0}\n".format(u"\t".join(
                str(f) for f in
                numpy.asarray(model.projection.u.T[:, term]).flatten())))
class SentenceFilterCallback(ConllCallback):
    """Splits sentences into a "kept" file and one reject file per filter.

    Words are buffered between sentenceStart() and sentenceEnd(). When a
    sentence ends, the registered filters are consulted in registration
    order; the first one whose ``filter(sentence)`` returns a false value
    claims the sentence, which is then written to that filter's file. A
    sentence that no filter rejects is written to the kept file.

    What counts as a rejected sentence (e.g. incomplete sentences or
    sentences without a verb) is decided entirely by the filter objects
    added through addFilter().
    """

    def __init__(self):
        self.sentence = []
        self.filters = []
        # Output handles. They are only opened by fileStart(); initialising
        # them here lets fileEnd() run safely even if fileStart() never did.
        self.kept_file = None
        self.filtered_files = []

    def addFilter(self, filter):
        """Adds a SentenceFilter to the filter list.

        ``None`` and filters that are already registered are ignored.
        Filters should be added before fileStart() is called, because
        fileStart() opens exactly one reject file per filter registered at
        that moment.
        """
        if filter is not None and filter not in self.filters:
            self.filters.append(filter)

    def fileStart(self, file_name):
        """Opens output files for the sentences kept, as well as one for each
        filter, where the sentences filtered by that particular filter are
        written.

        The kept file is ``file_name + '.kept'``; the file for the filter at
        position ``i`` is ``file_name + '.f' + str(i)``.
        """
        self.kept_file = FileWriter(file_name + '.kept').open()
        # enumerate() gives the same indices as xrange(len(...)) without
        # depending on the Python-2-only xrange builtin.
        self.filtered_files = [
            FileWriter(file_name + '.f' + str(i)).open()
            for i, _ in enumerate(self.filters)
        ]

    def sentenceStart(self):
        """Starts a new, empty sentence buffer."""
        self.sentence = []

    def word(self, attributes):
        """Appends one word (its sequence of attribute strings) to the
        current sentence."""
        self.sentence.append(attributes)

    def sentenceEnd(self):
        """Writes the buffered sentence to the file it belongs to.

        Empty sentences are skipped. Otherwise the sentence goes to the file
        of the first filter that rejects it, or to the kept file if every
        filter accepts it.
        """
        if not self.sentence:
            return
        target = self.kept_file
        for i, sentence_filter in enumerate(self.filters):
            if not sentence_filter.filter(self.sentence):
                # First rejecting filter wins; later filters are not asked.
                target = self.filtered_files[i]
                break
        self._write_sentence(target)

    def _write_sentence(self, out):
        """Writes the current sentence to ``out``: one word per line with
        tab-separated attributes, followed by an empty line."""
        out.write(u"\n".join(u"\t".join(word) for word in self.sentence))
        out.write(u"\n\n")

    def fileEnd(self):
        """Closes all the files.

        Every handle gets a close() attempt even if an earlier one fails;
        the first error encountered is re-raised once all of them have been
        tried. Does nothing if fileStart() was never called.
        """
        handles = list(self.filtered_files)
        if self.kept_file is not None:
            handles.insert(0, self.kept_file)
        first_error = None
        for handle in handles:
            try:
                handle.close()
            except Exception as err:
                # Keep going so the remaining handles are not leaked.
                if first_error is None:
                    first_error = err
        if first_error is not None:
            raise first_error