# fieldStart: field-start hook. Forwards the lower-cased field name to
# DefaultConllCallback.fieldStart, then branches on self.cc_field:
#   - 'title': resets self._title to an empty list and sets self._cat to the
#     self._gold_map entry for self.cc_title (None when there is no entry);
#   - anything else: sets self.first to True.
# self.mode is set to NERTrainingCallback.NO_LINK.
# NOTE(review): this method is collapsed onto a single line here, so it cannot
# be told whether the self.mode reset belongs to the else branch only or runs
# for every field -- confirm against the original layout before re-indenting.
# NOTE(review): self.cc_field / self.cc_title are presumably maintained by
# DefaultConllCallback -- verify.
def fieldStart(self, field): DefaultConllCallback.fieldStart(self, field.lower()) if (self.cc_field == 'title'): self._title = [] self._cat = self._gold_map.get(self.cc_title, None) else: self.first = True self.mode = NERTrainingCallback.NO_LINK
def fileEnd(self):
    """
    Reports the statistics of the current file and closes the output file.

    If at least one training sentence was counted, a summary line (written
    sentences, all sentences, average length, file name) is printed to
    stderr. The counters in C{self._sent} are then cleared, the base class
    is notified and the output file is closed.
    """
    stats = self._sent
    # The guard also protects the average computation from a division by zero.
    if stats.num_train > 0:
        average_length = float(stats.num_words) / stats.num_train
        sys.stderr.write("Written {0} sentences out of {1}, with avg length = {2} for file {3}.\n".format(
            stats.num_train, stats.num_sentences, average_length,
            self.cc_file))
    # Cleared whether or not anything was reported, so that no counter can
    # carry over into the next file.
    stats.clear_statistics()
    try:
        DefaultConllCallback.fileEnd(self)
    finally:
        # Close the output even if the base class raises, so that the file
        # handle is not leaked.
        self._out.close()
        self._out = None
def __init__(self, out_dir):
    """
    Initializes the callback.

    @param out_dir the output directory; it is checked with C{ensure_dir}
                   and stored in C{self._out_dir}.
    @raise ValueError if C{ensure_dir(out_dir)} returns a false value.
    """
    if not ensure_dir(out_dir):
        # Name the offending directory so that the failure can be diagnosed.
        raise ValueError(
            "ensure_dir failed for output directory {0!r}".format(out_dir))
    DefaultConllCallback.__init__(self)
    self._sent = SentenceData()  # sentence data; its counters are reported in fileEnd
    self._out_dir = out_dir
    self._title = []
    self._cat = None
    self._gold_map = {}
def fieldStart(self, field):
    """
    Notifies the base class of the new field and records in
    C{self._sentence} whether the field is the body (compared
    case-insensitively) while C{self.cc_redirect} is false.

    @param field the name of the field that starts.
    """
    DefaultConllCallback.fieldStart(self, field)
    in_body = field.lower() == 'body'
    # cc_redirect is only consulted for the body field, as before.
    self._sentence = in_body and not self.cc_redirect
def documentStart(self, title):
    """
    Notifies the base class of the new document and resets
    C{self._words} to an empty list.

    @param title the title of the document that starts.
    """
    DefaultConllCallback.documentStart(self, title)
    self._words = list()
def fileStart(self, file_name):
    """
    Opens the output file that belongs to C{file_name}.

    The output file has the same base name as C{file_name} and lives in
    C{self._out_dir}; the opened writer is kept in C{self._out}.

    @param file_name the name of the file whose processing starts.
    """
    DefaultConllCallback.fileStart(self, file_name)
    base_name = os.path.basename(file_name)
    out_path = os.path.join(self._out_dir, base_name)
    writer = FileWriter(out_path)
    self._out = writer.open()
def __init__(self, out_dir):
    """
    Initializes the callback.

    @param out_dir the directory the output files will be put.
    @raise ValueError if C{ensure_dir(out_dir)} returns a false value.
    """
    if not ensure_dir(out_dir):
        # Name the offending directory so that the failure can be diagnosed.
        raise ValueError(
            "ensure_dir failed for output directory {0!r}".format(out_dir))
    DefaultConllCallback.__init__(self)
    self._out_dir = out_dir
def fileEnd(self):
    """
    Notifies the base class that the file has ended and closes the output
    file. C{self._out} is reset to C{None} afterwards.
    """
    try:
        DefaultConllCallback.fileEnd(self)
    finally:
        # Close the output even if the base class raises, so that the file
        # handle is not leaked.
        self._out.close()
        self._out = None