def process_file(gold_dir, fname):
    """Build and write the ee/et vectors for one file in gold_dir.

    The file at gold_dir/fname is parsed with a source parser created from
    the module-level options, after which the ee and et vectors are collected
    from the resulting document, the relation types from the document's
    tlinks are added to those vectors, and the vectors are written out.
    """
    path = os.path.join(gold_dir, fname)
    document = create_source_parser(options).parse_file(path)
    ee, et = collect_tarsqidoc_vectors(document)
    # The vectors are updated in place with the relation types before they
    # are handed to the writer.
    add_reltype_to_vectors(collect_tlinks(document), ee, et)
    write_vectors(ee, et)
def process_future(self):
    """Alternative to process() that is not in use yet.

    Unlike process(), this version uses subprocess rather than os.system()
    and pipes each line to the classifier instead of going through temporary
    files.

    Known problem: when the very first line is processed, the identifier is
    missing from the output.
    """
    # TODO: when this is tested enough let it replace process()
    ee_vecs, et_vecs = vectors.collect_tarsqidoc_vectors(self.tarsqidoc)
    classifier = mallet.MalletClassifier(self.mallet)
    classifier.add_classifiers(self.ee_model, self.et_model)
    # The classifier is handed the string form of each vector.
    ee_lines = list(map(str, ee_vecs))
    et_lines = list(map(str, et_vecs))
    ee_classified = classifier.classify_vectors(self.ee_model, ee_lines)
    et_classified = classifier.classify_vectors(self.et_model, et_lines)
    self._add_links_future(ee_classified, et_classified)