Esempio n. 1
0
def process_file(gold_dir, fname):
    """Collect, label and write the vectors for one file in a directory.

    The file ``fname`` inside ``gold_dir`` is parsed with a source parser
    built from the ``options`` name taken from the enclosing scope. Two
    vector collections (named "ee" and "et" here) are gathered from the
    parsed document, the relation types from the document's tlinks are
    added to them, and both collections are handed to ``write_vectors``.

    Args:
        gold_dir: Directory that contains the file to process.
        fname: Name of the file, relative to ``gold_dir``.
    """
    gold_path = os.path.join(gold_dir, fname)
    document = create_source_parser(options).parse_file(gold_path)
    ee_vecs, et_vecs = collect_tarsqidoc_vectors(document)
    # The tlinks are collected only after the vectors, as in the original
    # statement order.
    add_reltype_to_vectors(collect_tlinks(document), ee_vecs, et_vecs)
    write_vectors(ee_vecs, et_vecs)
Esempio n. 2
0
 def process_future(self):
     """Alternative to process() that is not used yet.

     According to the original author's notes, this variant differs from
     process() in that it uses subprocess instead of os.system() and pipes
     each line to the classifier without using any temporary files.

     Known problem (from the same notes): when the very first line is
     processed, the identifier is missing from the output.
     """
     # TODO: when this is tested enough let it replace process()
     ee_vecs, et_vecs = vectors.collect_tarsqidoc_vectors(self.tarsqidoc)
     classifier = mallet.MalletClassifier(self.mallet)
     classifier.add_classifiers(self.ee_model, self.et_model)
     # Both vector collections are turned into strings before the first
     # classification call is made.
     ee_lines = list(map(str, ee_vecs))
     et_lines = list(map(str, et_vecs))
     ee_out = classifier.classify_vectors(self.ee_model, ee_lines)
     et_out = classifier.classify_vectors(self.et_model, et_lines)
     self._add_links_future(ee_out, et_out)
Esempio n. 3
0
 def process_future(self):
     """Alternative to process() that is not used yet.

     According to the original author's notes, this variant differs from
     process() in that it uses subprocess instead of os.system() and pipes
     each line to the classifier without using any temporary files.

     Known problem (from the same notes): when the very first line is
     processed, the identifier is missing from the output.
     """
     # TODO: when this is tested enough let it replace process()
     ee_vecs, et_vecs = vectors.collect_tarsqidoc_vectors(self.tarsqidoc)
     classifier = mallet.MalletClassifier(self.mallet)
     classifier.add_classifiers(self.ee_model, self.et_model)
     # Both vector collections are turned into strings before the first
     # classification call is made.
     ee_lines = list(map(str, ee_vecs))
     et_lines = list(map(str, et_vecs))
     ee_out = classifier.classify_vectors(self.ee_model, ee_lines)
     et_out = classifier.classify_vectors(self.et_model, et_lines)
     self._add_links_future(ee_out, et_out)