def process_file(gold_dir, fname):
    """Build and write the vectors for one file in the gold directory.

    The file at ``gold_dir/fname`` is parsed with a source parser, the two
    vector collections are gathered from the resulting document, the
    relation types found in the document's tlinks are added to those
    vectors, and the vectors are written out.

    Args:
        gold_dir: directory that holds the file.
        fname: name of the file inside ``gold_dir``.

    Returns:
        None; the result is whatever ``write_vectors`` emits.
    """
    gold_path = os.path.join(gold_dir, fname)
    # NOTE(review): `options` is not bound in this function; presumably it
    # is a module-level name -- confirm it is defined before this is called.
    document = create_source_parser(options).parse_file(gold_path)
    ee_vectors, et_vectors = collect_tarsqidoc_vectors(document)
    document_tlinks = collect_tlinks(document)
    add_reltype_to_vectors(document_tlinks, ee_vectors, et_vectors)
    write_vectors(ee_vectors, et_vectors)
def load(infile):
    """Parse ``infile`` as a timebank source and return the document.

    The source parser, metadata parser and document structure parser are
    created up front and then applied in that order. The options used to
    build the parsers are added to the document afterwards. Finally, for
    every tag name in TIMEML_TAGS the tags are imported from the document's
    source tags into the document's own tags and then removed from the
    source tags.

    Args:
        infile: path of the file handed to the source parser.

    Returns:
        The parsed document.
    """
    timebank_options = tarsqi.Options([('--source', 'timebank')])
    # All parsers are created before any parsing starts.
    source_reader = create_source_parser(timebank_options)
    follow_up_parsers = (
        create_metadata_parser(timebank_options),
        create_docstructure_parser(),
    )
    document = source_reader.parse_file(infile)
    for parser in follow_up_parsers:
        parser.parse(document)
    document.add_options(timebank_options)
    for timeml_tag in TIMEML_TAGS:
        # Copy first, then drop the tags from the source side.
        document.tags.import_tags(document.source.tags, timeml_tag)
        document.source.tags.remove_tags(timeml_tag)
    return document
def __init__(self, opts, infile, outfile):
    """Set up a Tarsqi object for the given data source and options.

    Instance variables related to the document model and the meta data
    are not set here.

    Args:
        opts: list of command line options, handed to ``Options``.
        infile: input path, typically absolute; can be None when strings
            are processed.
        outfile: output path, typically absolute; can be None when
            strings are processed.
    """
    # The toolkit may have crashed on an earlier file and left us in some
    # other directory, so always start from the toolkit root.
    os.chdir(TTK_ROOT)
    self.input, self.output = infile, outfile
    if infile:
        self.basename = _basename(infile)
    else:
        self.basename = None
    options = Options(opts)
    self.options = options
    if options.loglevel:
        logger.set_level(options.loglevel)
    self.DIR_TMP_DATA = os.path.join(TTK_ROOT, 'data', 'tmp')
    self.components = COMPONENTS
    self.source_parser = create_source_parser(options)
    self.metadata_parser = create_metadata_parser(options)
    self.docstructure_parser = create_docstructure_parser()
    # Built last, after options, components and parsers are in place.
    self.pipeline = self._create_pipeline()
def _initialize_parsers(self):
    """Create the source, metadata and document structure parsers.

    The source and metadata parsers are built from ``self.options``; the
    document structure parser takes no arguments. Each parser is stored
    on the instance as soon as it has been created.
    """
    current_options = self.options
    self.source_parser = create_source_parser(current_options)
    self.metadata_parser = create_metadata_parser(current_options)
    self.docstructure_parser = create_docstructure_parser()