def thereader(infile, reffiles):
    """Yield the items of ``sgml.read_raw(infile)`` with references attached.

    The iterables in *reffiles* are advanced in lockstep with the items read
    from *infile*.  For the i-th item, the i-th element of every reference
    iterable is split on whitespace and the resulting token lists are stored,
    in *reffiles* order, as ``sent.refs`` before the item is yielded.

    Iteration stops as soon as the input stream or any reference iterable is
    exhausted (``itertools.izip`` semantics).
    """
    sentences = sgml.read_raw(infile)
    parallel_refs = itertools.izip(*reffiles)
    for sentence, ref_lines in itertools.izip(sentences, parallel_refs):
        sentence.refs = [line.split() for line in ref_lines]
        yield sentence
def thereader(infile, reffiles):
    """Generate items read from *infile*, each carrying tokenized references.

    Every item produced by ``sgml.read_raw(infile)`` is paired with one
    element taken from each iterable in *reffiles*.  Those elements are
    whitespace-split and assigned, as a list of token lists, to the item's
    ``refs`` attribute; the item itself is then yielded.

    The pairing uses ``itertools.izip``, so generation ends when the shortest
    of the participating iterables runs out.
    """
    # NOTE(review): this name appears to be defined more than once in the
    # file with the same behavior; the later definition is the one bound.
    paired = itertools.izip(sgml.read_raw(infile), itertools.izip(*reffiles))
    for pair in paired:
        sent, refs = pair
        sent.refs = [ref.split() for ref in refs]
        yield sent
# Driver fragment: load the configuration, pick the input/output streams,
# report what was loaded, then run extract_grammar() on each input sentence.
# NOTE(review): the original indentation of this fragment was lost; the
# nesting below is reconstructed from the statements themselves -- confirm.

if log.level >= 1:
    log.write("Reading configuration from %s\n" % opts.config)
# execfile() runs the config file as Python code in the current namespace, so
# the config can both read and define names used by the rest of this script.
# NOTE(review): this executes arbitrary code; only point it at trusted files.
execfile(opts.config)

# First positional argument: input path.  Absent or "-" means stdin.
# NOTE(review): files opened here are not closed anywhere in the visible code.
if len(args) >= 1 and args[0] != "-":
    input_file = file(args[0], "r")
else:
    input_file = sys.stdin

# Second positional argument: output path.  Absent or "-" means stdout.
if len(args) >= 2 and args[1] != "-":
    output_file = file(args[1], "w")
else:
    output_file = sys.stdout

# Presumably run so the memory figure logged below is not inflated by
# collectable garbage left over from loading -- TODO confirm.
gc.collect()

if log.level >= 1:
    log.write("all structures loaded, memory %s, time %s\n" % (monitor.memory(), monitor.cpu()))
    # `models` is not defined in this fragment; presumably it is set up by the
    # executed configuration -- verify.
    log.write("models: %s\n" % (" ".join(str(x.name) for x in models)))

sents = sgml.read_raw(input_file)
for sent in sents:
    # getmark() returns None or a (tag, attrs) pair.
    mark = sent.getmark()
    if mark is not None:
        (tag, attrs) = mark
        if tag == "seg":
            # For a "seg" mark: remove the mark and keep its attributes on
            # the sentence as metadata.
            sent.unmark()
            # NOTE(review): dattrs is computed but never used in the visible
            # code, and sent.meta receives the raw attrs rather than the
            # dict -- confirm which was intended.
            dattrs = sgml.attrs_to_dict(attrs)
            sent.meta = attrs
    # Placed at loop level (runs for every sentence, marked or not); inferred
    # from the flattened source -- confirm.
    extract_grammar(sent)