def main(argv):
    """Split the sentences of a corpus into positive and negative files.

    Expects ``argv`` of the form ``[PROG, INDIR, LABEL, OUTFN]``. The
    documents loaded from INDIR via ``load_hoccorpus`` are walked sentence
    by sentence; each sentence's text is written (one per line, UTF-8) to
    ``OUTFN.pos`` when LABEL is among the sentence's labels and to
    ``OUTFN.neg`` otherwise.

    Returns 1 after printing a usage message to stderr when the argument
    count is wrong; otherwise falls through and returns None.
    """
    if len(argv) != 4:
        # sys.stderr.write emits the same output as the old
        # ``print >> sys.stderr`` statement but also works on Python 3,
        # where that statement raises TypeError at runtime.
        sys.stderr.write('Usage: hoccorpus2posneg INDIR LABEL OUTFN\n')
        return 1

    indir, label, outfn = argv[1:]
    documents = load_hoccorpus(indir)

    posfn, negfn = outfn + '.pos', outfn + '.neg'
    # Both outputs are opened up front (and truncated) even if one of them
    # ends up receiving no sentences.
    with io.open(posfn, 'wt', encoding='utf-8') as posout, \
            io.open(negfn, 'wt', encoding='utf-8') as negout:
        for d in documents:
            for s in d.sentences:
                out = posout if label in s.labels else negout
                out.write(s.text + u'\n')
def main(argv):
    """Write each corpus document as a ``.txt`` / ``.ann`` file pair.

    Expects ``argv`` of the form ``[PROG, INDIR, OUTDIR]``. For every
    document loaded from INDIR via ``load_hoccorpus``, the document text is
    written to ``OUTDIR/<id>.txt`` and the lines produced by the document's
    ``to_standoff()`` are written, newline-joined, to ``OUTDIR/<id>.ann``.
    Both files are UTF-8 encoded.

    Returns 1 after printing a message to stderr when the argument count is
    wrong or OUTDIR is not an existing directory; otherwise falls through
    and returns None.
    """
    if len(argv) != 3:
        # sys.stderr.write emits the same output as the old
        # ``print >> sys.stderr`` statement but also works on Python 3,
        # where that statement raises TypeError at runtime.
        sys.stderr.write('Usage: hoccorpus2ann INDIR OUTDIR\n')
        return 1

    indir, outdir = argv[1:]
    # Check the output directory before loading the corpus so a bad path
    # is reported without doing any of the loading work.
    if not path.isdir(outdir):
        sys.stderr.write('%s is not a directory\n' % outdir)
        return 1

    documents = load_hoccorpus(indir)
    for d in documents:
        txtout = path.join(outdir, d.id + '.txt')
        with io.open(txtout, 'wt', encoding='utf-8') as out:
            out.write(d.text)

        annout = path.join(outdir, d.id + '.ann')
        with io.open(annout, 'wt', encoding='utf-8') as out:
            # No trailing newline is appended after the last line.
            out.write(u'\n'.join(d.to_standoff()))