# Feature-map setup fragment (Python 2: it uses the `print >> sys.stderr` form).
# Statements visible in the flattened text below:
#   * with options.merge set, `feats` is created empty and filled through
#     init_from_dir(".", suffix=options.merge);
#   * otherwise `feats` is built from args[0] + ".features" and indexed on
#     "m#<INDEX1>" / "m#<INDEX2>";
#   * guarded by `allfuncs != {}`, the annotation file (args[0] + ".xml") and
#     its preprocessing file (args[0] + ".txt.prep.xml") are loaded and joined
#     with doc.add_preprocess(prep);
#   * guarded by options.voisins, doc._voisins is filled from
#     get_voisins_dict(table, doc._vocab) and each entry is converted with
#     dict();
#   * the closing if/else indexes `feats` again, adding "m#FILE" when merging.
# NOTE(review): the fragment is flattened onto a single line that starts with
# "#", so as written the interpreter treats all of it as a comment; the
# original line breaks and indentation must be restored before it can run.
# NOTE(review): the nesting cannot be recovered from this text. Presumably the
# "Usage" `else` pairs with `if True:` (which would make it unreachable) and
# the voisins handling sits inside the `allfuncs` guard -- TODO confirm
# against the original file.
# basename = args[0] # doc = annodisAnnot(basename+".xml") # feats = FeatureMap(basename+".features") # prep = Preprocess( basename+".txt.prep.xml") if options.merge: feats = FeatureMap("", empty = True) feats.init_from_dir(".", suffix = options.merge) basename = "no base file to consider" else: if True: basename = args[0] feats = FeatureMap(basename + ".features", weird = options.weird) feats.index(["m#%s" % INDEX1, "m#%s" % INDEX2]) if allfuncs != {}: doc = annodisAnnot(basename + ".xml") prep = Preprocess(basename + ".txt.prep.xml") doc.add_preprocess(prep) if options.voisins: doc._voisins = get_voisins_dict(table, doc._vocab) for entry in doc._voisins: doc._voisins[entry] = dict(doc._voisins[entry]) else: print >> sys.stderr, "Usage: script file-basename ?", args sys.exit(0) if options.merge: feats.index(["m#%s" % INDEX1, "m#%s" % INDEX2, "m#FILE"]) else: feats.index(["m#%s" % INDEX1, "m#%s" % INDEX2])
# Feature-map setup fragment (Python 2: it uses the `print >> sys.stderr` form).
# Statements visible in the flattened text below:
#   * with options.merge set, `feats` is created empty and filled through
#     init_from_dir(".", suffix=options.merge);
#   * otherwise `feats` is built from args[0] + ".features" and indexed on
#     "m#<INDEX1>" / "m#<INDEX2>";
#   * guarded by `allfuncs != {}`, the annotation file (args[0] + ".xml") and
#     its preprocessing file (args[0] + ".txt.prep.xml") are loaded and joined
#     with doc.add_preprocess(prep);
#   * guarded by options.voisins, doc._voisins is filled from
#     get_voisins_dict(table, doc._vocab) and each entry is converted with
#     dict();
#   * the closing if/else indexes `feats` again, adding "m#FILE" when merging.
# NOTE(review): the fragment is flattened onto a single line that starts with
# "#", so as written the interpreter treats all of it as a comment; the
# original line breaks and indentation must be restored before it can run.
# NOTE(review): the nesting cannot be recovered from this text. Presumably the
# "Usage" `else` pairs with `if True:` (which would make it unreachable) and
# the voisins handling sits inside the `allfuncs` guard -- TODO confirm
# against the original file.
# basename = args[0] # doc = annodisAnnot(basename+".xml") # feats = FeatureMap(basename+".features") # prep = Preprocess( basename+".txt.prep.xml") if options.merge: feats = FeatureMap("", empty=True) feats.init_from_dir(".", suffix=options.merge) basename = "no base file to consider" else: if True: basename = args[0] feats = FeatureMap(basename + ".features", weird=options.weird) feats.index(["m#%s" % INDEX1, "m#%s" % INDEX2]) if allfuncs != {}: doc = annodisAnnot(basename + ".xml") prep = Preprocess(basename + ".txt.prep.xml") doc.add_preprocess(prep) if options.voisins: doc._voisins = get_voisins_dict(table, doc._vocab) for entry in doc._voisins: doc._voisins[entry] = dict(doc._voisins[entry]) else: print >> sys.stderr, "Usage: script file-basename ?", args sys.exit(0) if options.merge: feats.index(["m#%s" % INDEX1, "m#%s" % INDEX2, "m#FILE"]) else: feats.index(["m#%s" % INDEX1, "m#%s" % INDEX2])
import codecs
import re
import sys
import xml.etree.ElementTree as ET

from AnnodisReader import annodisAnnot, Preprocess
from Lookup import Gazetteer


def includes(pos1, pos2):
    """Return True when the span ``pos2`` lies entirely inside ``pos1``.

    Both arguments are two-element ``(start, end)`` pairs. The comparison is
    inclusive at both ends, so two identical spans include each other.
    """
    a1, b1 = pos1
    a2, b2 = pos2
    return a1 <= a2 and b2 <= b1


if __name__ == "__main__":
    # Two positional arguments are read below: the annotation file
    # (sys.argv[1]) and the lexicon handed to Gazetteer (sys.argv[2]).
    # Check for them up front: previously a missing argument raised an
    # IndexError a second time inside the error handler, which itself reads
    # sys.argv[1] to build its message.
    if len(sys.argv) < 3:
        sys.stderr.write(
            "Usage: %s annotation-file.xml lexicon-file\n" % sys.argv[0]
        )
        sys.exit(1)

    # `except Exception` (rather than a bare `except:`) still reports any
    # reader failure but no longer swallows KeyboardInterrupt / SystemExit.
    # NOTE(review): the exit status on these error paths is kept at 0, as in
    # the original, in case callers rely on it; consider a non-zero status.
    try:
        doc = annodisAnnot(sys.argv[1])
    except Exception:
        print("ERROR reading file: %s" % sys.argv[1])
        sys.exit(0)

    try:
        # The preprocessing file name is derived from the text that precedes
        # the first ".xml" in the annotation file name.
        prep = Preprocess(sys.argv[1].split(".xml")[0] + ".txt.prep.xml")
        doc.add_preprocess(prep)
    except Exception:
        print("ERROR reading prepocessed file for %s" % sys.argv[1])
        sys.exit(0)

    # Run the lexicon over the document text.
    lexicon = Gazetteer(sys.argv[2])
    txt = doc.text()
    lookup = lexicon.tag(txt)