def sr_parse(self, doc, bcvocab=None):
    """ Shift-reduce RST parsing based on model prediction

    :type doc: Doc
    :param doc: document to parse, as returned by DocReader

    :type bcvocab: dict
    :param bcvocab: brown clusters
    """
    # Initialize parser
    srparser = SRParser([], [])
    srparser.init(doc)
    # Parsing
    while not srparser.endparsing():
        # Generate features
        stack, queue = srparser.getstatus()
        # Make sure to call the generator with the same
        # arguments as in the data generation part
        fg = FeatureGenerator(stack, queue, doc, bcvocab)
        feat = fg.features()
        # Rank all labels by prediction score and apply the first
        # one that corresponds to a legal action
        labels = self.rank_labels(feat)
        for label in labels:
            action = label2action(label)
            try:
                srparser.operate(action)
                break
            except ActionError:
                # Illegal action in the current state; try the next label
                continue
    tree = srparser.getparsetree()
    rst = RSTTree()
    rst.asign_tree(tree)
    return rst
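
# A minimal usage sketch for the document-level sr_parse above. It is an
# illustration only: it assumes a trained ParsingModel with loadmodel(),
# the DocReader used by evalparser below, and an already-loaded brown
# cluster dict; the model path and helper name are hypothetical.
def demo_parse_merge(fmerge, bcvocab=None):
    pm = ParsingModel()
    pm.loadmodel("model/parsing-model.pickle.gz")
    doc = DocReader().read(fmerge)        # *.merge file for one document
    pred_rst = pm.sr_parse(doc, bcvocab)  # shift-reduce decoding
    return pred_rst.bracketing()          # discourse brackets used for evaluation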
def builddata(self, path):
    """ Build a list of feature lists from a given path

    :type path: string
    :param path: data path, where all data files are saved
    """
    files = [os.path.join(path, fname) for fname in os.listdir(path)
             if fname.endswith('.dis')]
    for fname in files:
        rst = RSTTree(fname=fname)
        rst.build()
        actionlist, samplelist = rst.generate_samples()
        self.actionlist += actionlist
        self.samplelist += samplelist
def evalparser(path='./examples', report=False, bcvocab=None, draw=True,
               withdp=False, fdpvocab=None, fprojmat=None):
    """ Test the parsing performance

    :type path: string
    :param path: path to the evaluation data

    :type report: boolean
    :param report: whether to report (calculate) the f1 score

    :type bcvocab: dict
    :param bcvocab: brown clusters

    :type draw: boolean
    :param draw: whether to draw each predicted tree into a *.ps file

    withdp, fdpvocab and fprojmat are passed through to ParsingModel
    """
    from os import listdir
    from os.path import join as joinpath
    # ----------------------------------------
    # Load the parsing model
    print 'Load parsing model ...'
    pm = ParsingModel(withdp=withdp,
                      fdpvocab=fdpvocab, fprojmat=fprojmat)
    pm.loadmodel("model/parsing-model.pickle.gz")
    # ----------------------------------------
    # Evaluation
    met = Metrics(levels=['span', 'nuclearity', 'relation'])
    # ----------------------------------------
    # Read all files from the given path
    doclist = [joinpath(path, fname) for fname in listdir(path)
               if fname.endswith('.merge')]
    for fmerge in doclist:
        # ----------------------------------------
        # Read *.merge file
        dr = DocReader()
        doc = dr.read(fmerge)
        # ----------------------------------------
        # Parsing
        pred_rst = pm.sr_parse(doc, bcvocab)
        if draw:
            strtree = pred_rst.parse()
            drawrst(strtree, fmerge.replace(".merge", ".ps"))
        # Get brackets from parsing results
        pred_brackets = pred_rst.bracketing()
        fbrackets = fmerge.replace('.merge', '.brackets')
        # Write brackets into file
        writebrackets(fbrackets, pred_brackets)
        # ----------------------------------------
        # Evaluate with gold RST tree
        if report:
            fdis = fmerge.replace('.merge', '.dis')
            gold_rst = RSTTree(fdis, fmerge)
            gold_rst.build()
            gold_brackets = gold_rst.bracketing()
            met.eval(gold_rst, pred_rst)
    if report:
        met.report()
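
# A sketch of how the evaluator above might be driven from a script.
# Only the evalparser signature is taken from the code above; the use of
# gzip/cPickle and the brown-cluster pickle path are assumptions made
# for illustration.
if __name__ == '__main__':
    import gzip
    import cPickle
    with gzip.open('resources/bc3200.pickle.gz') as fin:  # hypothetical path
        bcvocab = cPickle.load(fin)
    evalparser(path='./examples', report=True, bcvocab=bcvocab,
               draw=False, withdp=False)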
def sr_parse(self, texts):
    """ Shift-reduce RST parsing based on model prediction

    :type texts: list of string
    :param texts: list of EDUs for parsing
    """
    # Initialize parser
    srparser = SRParser([], [])
    srparser.init(texts)
    # Parsing
    while not srparser.endparsing():
        # Generate features
        stack, queue = srparser.getstatus()
        # Make sure to call the generator with the same
        # arguments as in the data generation part
        fg = FeatureGenerator(stack, queue)
        features = fg.features()
        labels = self.predict(features)
        # Enumerate all candidate actions, ranked by prediction score
        for i, label in enumerate(labels):
            action = label2action(label)
            try:
                srparser.operate(action)
                break  # legal action, end the loop
            except ActionError:
                if i < len(labels) - 1:
                    # Illegal action, try the next candidate
                    continue
                else:
                    print "Parsing action error with {}".format(action)
                    sys.exit()
    tree = srparser.getparsetree()
    rst = RSTTree(tree=tree)
    return rst
def builddata(self, rpath):
    """ Build a list of feature lists from a given path

    :type rpath: string
    :param rpath: data path, where all data files are saved
    """
    # Read RST tree files
    files = [os.path.join(rpath, fname) for fname in os.listdir(rpath)
             if fname.endswith('.dis')]
    for fdis in files:
        print 'Processing data from file: {}'.format(fdis)
        fmerge = fdis.replace('.dis', '.merge')
        rst = RSTTree(fdis, fmerge)
        rst.build()
        actionlist, samplelist = rst.generate_samples(self.bcvocab)
        self.actionlist += actionlist
        self.samplelist += samplelist
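
# A small helper sketch: run builddata over a training directory and
# report how many shift-reduce samples were extracted. "databuilder" is
# any object exposing the builddata/actionlist/samplelist members used
# above; the function name and directory path are illustrative only.
def demo_build_training_data(databuilder, rpath='./data/training'):
    databuilder.builddata(rpath)
    print '{} actions, {} samples collected'.format(
        len(databuilder.actionlist), len(databuilder.samplelist))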
def sr_parse(self, texts):
    """ Shift-reduce RST parsing based on model prediction

    :type texts: list of string
    :param texts: list of EDUs for parsing
    """
    # Initialize parser
    srparser = SRParser([], [])
    srparser.init(texts)
    # Parsing
    while not srparser.endparsing():
        # Generate features
        stack, queue = srparser.getstatus()
        # Make sure to call the generator with the same
        # arguments as in the data generation part
        fg = FeatureGenerator(stack, queue)
        features = fg.features()
        label = self.predict(features)
        action = label2action(label)
        # Only the single best prediction is tried here; a better choice
        # would be to fall back to the first legal action among the
        # ranked labels, as in the variant above
        try:
            srparser.operate(action)
        except ActionError:
            print "Parsing action error with {}".format(action)
            sys.exit()
    tree = srparser.getparsetree()
    rst = RSTTree(tree=tree)
    return rst
def evalparser(path='./examples', report=False):
    """ Test the parsing performance

    :type path: string
    :param path: path to the evaluation data

    :type report: boolean
    :param report: whether to report (calculate) the f1 score
    """
    from os import listdir
    from os.path import join as joinpath
    # ----------------------------------------
    # Load the parsing model
    pm = ParsingModel()
    pm.loadmodel("parsing-model.pickle.gz")
    # ----------------------------------------
    # Evaluation
    met = Metrics(levels=['span', 'nuclearity', 'relation'])
    # ----------------------------------------
    # Read all files from the given path
    doclist = [joinpath(path, fname) for fname in listdir(path)
               if fname.endswith('.edus')]
    for fedus in doclist:
        # ----------------------------------------
        # Parsing
        pred_rst = parse(pm, fedus=fedus)
        # Dump the predicted tree (note: "test.dis" is overwritten
        # on every iteration)
        with open("test.dis", "w") as fout:
            fout.write(str(pred_rst))
        # pred_brackets = pred_rst.bracketing()
        # fbrackets = fedus.replace('edus', 'brackets')
        # writebrackets(fbrackets, pred_brackets)
        # ----------------------------------------
        # Evaluate with gold RST tree
        if report:
            fdis = fedus.replace('edus', 'dis')
            gold_rst = RSTTree(fname=fdis)
            gold_rst.build()
            gold_brackets = gold_rst.bracketing()
            met.eval(gold_rst, pred_rst)
    if report:
        met.report()
def evalparser(path='./examples', report=False):
    """ Test the parsing performance

    :type path: string
    :param path: path to the evaluation data

    :type report: boolean
    :param report: whether to report (calculate) the f1 score
    """
    from os import listdir
    from os.path import join as joinpath
    # ----------------------------------------
    # Load the parsing model
    pm = ParsingModel()
    pm.loadmodel("parsing-model.pickle.gz")
    # ----------------------------------------
    # Evaluation
    met = Metrics(levels=['span', 'nuclearity', 'relation'])
    # ----------------------------------------
    # Read all files from the given path
    doclist = [joinpath(path, fname) for fname in listdir(path)
               if fname.endswith('.edus')]
    for fedus in doclist:
        # ----------------------------------------
        # Parsing
        fpos = fedus + ".pos"
        d_pos = get_d_pos(fpos)
        fdep = fedus + ".dep"
        d_dep = get_d_dep(fdep)
        pred_rst = parse(pm, fedus=fedus, d_pos=d_pos, d_dep=d_dep)
        # Get brackets from parsing results
        pred_brackets = pred_rst.bracketing()
        fbrackets = fedus.replace('edus', 'brackets')
        writebrackets(fbrackets, pred_brackets)
        # ----------------------------------------
        # Evaluate with gold RST tree
        if report:
            fdis = fedus.replace('edus', 'dis')
            gold_rst = RSTTree(fname=fdis)
            gold_rst.build()
            gold_brackets = gold_rst.bracketing()
            met.eval(gold_rst, pred_rst)
    if report:
        met.report()
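
# The Metrics class used above is not shown here. As an illustrative
# stand-in only, span-level F1 is typically computed by comparing the
# gold and predicted bracket sets along these lines; this sketch assumes
# the first element of each bracket tuple is the EDU span.
def span_f1(gold_brackets, pred_brackets):
    gold_spans = set(b[0] for b in gold_brackets)
    pred_spans = set(b[0] for b in pred_brackets)
    hits = len(gold_spans & pred_spans)
    p = hits / float(len(pred_spans)) if pred_spans else 0.0
    r = hits / float(len(gold_spans)) if gold_spans else 0.0
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0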
def sr_parse(self, texts, fname):
    """ Shift-reduce RST parsing based on model prediction

    :type texts: list of string
    :param texts: list of EDUs for parsing

    :type fname: string
    :param fname: name of the *.edus file; the companion *.dep, *.pos
        and *.line files are located from its basename
    """
    # Initialize parser
    srparser = SRParser([], [])
    dep, pos, lines = defaultdict(), defaultdict(), defaultdict()
    # Derive the basename of the companion files
    s = fname.split(".edus")
    if fname.endswith(".out.edus"):
        s = fname.split(".out.edus")
    # Load dependencies, POS tags and sentence lines; each line of the
    # companion files is a key/value pair delimited by '@#%^&*'
    with open(s[0] + '.dep', "r") as f:
        for line in f.read().splitlines():
            l = line.split('@#%^&*')
            dep[l[0]] = l[1]
    with open(s[0] + '.pos', "r") as f:
        for line in f.read().splitlines():
            l = line.split('@#%^&*')
            pos[l[0]] = l[1].strip()
    with open(s[0] + '.line', "r") as f:
        for line in f.read().splitlines():
            l = line.split('@#%^&*')
            lines[l[0]] = l[1]
    srparser.init(texts, pos, dep, lines)
    # Parsing
    while not srparser.endparsing():
        # Generate features
        stack, queue = srparser.getstatus()
        # Make sure to call the generator with the same
        # arguments as in the data generation part
        fg = FeatureGenerator(stack, queue)
        features = fg.features()
        labels = self.predict(features)
        # Enumerate all candidate actions, ranked by prediction score
        for i, label in enumerate(labels):
            action = label2action(label)
            try:
                srparser.operate(action)
                break  # legal action, end the loop
            except ActionError:
                if i < len(labels) - 1:
                    # Illegal action, try the next candidate
                    continue
                else:
                    print "Parsing action error with {}".format(action)
                    sys.exit()
    tree = srparser.getparsetree()
    rst = RSTTree(tree=tree)
    return rst
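
# The three companion files read above share one line format: a key and
# a value separated by the literal delimiter '@#%^&*', e.g. the
# hypothetical line "3@#%^&*NNP". This helper is an equivalent sketch of
# that loading step, mirroring the l[0]/l[1] handling above.
def read_aux_file(fpath, strip_value=False):
    mapping = {}
    with open(fpath, "r") as f:
        for line in f.read().splitlines():
            parts = line.split('@#%^&*')
            mapping[parts[0]] = parts[1].strip() if strip_value else parts[1]
    return mapping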