def onlineparse_others(self):
    """Parse the gold sentences of ``self.processedcorpus_others`` and pickle the corpus.

    Every sentence in each document's ``"gold_text"`` list is sent to an
    ``OnlineFanseParser``. The resulting tag tuples are appended to both
    ``doc["gold_tags"]`` and ``doc["RVtest_tags"]``, and the full PAS
    extraction is appended to both ``doc["RVtest_PAS"]`` and
    ``doc["gold_PAS"]``, so gold and test entries are derived from the same
    parse. The documents are modified in place.

    A ``KeyboardInterrupt`` stops the parsing loop; whatever was collected up
    to that point is still written out. The corpus is pickled next to
    ``self.path`` under ``self.outputname``, or under
    ``"fce_processed_v2VB.pickle3"`` when ``self.outputname`` is empty.

    Returns:
        True once the pickle file has been written.
    """
    parser = OnlineFanseParser(w_dir=self.tmppath_op)
    parser.check_running()
    try:
        total_docs = len(self.processedcorpus_others)
        corpus_items = self.processedcorpus_others.iteritems()
        for doc_no, (_name, document) in enumerate(corpus_items, 1):
            # The progress line counts documents but is emitted once per sentence.
            progress = "Parsing and getting PAS tags... (%i of %i)" % (doc_no, total_docs)
            for sentence in document["gold_text"]:
                print(progress)
                logging.debug(pformat(progress))

                parsed_lines = parser.parse_one(sentence)
                tag_tuples = [tuple(line.split("\t")) for line in parsed_lines]
                document["gold_tags"].append(tag_tuples)
                document["RVtest_tags"].append(tag_tuples)

                extractor = pas_extractor.OnlinePasExtractor(parsed_lines)
                # extract_full() is called once per target list, as before.
                document["RVtest_PAS"].append(extractor.extract_full())
                document["gold_PAS"].append(extractor.extract_full())

                logging.debug(pformat(document["RVtest_PAS"]))
                logging.debug(pformat(document["gold_tags"]))
    except KeyboardInterrupt:
        print("Interrupted... aborting parsing.")
    # NOTE(review): a call to the parser's clean() was commented out here in
    # the earlier version; no parser cleanup is performed.

    if self.outputname != "":
        target_name = self.outputname
    else:
        target_name = "fce_processed_v2VB.pickle3"
    target_path = os.path.join(os.path.dirname(self.path), target_name)
    with open(target_path, "wb") as pickle_file:
        # Protocol -1 selects the highest pickle protocol available.
        pickle.dump(self.processedcorpus_others, pickle_file, -1)
    return True
def onlineparse_RV(self):
    """Parse gold and RV-test sentences of ``self.processedcorpus`` and store tags/PAS.

    For each document, the sentences in ``doc["gold_text"]`` and the sentence
    at the same index in ``doc["RVtest_text"]`` are both sent to an
    ``OnlineFanseParser``. The documents are modified in place:

    * ``doc["gold_tags"]`` / ``doc["gold_PAS"]`` receive the results for the
      gold sentence;
    * ``doc["RVtest_tags"]`` / ``doc["RVtest_PAS"]`` receive the results for
      the test sentence.

    A ``KeyboardInterrupt`` stops the loop quietly after printing a notice.
    Any other exception is pretty-printed and re-raised.

    Raises:
        Exception: whatever the parser, extractor or corpus access raised
            (for example ``IndexError`` when ``doc["RVtest_text"]`` has fewer
            entries than ``doc["gold_text"]``).
    """
    ofp = OnlineFanseParser(w_dir=self.tmppath_op)
    ofp.check_running()
    try:
        n_all = len(self.processedcorpus)
        for c, (_name, doc) in enumerate(self.processedcorpus.iteritems(), 1):
            # The progress line counts documents but is emitted once per sentence.
            progress = "Parsing and getting PAS tags... (%i of %i)" % (c, n_all)
            for idx, sent in enumerate(doc["gold_text"]):
                print(progress)
                logging.debug(pformat(progress))
                print(pformat(sent))
                test_sent = doc["RVtest_text"][idx]
                print(pformat(test_sent))

                g_parsed = ofp.parse_one(sent)
                doc["gold_tags"].append([tuple(l.split("\t")) for l in g_parsed])
                t_parsed = ofp.parse_one(test_sent)
                doc["RVtest_tags"].append([tuple(l.split("\t")) for l in t_parsed])

                g_pe = pas_extractor.OnlinePasExtractor(g_parsed)
                t_pe = pas_extractor.OnlinePasExtractor(t_parsed)
                # Keep the PAS lists aligned with the *_tags lists above: the
                # gold parse feeds gold_PAS and the test parse feeds
                # RVtest_PAS (these two targets were previously swapped).
                doc["gold_PAS"].append(g_pe.extract_full())
                doc["RVtest_PAS"].append(t_pe.extract_full())
    except KeyboardInterrupt:
        print("Interrupted... aborting parsing.")
    except Exception as e:
        # Show the error for the operator, then let the caller handle it.
        print(pformat(e))
        raise