def processACE(f, np, heads2qp):
    """Update per-head statistics in heads2qp for one ACE noun phrase.

    Looks up the gold (ACE) annotation covering *np* in document *f*; if it
    is a nominal mention, books the head noun into heads2qp (creating a
    QuasiPronoun entry on first sight), bumps its singleton or
    chain-starting counters, and hands off to process() for further work.

    Args:
        f: path/identifier of the document being processed.
        np: the noun-phrase annotation (provides getStart/getEnd/getText).
        heads2qp: dict mapping head string -> QuasiPronoun; mutated in place.
    """
    ace_annots = reconcile.parseGoldAnnots(f)
    stanford_deps = reconcile.getStanfordDep(f)
    gold_chains = reconcile.getGoldChains(f)
    ace_np = ace_annots.getAnnotBySpan(np.getStart(), np.getEnd())
    if ace_np["is_nominal"]:
        head = utils.textClean(ace_np["HEAD"].strip().lower())
        text = utils.textClean(np.getText())
        # bookkeeping: first sighting creates the entry, repeats update it
        if head not in heads2qp:
            heads2qp[head] = QuasiPronoun(head)
        else:
            heads2qp[head].updateDocs(f)
            heads2qp[head].updateCount()
        if ace_np["GOLD_SINGLETON"]:
            # BUGFIX: was ".singelton" (misspelled attribute) — the sibling
            # processACE(f, head2qp) increments ".singleton", so the typo'd
            # counter never fed the real singleton tally.
            heads2qp[head].singleton += 1
        else:
            # does it start a chain? (np is the first mention of some chain)
            for gc in gold_chains:
                if gold_chains[gc][0] == np:
                    heads2qp[head].starts_chain += 1
                    break
        process(f, np, head, text, heads2qp, stanford_deps)
def getACECommonNouns(f):
    """Collect the response NPs of document *f* whose matching gold
    annotation is a nominal, non-singleton mention.

    Returns an AnnotationSet named "common_nouns".
    """
    common_nps = AnnotationSet("common_nouns")
    gold_nps = reconcile.parseGoldAnnots(f)
    for candidate in reconcile.getNPs(f):
        gold = gold_nps.getAnnotBySpan(candidate.getStart(), candidate.getEnd())
        if gold["is_nominal"] and not gold["GOLD_SINGLETON"]:
            common_nps.add(candidate)
    return common_nps
def gold_singletons(base_directory):
    """Return the response NPs that overlap no gold annotation at all
    (i.e. existential/singleton candidates), without duplicates."""
    response_nps = reconcile.getNPs_annots(base_directory)
    gold_annots = reconcile.parseGoldAnnots(base_directory)
    existentials = []
    for np in response_nps:
        # an NP is kept only if neither it nor any gold span contains the other
        overlaps_gold = any(np.contains(g) or g.contains(np) for g in gold_annots)
        if not overlaps_gold and np not in existentials:
            existentials.append(np)
    return existentials
def processACE(f, head2qp):
    """Tally head-noun statistics for every NP of document *f*.

    Depending on the module flag PRONOUNS, either pronoun NPs or nominal
    gold mentions are booked into head2qp (head string -> QuasiPronoun,
    mutated in place). Singleton mentions bump the singleton counter (and
    faux_ba for bare "a/an" phrases); chained mentions are forwarded to
    process_gold (when USE_GOLD) and process_syntax.
    """
    global USE_GOLD
    ace_annots = reconcile.parseGoldAnnots(f)
    stanford_deps = reconcile.getStanfordDep(f)
    gold_chains = reconcile.getGoldChains(f)
    for np in reconcile.getNPs(f):
        ace_np = ace_annots.getAnnotBySpan(np.getStart(), np.getEnd())
        # select the mention type we are counting; skip everything else
        if PRONOUNS:
            if not qp_utils.isPronoun(np):
                continue
            head = ace_np["HEAD"].lower()
            text = np.getText()
        else:
            if not ace_np["is_nominal"]:
                continue
            head = utils.textClean(ace_np["HEAD"].strip().lower())
            text = utils.textClean(np.getText())
        # bookkeeping: first sighting creates the entry, repeats update it
        if head in head2qp:
            head2qp[head].updateDocs(f)
            head2qp[head].updateCount()
        else:
            head2qp[head] = QuasiPronoun(head)
        if ace_np["GOLD_SINGLETON"]:
            head2qp[head].singleton += 1
            if text.startswith(("a ", "an ")):
                head2qp[head].faux_ba += 1
        else:
            # does it start the chain?
            if USE_GOLD:
                process_gold(f, np, head, text, head2qp, gold_chains)
            process_syntax(f, np, head, text, head2qp, stanford_deps)
#set up for bare definites # ACE_HEADS.append("the " + line) # ACE_HEADS.append("that " + line) # ACE_HEADS.append("this " + line) # used.append(line) #set up for all commons #ACE_HEADS.append(line) for f in files: f = f.strip() print("Working on file: {0}".format(f)) this_files_common_nouns = [] if ACE: tokens = reconcile.getTokens(f) pos = reconcile.getPOS(f) ace_annots = reconcile.parseGoldAnnots(f) this_files_common_nouns_orig = [] collectACEFPs(ace_annots, this_files_common_nouns_orig) #remove post modded commons for fp in this_files_common_nouns_orig: if not checkForModification(fp, tokens, pos): this_files_common_nouns.append(fp) else: gold_nps = reconcile.getNPs(f) collectFPs(gold_nps, this_files_common_nouns) #output common nouns to file i = 0 with open(f + "/annotations/faux_pronouns", 'w') as outFile: for annot in this_files_common_nouns:
# Created By : Nathan Gilbert
#
import sys

from pyconcile import reconcile
from pyconcile import data

if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage: %s <first-argument>" % (sys.argv[0]))
        sys.exit(1)

    # Collect the unique, non-pronoun gold mention strings across all files
    # listed (one path per line; '#' lines are skipped) in sys.argv[1].
    V = []
    seen = set()  # O(1) membership check; V keeps first-seen order for output
    # FIX: use a context manager — the original never closed the file handle.
    with open(sys.argv[1], 'r') as fList:
        for f in fList:
            f = f.strip()
            if f.startswith("#"):
                continue
            gold_annots = reconcile.parseGoldAnnots(f, True)
            for g in gold_annots:
                text = g.getText().replace("\n", "").replace("\r", "")
                if text in data.ALL_PRONOUNS:
                    continue
                if text not in seen:
                    seen.add(text)
                    V.append(text)
    for v in V:
        print("\"%s\"," % v)
stats_file = open(sys.argv[2], 'r') for line in stats_file: if line.startswith("TEXT:"): text = line.replace("TEXT:", "").strip() commons.append(text) stats_file.close() test_docs = open(sys.argv[1], 'r') found_gold = 0 found_response = 0 total_gold_nps = 0 total_response_nps = 0 for doc in test_docs: doc = doc.strip() gold_nps = reconcile.parseGoldAnnots(doc) response_nps = reconcile.getNPs_annots(doc) for a in gold_nps: total_gold_nps += 1 #print a.getATTR("TEXT_CLEAN").lower() if a.getATTR("TEXT_CLEAN").lower() in commons: found_gold += 1 for a in response_nps: total_response_nps += 1 if a.getATTR("TEXT_CLEAN").lower() in commons: found_response += 1 test_docs.close() print("Found: %d/%d (%0.2f) from stats" %
# Build the annotation pools for the labeling pass over the gold files.
text2docs2spans = defaultdict(dict)   # text -> {doc -> spans} (filled later)
class2annots = defaultdict(list)      # semantic class label -> annotations
#text -> semantic_class -> decision
cache = defaultdict(dict)
readInCache(cache)                    # pre-load previously made decisions
needed_annotations = 0
for f in filelist:
    # NOTE(review): the '#' check runs before strip(), so a comment line with
    # leading whitespace would NOT be skipped — confirm the list format.
    if f.startswith("#"):
        continue
    f = f.strip()
    gold_annots = reconcile.parseGoldAnnots(options.dir + "/" + f)
    annots = getSpecificNPs(gold_annots)
    # NOTE(review): overwritten on every iteration — presumably this was
    # meant to accumulate (+=) across files; verify intended semantics.
    needed_annotations = len(annots)
    #for a in annots:
    #    if a.getATTR("GOLD_SEMANTIC") in ("ORGANIZATION", "GPE"):
    #        class2annots["ORG"].append(a)
    #    elif a.getATTR("GOLD_SEMANTIC") == "PERSON":
    #        class2annots["PER"].append(a)
    #    elif a.getATTR("GOLD_SEMANTIC") == "LOC":
    #        class2annots["LOC"].append(a)
#actually perform the annotations
#for sem in class2annots.keys():
#check the cache to make see if we already have an answer
num = 0