def gold_singletons(base_directory):
    """Return the response NPs that overlap no gold annotation.

    An NP counts as overlapping when it contains a gold annotation or a
    gold annotation contains it (as decided by the annotations' own
    ``contains`` method).  Each qualifying NP appears once in the result,
    in the order it is encountered.

    base_directory -- document directory handed to the reconcile readers.
    """
    candidate_nps = reconcile.getNPs_annots(base_directory)
    gold_annots = reconcile.parseGoldAnnots(base_directory)

    singletons = []
    for candidate in candidate_nps:
        overlaps_gold = any(
            candidate.contains(gold) or gold.contains(candidate)
            for gold in gold_annots
        )
        # Skip NPs tied to a gold mention, and ones already collected.
        if overlaps_gold or candidate in singletons:
            continue
        singletons.append(candidate)
    return singletons
# File Name : null-hunter.py
# Purpose : Prints every NP annotation whose text is the empty string,
#           for each document named in a file list.
# Creation Date : 12-22-2011
# Last Modified : Thu 22 Dec 2011 11:38:14 AM MST
# Created By : Nathan Gilbert
#
import sys

from pyconcile.document import Document
from pyconcile import reconcile

if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage: %s <filelist>" % (sys.argv[0]))
        sys.exit(1)

    # The context manager closes the file list even when loading a
    # document's NPs raises part-way through.
    with open(sys.argv[1], 'r') as fList:
        for f in fList:
            # Strip before testing so indented comment lines and blank
            # lines are skipped instead of being treated as documents.
            f = f.strip()
            if not f or f.startswith("#"):
                continue

            print("Working on document: %s" % f)
            nps = reconcile.getNPs_annots(f)
            for noun_phrase in nps:
                # An empty text span is what this script hunts for.
                if noun_phrase.getText() == "":
                    print(noun_phrase)
default=-1) parser.add_option("-2", "--anaphor", help="The anaphor id", action="store", dest="anaphor", type="int", default=-1) (options, args) = parser.parse_args() if (len(sys.argv) < 2) or ((options.treefile == "") and \ (options.featurefile == "")): parser.print_help() sys.exit(1) nps = reconcile.getNPs_annots(options.directory) antecedent = nps.getAnnotByID(options.antecedent) anaphor = nps.getAnnotByID(options.anaphor) #print antecedent.ppprint() #print anaphor.ppprint() features = reconcile.getFeatures(options.directory, options.featurefile) key = "%d,%d" % (options.antecedent, options.anaphor) pair_features = features[key] #for k in sorted(pair_features.keys()): # print "%s = %s" % (k, str(pair_features[k])) #read in the tree treeFile = open(options.treefile, 'r')
#!/usr/bin/python
# File Name : gold_np_overlap.py
# Purpose : Prints out the response nps that overlap the gold NPs
# Creation Date : 11-22-2011
# Last Modified : Tue 22 Nov 2011 04:16:14 PM MST
# Created By : Nathan Gilbert
#
import sys

from pyconcile import reconcile

if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage: %s <dir>" % (sys.argv[0]))
        sys.exit(1)

    doc_dir = sys.argv[1]

    # Response NPs first, then the gsNPs file carrying the MATCHED ids.
    response_nps = reconcile.getNPs_annots(doc_dir)
    gs_nps = reconcile.getGSNPs(doc_dir)

    for r_np in response_nps:
        # "C" when some gold NP records this response NP's id as its
        # match, "I" when none does.
        was_matched = any(
            g_np.getATTR("MATCHED") == r_np.getID() for g_np in gs_nps
        )
        line_format = "C :%s" if was_matched else "I :%s"
        print(line_format % r_np.pprint())
fileList = open(sys.argv[1], 'r') anaphoric_nominals = annotation_set.AnnotationSet("anaphoric_noms") existential_nominals = annotation_set.AnnotationSet("exist_noms") for f in fileList: f = f.strip() if f.startswith("#"): continue print("Working on %s" % f) gold_annots = reconcile.parseGoldAnnots(f) gold_chains = reconcile.getGoldChains(f) response_nps = reconcile.getNPs_annots(f) pos = reconcile.getPOS(f, True) reconcile.addSundanceProps(f, response_nps) utils.match_nps(gold_annots, response_nps) for g in gold_annots: if g.getATTR("GOLD_TYPE") != "NOM": continue for r in response_nps: if g.getATTR("MATCHED") == r.getID(): g.addProps(r.getProps()) if g.getATTR("GRAMMAR") == "SUBJECT" or g.getATTR( "SUN_ROLE") == "SUBJ": g.setProp("S_VERB", reconcile.getSubjVerb(g, pos))
def gold_annotations(f):
    """Collect nominal-mention statistics from one gold-annotated document.

    Walks every gold coreference chain of document ``f``.  The first
    mention of a chain is treated as its base antecedent: if it is a
    nominal and not a gold singleton it is counted in
    ``nominal_base_antecedent``.  Every later nominal mention is counted
    as a "virtual pronoun" and, relative to the mention that precedes it
    in the chain, its sentence distance, TextTiling segment distance and
    the type of that preceding mention (nominal / pronoun / proper) are
    recorded.  Semantic classes, governing verbs, containing chains and
    document occurrences are accumulated for both kinds of mention.

    All results are written into module-level accumulators; nothing is
    returned by the code visible here.

    f -- document path/identifier passed to ``Document`` and the
         ``reconcile`` readers.
    """
    global virtual_pronouns, total_counts, virtual_pronoun_heads, \
        nominal_base_antecedent, distance_from_antecedent

    doc = Document(f)
    gold_chains = reconcile.getGoldChains(f)

    #adding in Sundance nes.
    nes = reconcile.getNEs(f, True)
    add_reconcile_semantic_class(gold_chains, nes)

    #adding in Reconcile pos too.
    pos = reconcile.getPOS(f, True)

    #getting the docs nps
    reconcile_nps = reconcile.getNPs_annots(f)

    #getting sundance nps
    sundance_nps = reconcile.getSundanceNPs(f)
    add_sundance_nps(gold_chains, sundance_nps)

    original_text_heads = {}  # just getting the heads
    original_text = defaultdict(list)  # for getting total doc counts later.
    nominal2chains = defaultdict(list)  # the chains that a given nominal appears.

    for chain in list(gold_chains.keys()):
        base_antecedent = True
        prev_annot = None
        antecedents = 0

        for mention in gold_chains[chain]:
            #if the first antecedent in a chain, do not list it as anaphoric.
            if base_antecedent:
                if mention.getATTR("is_nominal") and not \
                        mention.getATTR("GOLD_SINGLETON"):
                    text = mention.getText()
                    text_lower = mention.getATTR("TEXT_CLEAN").lower()
                    docs_appeared[text_lower].append(f)

                    nominal_base_antecedent[text_lower] = \
                        nominal_base_antecedent.get(text_lower, 0) + 1

                    original_text[text_lower].append(text)

                    #take note that this chain contained this nominal
                    nominal2chains[text_lower].append(chain)

                    #take note of the gold semantic class
                    gold_semantic_class[text_lower].append(
                        mention.getATTR("GOLD_SEMANTIC"))

                    #reconcile's semantic class
                    reconcile_semantic_class[text_lower].append(
                        mention.getATTR("NE_CLASS"))

                    #sundance's semantic class
                    sun_semantic_class[text_lower].append(
                        mention.getATTR("SUN_SEMANTIC"))

                    number_gold_antecedents[text_lower].append(antecedents)

                    #get verb stats
                    if mention.getATTR("ROLE") == "SUBJ":
                        verb = reconcile.getSubjVerb(mention, pos)
                        if verb is not None:
                            subj_verbs[text_lower].append(verb.lower())
                    elif mention.getATTR("ROLE") == "DOBJ":
                        verb = reconcile.getObjVerb(mention, pos)
                        if verb is not None:
                            obj_verbs[text_lower].append(verb.lower())

                # The first mention always becomes the "previous" mention
                # for the rest of the chain, nominal or not.
                base_antecedent = False
                prev_annot = mention
                antecedents += 1
                continue

            if mention.getATTR("is_nominal"):
                text = mention.getText()
                text_lower = mention.getATTR("TEXT_CLEAN").lower()
                head_text = mention.getATTR("HEAD_TEXT")

                original_text[text_lower].append(text)
                virtual_pronouns[text_lower] = \
                    virtual_pronouns.get(text_lower, 0) + 1

                virtual_pronoun_heads[head_text.lower()] = \
                    virtual_pronoun_heads.get(head_text.lower(), 0) + 1

                #the semantic class Reconcile puts this in.
                reconcile_semantic_class[text_lower].append(
                    mention.getATTR("NE_CLASS"))

                #register this doc as containing this np.
                docs_appeared[text_lower].append(f)

                #take note that this chain contained this nominal
                nominal2chains[text_lower].append(chain)

                #take note of the gold semantic class
                gold_semantic_class[text_lower].append(
                    mention.getATTR("GOLD_SEMANTIC"))

                #the number of possible correct antecedents for this anaphor
                number_gold_antecedents[text_lower].append(antecedents)

                #sundance's semantic class
                sun_semantic_class[text_lower].append(
                    mention.getATTR("SUN_SEMANTIC"))

                # subject verb statistics
                # The verb lookups are guarded against None exactly as in
                # the base-antecedent branch above; without the guard a
                # mention with no recoverable verb raised AttributeError.
                if mention.getATTR("ROLE") == "SUBJ":
                    verb = reconcile.getSubjVerb(mention, pos)
                    if verb is not None:
                        subj_verbs[text_lower].append(verb.lower())
                elif mention.getATTR("ROLE") == "DOBJ":
                    verb = reconcile.getObjVerb(mention, pos)
                    if verb is not None:
                        obj_verbs[text_lower].append(verb.lower())

                #get the sentence distance from these two mentions.
                mention_sent = reconcile.getAnnotSentence(f, mention)
                prev_sent = reconcile.getAnnotSentence(f, prev_annot)

                # Only record a distance when both values are above -1.
                if mention_sent > -1 and prev_sent > -1:
                    distance_from_antecedent[text_lower].append(
                        mention_sent - prev_sent)

                #get the TextTiling segment distance for the two mentions
                mention_seg = doc.getAnnotTile(mention)
                prev_seg = doc.getAnnotTile(prev_annot)
                if mention_seg > -1 and prev_seg > -1:
                    focus_distance[text_lower].append(mention_seg - prev_seg)

                #getting the distribution of closest antecedent types for a
                #given nominal
                if prev_annot.getATTR("is_nominal"):
                    antecedent_type = "nominal"
                elif prev_annot.getATTR("is_pronoun"):
                    antecedent_type = "pronoun"
                else:
                    antecedent_type = "proper"
                nominals2type[text_lower][antecedent_type] = \
                    nominals2type[text_lower].get(antecedent_type, 0) + 1

            prev_annot = mention
            antecedents += 1

    #update the total counts.
    # Each distinct surface form of a nominal is counted once per document.
    for key, surface_forms in original_text.items():
        for text in set(surface_forms):
            total_counts[key] = total_counts.get(key, 0) + \
                doc.getWordCounts(text)

    #the head counts
    for key in list(virtual_pronoun_heads.keys()):
        total_counts_heads[key] = total_counts_heads.get(key, 0) + \
            doc.getWordCounts(key)