def single_ann_measures(a, kirchhoff=False):
    '''
    Gather descriptive counts for a single annotation graph.

    Note that `a` is modified along the way: simplify_coord(), upward()
    and downward() are all applied to it.

    a: the annotation graph to measure
    kirchhoff: passed through unchanged to promcom()

    Returns a Counter holding the counts computed here plus whatever
    promcom() adds to it.
    '''
    stats = Counter()

    # Lexical-level counts; these are read before simplify_coord(a) runs.
    stats['lexnodes'] = len(a.lexnodes)
    span_sizes = [len(node.tokens) for node in a.lexnodes]
    stats['1W'] = span_sizes.count(1)
    stats['MW'] = sum(1 for size in span_sizes if size > 1)
    stats['omittedtoks'] = len(a.alltokens) - sum(span_sizes)

    stats['coordnodes'] = len(a.coordnodes)
    stats['anaphlinks'] = len(a.anaphlinks)
    stats['FNs'] = len(a.fenodes)
    stats['explicitly rooted utterances'] = len(a.root.children)

    simplify_coord(a)
    stats['projective'] = int(a.isProjective)

    # literal number of connected components in the graph
    # (not counting the root when nothing was explicitly attached to it)
    root_only = {a.root}
    fragments = set()
    for node in a.nodes:
        if node.frag.nodes != root_only:
            fragments.add(node.frag)
    stats['fragments'] = len(fragments)
    assert stats['fragments'] > 0, (a.nodes, a.lexnodes)

    # Lower bound: the explicitly rooted utterances (at least one).
    # Upper bound: additionally count every fragment beyond the first.
    rooted = max(1, stats['explicitly rooted utterances'])
    stats['max utterances'] = rooted + (stats['fragments'] - 1)
    stats['min utterances'] = rooted
    assert stats['max utterances'] >= stats['min utterances'], stats

    upward(a)
    downward(a)
    # Lexical nodes that list the root among their parent candidates.
    stats['possible utterance heads'] = sum(
        1 for node in a.lexnodes if a.root in node.parentcandidates
    )

    promcom(a, stats, kirchhoff=kirchhoff)
    return stats
def iapromcom(a1, a2, c, escapebrackets=False, kirchhoff=False):
    '''
    Measures local compatibility of constraints in two annotations.
    Assumes single_ann_measures() has already been run independently
    on the two annotation graphs.

    See merge_annotations.py to measure global compatibility of two
    annotations (i.e. reasoning over the entire structure).

    a1, a2: the two annotation graphs to compare
    c: mapping that receives the results (modified in place)
    escapebrackets: passed through unchanged to merge()
    kirchhoff: passed through unchanged to promcom()

    Nothing is returned. Entries written to `c`:
      softprec_1|2, softprec_2|1   jointly supported parent candidates
                                   divided by all parent candidates of
                                   annotation 1 (resp. 2)
      a1com, a2com                 'commitment' reported by promcom()
                                   for each annotation
      softcomprec_1|2, _2|1        commitment * soft precision
      comprec_1|2, comprec_2|1     merged commitment * merged promiscuity
                                   / promiscuity of annotation 1 (resp. 2);
                                   set to ValueStats(0) when the merged
                                   analysis fails in a recognized way
    plus flag entries set to 1 ('comprec_nonzero', 'no_spanning_trees',
    'no_compatible_trees', 'empty_spanning_tree_graph', 'merge_cycle',
    'merge_extra_top', 'no_valid_merge', 'softcomprec_discarded',
    'too_many_spanning_trees') describing which path was taken.

    Exceptions whose message is not one of the recognized failure
    messages are re-raised.
    '''

    def note_failure(ex):
        # Zero the merged-annotation scores, log the error together with
        # the sentence, and hand back the error text for classification.
        c['comprec_1|2'] = ValueStats(0)
        c['comprec_2|1'] = ValueStats(0)
        print('~',ex, file=sys.stderr)
        print(' '.join(a1.alltokens)+'\n', file=sys.stderr)
        # Exceptions have no .message attribute on Python 3; reading it
        # there would raise AttributeError inside the handler and hide
        # the real error. Prefer .message when present (unchanged
        # behavior on Python 2), otherwise use str(ex).
        text = getattr(ex, 'message', None)
        return str(ex) if text is None else text

    a1J, a2J = a1.to_json_simplecoord(), a2.to_json_simplecoord()
    m = merge([a1J, a2J], updatelex=True, escapebrackets=escapebrackets)

    # Rebuild both graphs from the JSON that was handed to merge()
    # (with updatelex=True) and recompute their parent candidates.
    a1U = FUDGGraph(a1J)
    a2U = FUDGGraph(a2J)
    upward(a1U)
    downward(a1U)
    upward(a2U)
    downward(a2U)

    # Every lexical node must be addressable by name in both graphs.
    for n in a1U.lexnodes | a2U.lexnodes:
        assert n.json_name in a1U.nodesbyname,(n.json_name,m,'-------------------',a1J)
        assert n.json_name in a2U.nodesbyname,(n.json_name,m,'-------------------',a2J)

    # For each lexical node, the parent candidates (by name) that both
    # annotations allow.
    jointSuppParents = {
        n.name: {p.json_name for p in a1U.nodesbyname[n.json_name].parentcandidates}
                & {p.json_name for p in a2U.nodesbyname[n.json_name].parentcandidates}
        for n in (a1U.lexnodes|a2U.lexnodes)
    }

    # compute single-annotation commitment (w/ compatible lexical level)
    a1C, a2C = Counter(), Counter()
    promcom(a1U, a1C, kirchhoff=kirchhoff)
    promcom(a2U, a2C, kirchhoff=kirchhoff)
    a1com, a2com = a1C['commitment'], a2C['commitment']

    # NOTE(review): both divisions raise ZeroDivisionError if an annotation
    # has no parent candidates at all; left as in the original.
    numer = sum(len(jointpars) for jointpars in jointSuppParents.values())
    c['softprec_1|2'] = ValueStats(numer/sum(len(n.parentcandidates) for n in a1U.lexnodes))
    c['softprec_2|1'] = ValueStats(numer/sum(len(n.parentcandidates) for n in a2U.lexnodes))

    if math.isnan(float(a1com)) or math.isnan(float(a2com)):
        # A NaN commitment is only expected when promcom() flagged a
        # spanning tree overflow; record empty stats and stop here.
        assert a1C['spanning tree overflow'] or a2C['spanning tree overflow'],('internally inconsistent:', a1C if math.isnan(float(a1com)) else a2C)
        c['a1com'] = ValueStats()
        c['a2com'] = ValueStats()
        c['softcomprec_1|2'] = ValueStats()
        c['softcomprec_2|1'] = ValueStats()
        c['softcomprec_discarded'] = 1
        c['too_many_spanning_trees'] = 1
        c['comprec_1|2'] = ValueStats()
        c['comprec_2|1'] = ValueStats()
        return

    assert 0.0<=float(a1com)<=1.0,float(a1com)
    assert 0.0<=float(a2com)<=1.0,float(a2com)
    c['a1com'] = ValueStats(a1com)
    c['a2com'] = ValueStats(a2com)
    c['softcomprec_1|2'] = ValueStats(float(a1com)*float(c['softprec_1|2']))
    c['softcomprec_2|1'] = ValueStats(float(a2com)*float(c['softprec_2|1']))

    # Analyze the merged annotation. The three nested handlers classify
    # failures by stage: graph construction, candidate computation, and
    # promcom(). An unrecognized error re-raised by an inner handler also
    # passes through the enclosing handlers on its way out.
    try:
        ma = FUDGGraph(m)
        try:
            upward(ma)
            downward(ma)
            maC = Counter()
            try:
                promcom(ma, maC, kirchhoff=kirchhoff)
                maprom = maC['promiscuity']
                macom = maC['commitment']
                c['comprec_1|2'] = ValueStats(float(macom)*float(maprom)/float(a1C['promiscuity']))
                c['comprec_2|1'] = ValueStats(float(macom)*float(maprom)/float(a2C['promiscuity']))
                c['comprec_nonzero'] = 1
            except Exception as ex:
                msg = note_failure(ex)
                if 'No spanning trees' in msg:
                    c['no_spanning_trees'] = 1
                elif 'No compatible trees' in msg:
                    c['no_compatible_trees'] = 1    # due to external-attachment-to-FE constraint
                else:
                    print('unknown error in promcom()', file=sys.stderr)
                    raise
        except Exception as ex:
            msg = note_failure(ex)
            if 'any possible heads' in msg:
                c['empty_spanning_tree_graph'] = 1
            else:
                raise
    except Exception as ex:
        msg = note_failure(ex)
        if 'cycle' in msg:
            c['merge_cycle'] = 1
        elif 'specified top' in msg:
            c['merge_extra_top'] = 1
        else:
            raise
        c['no_valid_merge'] = 1