Beispiel #1
0
def single_ann_measures(a, kirchhoff=False):
	'''
	Collect descriptive statistics for a single annotation graph `a`.

	The lexical-level and node-type counts are taken first; then simplify_coord(),
	upward() and downward() are applied to `a` (in that order) and the structural
	measures are computed. Finally promcom() is called with the same Counter so it
	can add its own entries.

	a: the annotation graph (read via .lexnodes, .alltokens, .coordnodes, .anaphlinks,
	   .fenodes, .root, .nodes, .isProjective)
	kirchhoff: passed through unchanged to promcom()

	Returns the Counter holding all measures.
	'''
	stats = Counter()

	# --- lexical level: single-word vs. multiword nodes, and tokens not covered by any lexical node
	stats['lexnodes'] = len(a.lexnodes)
	toks_per_lexnode = [len(node.tokens) for node in a.lexnodes]
	stats['1W'] = toks_per_lexnode.count(1)
	stats['MW'] = sum(1 for k in toks_per_lexnode if k>1)
	stats['omittedtoks'] = len(a.alltokens)-sum(toks_per_lexnode)

	# --- simple sizes of the other node/link collections
	for key, group in (('coordnodes', a.coordnodes),
			('anaphlinks', a.anaphlinks),
			('FNs', a.fenodes),
			('explicitly rooted utterances', a.root.children)):
		stats[key] = len(group)

	# everything below is measured after coordination has been simplified
	simplify_coord(a)
	stats['projective'] = int(a.isProjective)

	# number of distinct fragments (connected components); a fragment consisting of
	# nothing but the root is skipped, i.e. the root does not count when nothing was
	# explicitly attached to it
	root_only = {a.root}
	fragments = set()
	for node in a.nodes:
		if node.frag.nodes!=root_only:
			fragments.add(node.frag)
	stats['fragments'] = len(fragments)
	assert stats['fragments']>0,(a.nodes,a.lexnodes)

	# lower bound: the explicitly rooted utterances (at least 1);
	# upper bound: additionally, every fragment beyond the first could be its own utterance
	rooted = max(1,stats['explicitly rooted utterances'])
	stats['max utterances'] = rooted+(stats['fragments']-1)
	stats['min utterances'] = rooted
	assert stats['max utterances']>=stats['min utterances'],stats

	upward(a)
	downward(a)
	# lexical nodes that still have the root among their parent candidates
	stats['possible utterance heads'] = sum(1 for node in a.lexnodes if a.root in node.parentcandidates)
	promcom(a,stats, kirchhoff=kirchhoff)

	return stats
Beispiel #2
0
def _exception_text(ex):
	'''
	Return the message text of exception `ex`.

	Uses a non-empty `.message` attribute when the exception has one (Python 2
	exceptions, or custom exception classes that define it) and otherwise falls
	back to str(ex): built-in exceptions have no `.message` attribute in Python 3.
	'''
	return getattr(ex, 'message', None) or str(ex)

def _record_comprec_failure(c, a1, ex):
	'''
	Set both comprec scores in `c` to zero and log `ex` plus the tokens of `a1` to stderr.
	'''
	c['comprec_1|2'] = ValueStats(0)
	c['comprec_2|1'] = ValueStats(0)
	print('~',ex, file=sys.stderr)
	print(' '.join(a1.alltokens)+'\n', file=sys.stderr)

def iapromcom(a1, a2, c, escapebrackets=False, kirchhoff=False):
	'''
	Measures local compatibility of constraints in two annotations.
	Assumes single_ann_measures() has already been run independently on the two annotation graphs.
	See merge_annotations.py to measure global compatibility of two annotations (i.e. reasoning over the entire structure).

	a1, a2: the two annotation graphs to compare
	c: Counter-like mapping that receives the results (modified in place; nothing is returned)
	escapebrackets: passed through to merge()
	kirchhoff: passed through to promcom()

	Entries written to `c`:
	  softprec_1|2, softprec_2|1         jointly supported parent candidates / parent candidates of a1 (resp. a2)
	  a1com, a2com                       commitment of each annotation as computed by promcom()
	  softcomprec_1|2, softcomprec_2|1   commitment * softprec
	  comprec_1|2, comprec_2|1           commitment*promiscuity of the merged graph / promiscuity of a1 (resp. a2);
	                                     zero when the merge or its analysis failed in a recognized way
	  plus one flag describing the outcome: comprec_nonzero, too_many_spanning_trees (with
	  softcomprec_discarded), no_spanning_trees, no_compatible_trees, empty_spanning_tree_graph,
	  merge_cycle or merge_extra_top (the last two together with no_valid_merge).

	Exceptions raised while building or analyzing the merged graph whose message is not
	one of the recognized ones are re-raised.
	'''
	a1J, a2J = a1.to_json_simplecoord(), a2.to_json_simplecoord()
	# NOTE(review): merge() runs before the graphs are rebuilt from a1J/a2J; with updatelex=True it
	# presumably brings the lexical level of both inputs in line in place -- confirm in merge_annotations.py
	m = merge([a1J, a2J], updatelex=True, escapebrackets=escapebrackets)
	a1U = FUDGGraph(a1J)
	a2U = FUDGGraph(a2J)
	upward(a1U)
	downward(a1U)
	upward(a2U)
	downward(a2U)

	# every lexical node of either graph must be present (by name) in both graphs
	for n in a1U.lexnodes | a2U.lexnodes:
		assert n.json_name in a1U.nodesbyname,(n.json_name,m,'-------------------',a1J)
		assert n.json_name in a2U.nodesbyname,(n.json_name,m,'-------------------',a2J)

	# for each lexical node: the parent candidates (by name) that both annotations allow
	jointSuppParents = {}
	for n in a1U.lexnodes | a2U.lexnodes:
		cands1 = {p.json_name for p in a1U.nodesbyname[n.json_name].parentcandidates}
		cands2 = {p.json_name for p in a2U.nodesbyname[n.json_name].parentcandidates}
		jointSuppParents[n.name] = cands1 & cands2

	# compute single-annotation commitment (w/ compatible lexical level)
	a1C, a2C = Counter(), Counter()
	promcom(a1U, a1C, kirchhoff=kirchhoff)
	promcom(a2U, a2C, kirchhoff=kirchhoff)
	a1com, a2com = a1C['commitment'], a2C['commitment']

	numer = sum(len(jointpars) for jointpars in jointSuppParents.values())
	c['softprec_1|2'] = ValueStats(numer/sum(len(n.parentcandidates) for n in a1U.lexnodes))
	c['softprec_2|1'] = ValueStats(numer/sum(len(n.parentcandidates) for n in a2U.lexnodes))

	if math.isnan(float(a1com)) or math.isnan(float(a2com)):
		# a NaN commitment is only expected when promcom() reported a spanning tree overflow;
		# in that case all commitment-based scores are left empty and the pair is discarded
		assert a1C['spanning tree overflow'] or a2C['spanning tree overflow'],('internally inconsistent:', a1C if math.isnan(float(a1com)) else a2C)
		c['a1com'] = ValueStats()
		c['a2com'] = ValueStats()
		c['softcomprec_1|2'] = ValueStats()
		c['softcomprec_2|1'] = ValueStats()
		c['softcomprec_discarded'] = 1
		c['too_many_spanning_trees'] = 1
		c['comprec_1|2'] = ValueStats()
		c['comprec_2|1'] = ValueStats()
		return

	assert 0.0<=float(a1com)<=1.0,float(a1com)
	assert 0.0<=float(a2com)<=1.0,float(a2com)
	c['a1com'] = ValueStats(a1com)
	c['a2com'] = ValueStats(a2com)
	c['softcomprec_1|2'] = ValueStats(float(a1com)*float(c['softprec_1|2']))
	c['softcomprec_2|1'] = ValueStats(float(a2com)*float(c['softprec_2|1']))

	# Analyze the merged graph. Each stage has its own handler that turns recognized error
	# messages into a flag in `c`. An unrecognized error is re-raised and therefore also
	# passes through the enclosing handler(s) before leaving this function.
	try:
		ma = FUDGGraph(m)
		try:
			upward(ma)
			downward(ma)
			maC = Counter()
			try:
				promcom(ma, maC, kirchhoff=kirchhoff)
				maprom = maC['promiscuity']
				macom = maC['commitment']
				c['comprec_1|2'] = ValueStats(float(macom)*float(maprom)/float(a1C['promiscuity']))
				c['comprec_2|1'] = ValueStats(float(macom)*float(maprom)/float(a2C['promiscuity']))
				c['comprec_nonzero'] = 1
			except Exception as ex:
				_record_comprec_failure(c, a1, ex)
				msg = _exception_text(ex)
				if 'No spanning trees' in msg:
					c['no_spanning_trees'] = 1
				elif 'No compatible trees' in msg:
					c['no_compatible_trees'] = 1	# due to external-attachment-to-FE constraint
				else:
					print('unknown error in promcom()', file=sys.stderr)
					raise
		except Exception as ex:
			_record_comprec_failure(c, a1, ex)
			if 'any possible heads' in _exception_text(ex):
				c['empty_spanning_tree_graph'] = 1
			else:
				raise
	except Exception as ex:
		_record_comprec_failure(c, a1, ex)
		msg = _exception_text(ex)
		if 'cycle' in msg:
			c['merge_cycle'] = 1
		elif 'specified top' in msg:
			c['merge_extra_top'] = 1
		else:
			raise
		c['no_valid_merge'] = 1