Beispiel #1
0
def cbScoreFunction( ctx ):
    """Build the scoring factors for all literals that henryext reports for ``ctx``.

    Returns a list of ``(dnf, feature_name, weight)`` tuples.  ``dnf`` is a list
    of conjunctions; every conjunction is a list of atom strings, either
    ``"p<id>"`` (a literal) or ``"c<a> <b>"`` (a pair of arguments).  The list
    built so far is returned early when ``henryext.isTimeout(ctx)`` fires.

    Literal fields as used below (their precise meaning is defined by henryext,
    which is not visible here -- TODO confirm):
      [0] predicate name ("=" and "!=" get special treatment)
      [1] argument sequence          [2] integer literal id
      [3] tag used in feature names  [4] comma-separated string of literal ids
      [5] number used to order unification candidates
      [6] type code (4 is special-cased)   [7] grouping key for explanations
      [8] flag (1 => "$PROHIBITED_" factor) [10] name used for "AXIOM_" features

    The body avoids Python-2-only calls (``has_key``, ``xrange``, ``iteritems``)
    and is indented consistently, so it compiles on Python 2 and Python 3.
    """
    ret = []
    set_p = henryext.getLiterals(ctx)

    for p in set_p:
        if henryext.isTimeout(ctx):
            return ret
        if "=" == p[0]:
            continue

        p_atom = "p%d" % p[2]
        p_is_type4 = 4 == p[6]

        # Unary factor on p itself.
        if 1 == p[8]:
            ret.append(([[p_atom]], "$PROHIBITED_%s" % (p[0]), -10000))
        elif p_is_type4:
            ret.append(([[p_atom]], "$HYPOTHESIZED_%s" % (p[0]), -10000))
        else:
            ret.append(([[p_atom]], "HYPOTHESIZED_%s_%s" % (p[0], p[3]), 1))

        if "!=" == p[0]:
            continue

        # Explanation factors for p: unification candidates go to dnf_expl,
        # literals that list p in their field [4] are grouped in expl.
        dnf_expl, expl = [], defaultdict(list)
        p_id = repr(p[2])
        p_listed_ids = p[4].split(",")

        for q in set_p:
            if q[0] in ("=", "!="):
                continue
            if p[2] == q[2]:
                continue
            if 4 == q[6]:
                continue

            # Self-explanation is prohibited (p => q => p).
            if "" != p[4] and "" != q[4] and repr(q[2]) in p_listed_ids:
                continue

            q_atom = "p%d" % q[2]
            fc_cooc = [p_atom, q_atom]

            # Co-occurrence plus pairwise argument atoms; the argument atoms
            # are only added when both literals have the same arity.
            if len(p[1]) == len(q[1]):
                fc_cooc_vuall = fc_cooc + ["c%s %s" % (a, b) for a, b in zip(p[1], q[1])]
            else:
                fc_cooc_vuall = list(fc_cooc)

            # q lists p in its field [4]: record q as an explanation of p.
            if not p_is_type4 and p[0] != q[0] and p_id in q[4].split(","):
                expl[(q[7], q[3])].append(q_atom)

            # Predicate pairs registered in g_disj (looked up in both orders).
            if ("%s/1\t%s/1" % (p[0], q[0])) in g_disj or ("%s/1\t%s/1" % (q[0], p[0])) in g_disj:
                disj_name = "DISJOINT_%s-%s" % (max(p[0], q[0]), min(p[0], q[0]))
                if pa.disjoint:
                    ret.append(([fc_cooc_vuall], disj_name, -9999))
                elif pa.disjointfeature:
                    ret.append(([fc_cooc_vuall], disj_name, 1))

            # Classical unification of literals with the same predicate.
            if p[0] == q[0]:
                if p_is_type4:
                    dnf_expl.append(fc_cooc_vuall)
                elif p[5] > q[5] or (p[5] == q[5] and p[2] > q[2]):
                    # The ordering on ([5], id) makes each unordered pair
                    # contribute only once (avoids double counting).
                    dnf_expl.append(fc_cooc_vuall)
                    ret.append(([fc_cooc_vuall], "UNIFY_%s" % p[0], 1))

        # Factor over the whole DNF collected for p.
        if p_is_type4:
            ret.append((dnf_expl, "$EXPLAINED_BY_UNIF_%s" % (p[0]), 10000))
        else:
            ret.append((dnf_expl + list(expl.values()), "EXPLAINED_BY_%s_%s" % (p[0], p[3]), 1))

            # One AXIOM_ factor per explanation group, unless p[10] is empty.
            # p[10] is only read when at least one group exists.
            if expl and "" != p[10]:
                ret.extend(([conj], "AXIOM_%s" % p[10], 1) for conj in expl.values())

    return ret
Beispiel #2
0
def cbScoreFunction( ctx ):
    """Build the scoring factors for all literals that henryext reports for ``ctx``.

    Returns a list of ``(dnf, feature_name, weight)`` tuples.  ``dnf`` is a list
    of conjunctions; every conjunction is a list of atom strings: ``"p<id>"``
    (a literal), ``"c<a> <b>"`` or ``"!c<a> <b>"`` (a pair of arguments).
    Returns ``[]`` immediately when ``pa.donothing`` is set, and the partial
    list when ``henryext.isTimeout(ctx)`` fires.

    Feature groups are switched off by the ``pa`` flags ``noexplident``,
    ``nofuncrel``, ``noargineq``, ``nowep``, ``nosynunify``, ``nocp``,
    ``noinc``, ``nofnsr``, ``noder`` and ``nomodality``.

    Literal fields as used below (exact semantics belong to henryext, which is
    not visible here -- TODO confirm): [0] predicate name, [1] argument
    sequence, [2] integer literal id, [4] comma-separated string of literal
    ids, [5] number used as cost/ordering value.

    Fixes relative to the previous revision:
      * ``fc_cooc`` is computed before the frame-disjointness factor uses it
        (it used to hold the previous pair's value, or be unbound).
      * ``fc_cooc_vu1`` falls back to ``fc_cooc`` when either literal has fewer
        than two arguments, mirroring ``fc_cooc_vu0`` (it used to be stale or
        unbound in that case).
      * comparisons of the second arguments are guarded by an arity check.
      * no Python-2-only calls (``has_key``, ``xrange``) remain.
    """
    if pa.donothing:
        return []

    ret = []
    set_p = henryext.getLiterals(ctx)

    def _suffix(pred):
        # Text after the last "-" of a predicate name, "" if there is none.
        found = re.findall("-([^-]+)$", pred)
        return found[0] if found else ""

    def _proper_name_info(lit):
        # (shares its 2nd argument with a g_pnp literal?, synset predicates on that argument)
        if len(lit[1]) < 2:
            return False, []
        is_pn, synsets = False, []
        for other in henryext.getLiteralsFromTerm(ctx, lit[1][1]):
            if other[0] in g_pnp:
                is_pn = True
            if other[0].startswith("synset"):
                synsets.append(other[0])
        return is_pn, synsets

    def _modality_lits(term):
        # Literals on `term` that are g_mp predicates with `term` as 2nd argument
        # or "...vb" predicates with `term` as 3rd argument.  May raise IndexError.
        return [x for x in henryext.getLiteralsFromTerm(ctx, term)
                if (x[0] in g_mp and x[1][1] == term) or (x[0].endswith("vb") and x[1][2] == term)]

    # Explicit non-identity.
    if not pa.noexplident:
        for lfs in g_explnids_list:
            eq, inst = _getMatchingSets(ctx, [_break(lf.replace("'", "")) for lf in lfs.split(" & ")])

            if 0 < len(eq) and "" != eq["x"][0] and "" != eq["y"][0]:
                ret.append(([["c%s %s" % (eq["x"][0], eq["y"][0])]], "EXPLICIT_NONIDENT", -1))

    same_pred_args = set()
    # NOTE(review): the cache is keyed by the index inside g_funcrel[p[0]] only,
    # so entry i of one predicate also suppresses entry i of every other
    # predicate.  Kept as-is; confirm whether the key should include p[0].
    fr_cache = set()

    for p in set_p:
        if henryext.isTimeout(ctx):
            return ret
        if p[0] in ("=", "!="):
            continue

        p_atom = "p%d" % p[2]
        psuf = _suffix(p[0])

        # Generalised predicate label used in feature names; later matches win.
        predgen = psuf
        if "" == predgen:
            if "FN" in p[0]:
                predgen = p[0]
            if "synset" in p[0]:
                predgen = "synset"
            if "Script" in p[0]:
                predgen = "Script"
            if "cause" in p[0]:
                predgen = "cause"
            if "entail" in p[0]:
                predgen = "entail"

        # Cost for p.
        ret.append(([[p_atom]], "!HYPOTHESIZED_%s" % (predgen), -p[5] - 0.001))

        # Functional words.
        if not pa.nofuncrel:
            for i, fr in enumerate(g_funcrel[p[0]]):
                if i in fr_cache:
                    continue

                fr_cache.add(i)
                lfs, _score = fr

                eq, inst = _getMatchingSets(ctx, [_break(lf.replace("'", "")) for lf in lfs.split(" & ")])

                if 2 == len(inst) and 2 <= len(eq["x2"]):
                    ret.append(([["!c%s %s" % (eq["x2"][0], eq["x2"][1]),
                                  "c%s %s" % (eq["x1"][0], eq["x1"][1])]], "FUNC_REL", -1))

        # Explanation factors for p: list of (conjunction, name, weight).
        dnf_expl = []

        # Arguments of the same "vb" predicate with four arguments: one factor
        # per unordered argument pair, emitted only once over the whole call.
        if not pa.noargineq and "vb" == psuf and 4 == len(p[1]):
            for i, j in ((1, 2), (2, 3), (1, 3)):
                a, b = p[1][i], p[1][j]
                if "%s-%s" % (a, b) not in same_pred_args and "%s-%s" % (b, a) not in same_pred_args:
                    ret.append(([["c%s %s" % (a, b)]], "ARGS_IN_SAME_PREDS", 1))
                    same_pred_args.add("%s-%s" % (a, b))

        for q in set_p:
            if q[0] in ("=", "!="):
                continue
            if p[2] == q[2]:
                continue

            qsuf = _suffix(q[0])
            same_pred = p[0] == q[0]
            fc_cooc = [p_atom, "p%d" % q[2]]

            # Frame x frame disjointness (needs fc_cooc of the *current* pair).
            if "" != p[4] and "" != q[4]:
                if "%s-%s" % (p[0], q[0]) in g_fndisj:
                    ret.append(([fc_cooc + ["c%s %s" % (p[1][0], q[1][0])]], "!FN_DISJOINT_FRAMES", -1000))

            # Co-occurrence + unification of the first arguments.
            if len(q[1]) > 0 and len(p[1]) > 0:
                fc_cooc_vu0 = fc_cooc + ["c%s %s" % (p[1][0], q[1][0])]
            else:
                fc_cooc_vu0 = fc_cooc

            # Co-occurrence + unification of the second arguments; plain
            # co-occurrence when either literal has no second argument.
            have_arg1 = len(q[1]) > 1 and len(p[1]) > 1
            if have_arg1:
                fc_cooc_vu1 = fc_cooc + ["c%s %s" % (p[1][1], q[1][1])]
            else:
                fc_cooc_vu1 = fc_cooc
            arg1_differs = have_arg1 and p[1][1] != q[1][1]

            # Co-occurrence + unification of all arguments (same arity only).
            if len(p[1]) == len(q[1]):
                fc_cooc_vuall = fc_cooc + ["c%s %s" % (a, b) for a, b in zip(p[1], q[1])]
            else:
                fc_cooc_vuall = list(fc_cooc)

            # Explanation for p: q lists p in its field [4].
            if not same_pred and repr(p[2]) in q[4].split(","):
                dnf_expl.append((fc_cooc, "", 1))

            # Pronoun compatibility.
            if p[0] in ("person", "per") and q[0] in ("male", "female"):
                dnf_expl.append((fc_cooc_vu1, "", 1))
            if "thing" == p[0] and "neuter" == q[0]:
                dnf_expl.append((fc_cooc_vu1, "", 1))

            # nn => pronoun.
            if not same_pred and (p[0] in g_prnp or p[0] in g_pnp) and "nn" == qsuf:
                dnf_expl.append((fc_cooc_vu1, "", 1))

            # Classical unification; the ordering on ([5], id) counts each
            # unordered pair once.  Skipped when a g_wep pattern matches p[0]
            # (unless pa.nowep) or for synsets when pa.nosynunify is set.
            if same_pred and (p[5] > q[5] or (p[5] == q[5] and p[2] > q[2])):
                wep_hit = any(not pa.nowep and cu.search(p[0]) is not None for cu in g_wep)
                if not wep_hit and not (pa.nosynunify and p[0].startswith("synset")):
                    dnf_expl.append((fc_cooc_vuall, "UNIFY_PROPOSITIONS", 1))

            # Unification cost, one feature per differing argument position.
            if not pa.nocp and same_pred and len(p[1]) == len(q[1]) and p[2] > q[2]:
                for i, (p_arg, q_arg) in enumerate(zip(p[1], q[1])):
                    if p_arg == q_arg:
                        continue

                    # Feature map converter: rewrite "<pred>.<pos>" with every
                    # matching g_fm rule.
                    fe = "%s.%d" % (p[0], i)

                    for matcher, after, value in g_fm:
                        fes, n = matcher.subn(after, fe)
                        fv = value

                        if 0 == n:
                            continue

                        if not (pa.nosynunify and "SYNSET" in fes):
                            # Value converter for the symbolic values.
                            if "$abstlevel" == fv:
                                fv = g_word_abst.get(p[0], 0)
                            elif "$wordfreq" == fv:
                                fv = g_word_freq.get(p[0].split("-")[-2], 0)

                            ret.append(([fc_cooc + ["c%s %s" % (p_arg, q_arg)]], fes, float(fv)))

            # Boxer aux x pronoun.
            if not pa.noinc and not same_pred and p[0] in g_pnp and q[0] in g_prnp:
                ret.append(([fc_cooc_vu1], "PN-%s_PRO-%s_UNIFY_Y" % (p[0], q[0]), 1))

            # Frame x synset: FrameNet selectional restriction.  Only the
            # orientation p=synset, q=FN reaches this branch.
            if not pa.nofnsr:
                if p[0].startswith("synset") and q[0].startswith("FN"):
                    syn, fn = p, q
                    fnsr = g_fnsr.get("%s-%s" % (syn[0], fn[0]))
                    if fnsr is not None:
                        ret.append(([fc_cooc + ["c%s %s" % (fn[1][fnsr[3].index("x")], syn[1][1])]],
                                    "FN_%s_SEL_RESTR_Y" % fn[0], -1 + fnsr[1]))

            # Symmetric features: emitted for one orientation of the pair only.
            if p[2] > q[2]:

                # WordNet features.
                # NOTE(review): antonym keys strip 7 characters, hierarchy keys
                # strip 6 -- kept exactly as found; confirm against the tables.
                if not same_pred and p[0].startswith("synset") and q[0].startswith("synset"):
                    if "%s-%s" % (q[0][7:], p[0][7:]) in g_wnanto or "%s-%s" % (p[0][7:], q[0][7:]) in g_wnanto:
                        if not pa.noinc and arg1_differs:
                            ret.append(([fc_cooc_vu1], "WN_ANTONYMOUS_SYNSET_Y", -1))
                    else:
                        prnt1, prnt2 = g_wnhier.get(p[0][6:]), g_wnhier.get(q[0][6:])
                        if prnt1 is not None and prnt1 == prnt2 and arg1_differs:
                            if not pa.noinc:
                                ret.append(([fc_cooc_vu1], "WN_SIBLING_SYNSET_Y", -1))

                if not pa.noder and arg1_differs and (
                        "%s-%s" % (p[0], q[0]) in g_wnder or "%s-%s" % (q[0], p[0]) in g_wnder):
                    ret.append(([fc_cooc_vu1], "WN_DERIVATIONAL_Y", 1))

                # Hand-crafted incompatibility.
                if "%s %s" % (p[0], q[0]) in g_handinc or "%s %s" % (q[0], p[0]) in g_handinc:
                    if not pa.noinc and arg1_differs:
                        ret.append(([fc_cooc_vu1], "HD_INCOMPATIBLE_Y", -1))

                # Proper names that do not belong to the same synset.
                if "nn" == psuf == qsuf and not same_pred:
                    pj, qj = _proper_name_info(p), _proper_name_info(q)

                    if pj[0] and qj[0] and 0 == len(set(pj[1]) & set(qj[1])):
                        if not pa.noinc and arg1_differs:
                            ret.append(([fc_cooc_vu1], "DIFFERENT_PROPERNAMES_UNIFIED", -1))

                # Argument constraints.
                if not pa.noargineq:
                    if same_pred and len(p[1]) == len(q[1]):
                        eas = ["%s%s %s" % ("c" if 0 == i else "!c", a, b)
                               for i, (a, b) in enumerate(zip(p[1], q[1]))
                               if ("e" in a or "e" in b) and a != b]

                        if 2 <= len(eas):
                            ret.append(([fc_cooc + eas], "ARGUMENT_CONSTR", -1))

                    # Event-denoting variable constraints.  Best effort: if q
                    # has fewer arguments than p, nothing is added at all.
                    if same_pred and psuf == qsuf == "vb":
                        try:
                            event_constrs = [([fc_cooc_vu0 + ["!c%s %s" % (p[1][i], q[1][i])]], "ARGUMENT_CONSTR", -1)
                                             for i in range(1, len(p[1])) if p[1][i] != q[1][i]]
                        except IndexError:
                            pass
                        else:
                            ret.extend(event_constrs)

                # Modality constraints.  Best effort: literals with too few
                # arguments raise IndexError and the constraint is skipped.
                if not pa.nomodality:
                    if psuf == qsuf == "vb" and same_pred and p[1][0] != q[1][0]:
                        try:
                            ps, qs = _modality_lits(p[1][0]), _modality_lits(q[1][0])

                            if len(ps) > 0 or len(qs) > 0:
                                ret.append(([fc_cooc + ["c%s %s" % (p[1][i], q[1][i])
                                                        for i in range(0, len(p[1]))
                                                        if "u" not in p[1][i] or "u" not in q[1][i]]],
                                            "MODALITY_CONSTR", -1))
                        except IndexError:
                            pass

        # Factor over the whole DNF collected for p.
        ret.append(([disj[0] for disj in dnf_expl], "!EXPLAINED_%s" % (predgen), p[5]))

    return ret
Beispiel #3
0
def cbScoreFunction( ctx ):
    """Build the scoring factors for all literals that henryext reports for ``ctx``.

    Returns a list of ``(dnf, feature_name, weight)`` tuples.  ``dnf`` is a list
    of conjunctions; every conjunction is a list of atom strings, either
    ``"p<id>"`` (a literal) or ``"c<a> <b>"`` (a pair of arguments).  The list
    built so far is returned early when ``henryext.isTimeout(ctx)`` fires.

    For every literal p (except "=") a "!HYPOTHESIZED_" factor with weight
    ``-p[5] - 0.001`` is emitted, and (except for "!=") an "!EXPLAINED_" factor
    with weight ``p[5]`` over everything that can explain p.

    Literal fields as used below (exact semantics belong to henryext, which is
    not visible here -- TODO confirm): [0] predicate name, [1] argument
    sequence, [2] integer literal id, [4] comma-separated string of literal
    ids, [5] number used as cost/ordering value, [7] grouping key.

    Fix relative to the previous revision: the constant-clash test walks both
    argument lists in lockstep, so a same-named literal with fewer arguments or
    an empty argument string no longer raises IndexError.
    """
    ret = []
    set_p = henryext.getLiterals(ctx)

    for p in set_p:
        if henryext.isTimeout(ctx):
            return ret
        if "=" == p[0]:
            continue

        p_atom = "p%d" % p[2]
        p_id = repr(p[2])

        # Cost for p.
        ret.append(([[p_atom]], "!HYPOTHESIZED_%s_%s" % (p[0], p[2]), -p[5] - 0.001))

        if "!=" == p[0]:
            continue

        # Explanation factors for p.
        dnf_expl, expl = [], defaultdict(list)
        p_listed_ids = p[4].split(",")

        for q in set_p:
            if q[0] in ("=", "!="):
                continue
            if p[2] == q[2]:
                continue

            q_id = repr(q[2])
            q_atom = "p%d" % q[2]
            q_listed_ids = q[4].split(",")

            # Explanation for p: q lists p in its field [4].
            if p_id in q_listed_ids:
                # Self-explanation is prohibited (p => q => p).
                if q_id in p_listed_ids:
                    continue
                expl[q[7]].append(q_atom)

            # Literals in an explainer/explainee relation are not unified
            # (p => ... => q).  Only checked when both fields [4] are non-empty.
            if "" != p[4] and "" != q[4]:
                if q_id in p_listed_ids:
                    continue
                if p_id in q_listed_ids:
                    continue

            # Classical unification; the ordering on ([5], id) counts each
            # unordered pair once.
            if p[0] == q[0] and (p[5] > q[5] or (p[5] == q[5] and p[2] > q[2])):
                # Two different arguments that both start with an upper-case
                # letter block the unification.
                clash = any(a != b and a[:1].isupper() and b[:1].isupper()
                            for a, b in zip(p[1], q[1]))
                if not clash:
                    conj = [p_atom, q_atom]
                    # Argument atoms are only added for equal arity.
                    if len(p[1]) == len(q[1]):
                        conj += ["c%s %s" % (a, b) for a, b in zip(p[1], q[1])]
                    dnf_expl.append(conj)

        # Group the explaining literals by conjunction (one per q[7] key).
        dnf_expl.extend(expl.values())

        # Factor over the whole DNF collected for p.
        ret.append((dnf_expl, "!EXPLAINED_%s_%s" % (p[0], p[2]), p[5]))

    return ret