Example #1
0
					def _isPn(x):
						"""Return (is_proper_name, synset_preds) for literal x.

						Judged from the literals sharing x's second argument:
						is_proper_name is True iff any of them is in g_pnp, and
						synset_preds collects their synset predicate names.
						"""
						# Guard against unary literals before indexing x[1][1]
						# (consistent with the other in-file copy of _isPn).
						if len(x[1]) < 2: return False, []

						f_p_pn, synsets = False, []

						for pp in henryext.getLiteralsFromTerm(ctx, x[1][1]):
							if pp[0] in g_pnp: f_p_pn = True
							if pp[0].startswith("synset"): synsets += [pp[0]]

						return f_p_pn, synsets
Example #2
0
def cbPreprocess( ctx, obs ):
	"""Expand observed literals with WordNet synset and narrative-schema literals.

	Args:
		ctx: opaque henry context handle (passed through to henryext calls).
		obs: iterable of observed literals, unpacked as
			(obp, obargs, obid, obx, obwn).  NOTE(review): field meanings
			inferred from use below -- obp = predicate name, obargs =
			argument list, obid = literal id, obx = colon-separated info
			string, obwn = numeric weight; confirm against the caller.

	Returns:
		list of new literal tuples (predicate, args, ancestor_ids, weight)
		to be added to the observation.
	"""

	ret	= []
	# Base name of the target, e.g. "foo" from "foo.lisp".
	bsn = henryext.getTargetName(ctx).split(".lisp")[0]
	
	# Annotating WordNet predicates.
	if pa.wnannotate:
		es_id = 0  # fresh-variable counter for synset literal args ("s1", "s2", ...)

		for obp, obargs, obid, obx, obwn in obs:

			# Only content words: predicates suffixed -nn/-adj/-vb.
			m = re.search( "-(nn|adj|vb)$", obp )
			if None != m:
				try:
					if len(obp.split("-")) < 2: continue
					# NOTE(review): looks like leftover debug output -- prints the
					# "lemma.pos.sense" synset name that is looked up just below.
					print "%s.%s.%s" % (obp.split("-")[-2], {"nn": "n", "adj": "a", "vb": "v"}.get(obp.split("-")[-1]), g_sen.get("%s-%s" % (bsn, obx.split(":")[2][1:-1]), "01"))
					# Sense number comes from g_sen (keyed "<basename>-<token>",
					# token taken from the third ":"-field of obx with its first
					# and last characters stripped), defaulting to sense "01".
					s = corpus.wordnet.synset("%s.%s.%s" % (obp.split("-")[-2], {"nn": "n", "adj": "a", "vb": "v"}.get(obp.split("-")[-1]), g_sen.get("%s-%s" % (bsn, obx.split(":")[2][1:-1]), "01")))

					# Annotate the synset itself and, if requested, its direct hypernyms.
					for h in [s] + (s.hypernyms() if pa.wnhypannotate else []):
						es_id += 1
						# Predicate name encodes POS (n=1, v=2, a=3) plus the
						# 8-digit synset offset; the second argument picks one
						# argument of the observed literal by POS.
						ret += [("synset%d%08d" % ({"n": 1, "a": 3, "v": 2}.get(h.pos, 0), h.offset), ["s%d" % es_id, obargs[{"n": 1, "a": 1, "v": 0}.get(h.pos, 0)]], [], 0.0)]
					
				# Unknown lemma/sense: skip this literal silently.
				except corpus.reader.WordNetError: continue

	# Narrative schema predicates.
	if pa.ncannotate:
		for obp, obargs, obid, obx, obwn in obs:
			if obp.endswith("-vb"):

				# Search for the narrative schema.
				for i in xrange(1,len(obargs)):
					# Skip arguments containing "u" (presumably unbound
					# variables -- TODO confirm).
					if "u" in obargs[i]: continue
					
					# Noun literals (or "~"-prefixed placeholders) sharing this argument.
					argpred = [x for x in henryext.getLiteralsFromTerm(ctx, obargs[i]) if x[0].endswith("-nn") or x[0].startswith("~")]

					if 0 == len(argpred):
						# No co-occurring noun: schema key is "<verb>-<role>"
						# where arg 1 maps to subject "s" and args 2/3 to object "o".
						ret += _getSchemaLiterals("%s-%s" % (obp.split("-")[-2], {1: "s", 2: "o", 3: "o"}[i]), obargs[i], [obid], obwn)
					else:
						for ap in argpred:
							if ap[0].startswith("~"):
								ret += _getSchemaLiterals("%s-%s" % (obp.split("-")[-2], {1: "s", 2: "o", 3: "o"}[i]), obargs[i], [obid, ap[2]], obwn)
							else:
								# Include the noun lemma in the key and add its weight.
								ret += _getSchemaLiterals("%s-%s,%s" % (obp.split("-")[-2], {1: "s", 2: "o", 3: "o"}[i], ap[0].split("-")[-2]), obargs[i], [obid, ap[2]], obwn+ap[5])
											
	return ret
Example #3
0
def cbScoreFunction( ctx ):
	"""Produce weighted feature factors for scoring the current abduction context.

	Iterates over every literal (and every ordered literal pair) returned by
	henryext.getLiterals and emits factors for: the cost of leaving a literal
	hypothesized, functional-word relations, explanation-by-unification
	evidence (WordNet synsets, FrameNet frames, pronouns, proper names,
	derivational links), pairwise incompatibilities, and hard
	argument/modality constraints.

	Each returned element is (disjunctive_clauses, feature_name, weight),
	where each clause is a conjunction of factor atoms such as "p<id>"
	(literal hypothesized) and "c<x> <y>" / "!c<x> <y>" (variables unified /
	not unified).  NOTE(review): atom syntax inferred from usage -- confirm
	against the henry engine.

	Args:
		ctx: opaque henry context handle (passed through to henryext).

	Returns:
		list of (clauses, feature_name, weight) tuples; empty when
		pa.donothing is set, possibly truncated on timeout.
	"""
	if pa.donothing: return []
	
	ret		= []
	set_p = henryext.getLiterals( ctx )

	# Explicit Non-identity: penalize unifying variable pairs matched by a
	# pattern in g_explnids_list.
	if not pa.noexplident:
		for lfs in g_explnids_list:
			eq, inst = _getMatchingSets( ctx, [_break(lf.replace( "'", "" )) for lf in lfs.split( " & " )] )

			if 0 < len(eq) and "" != eq["x"][0] and "" != eq["y"][0]:
				ret += [([["c%s %s" % (eq["x"][0], eq["y"][0])]], "EXPLICIT_NONIDENT", -1)]
	
	same_pred_args = {}  # de-dup table for ARGS_IN_SAME_PREDS factors
	fr_cache			 = {}  # functional-relation patterns already instantiated
	
	for p in set_p:
		if henryext.isTimeout(ctx): return ret  # best-effort: return what we have so far
		if p[0] in ["=", "!="]: continue

		# COST FOR p.
		# Predicate suffix after the last "-" (e.g. "vb", "nn") is the coarse
		# predicate class used in feature names; suffix-less special families
		# get their own class below.
		psuf		 = re.findall("-([^-]+)$", p[0])
		psuf		 = (psuf[0] if 0 < len(psuf) else "")
		predgen  = psuf

		if "" == predgen:
			if "FN" in p[0]: predgen = p[0]
			if "synset" in p[0]: predgen = "synset"
			if "Script" in p[0]: predgen = "Script"
			if "cause"  in p[0]: predgen = "cause"
			if "entail"  in p[0]: predgen = "entail"
		
		# Base cost for leaving p hypothesized (unexplained).
		ret += [([["p%d" % p[2]]], "!HYPOTHESIZED_%s" % (predgen), -p[5]-0.001)]
		
		# FUNCTIONAL WORDS
		if not pa.nofuncrel:
			for i, fr in enumerate(g_funcrel[p[0]]):
				# NOTE(review): the cache is keyed by pattern index only, so
				# same-index patterns of *different* predicates are skipped
				# too -- confirm this is intended.
				if fr_cache.has_key(i): continue

				fr_cache[i] = 1
				lfs, score = fr

				eq, inst = _getMatchingSets( ctx, [_break(lf.replace( "'", "" )) for lf in lfs.split( " & " )] )

				if 2 == len(inst) and 2 <= len(eq["x2"]):
					ret += [([["!c%s %s" % (eq["x2"][0], eq["x2"][1]), "c%s %s" % (eq["x1"][0], eq["x1"][1])]], "FUNC_REL", -1)]
		
		# CREATE EXPLANATION FACTORS FOR p.
		dnf_expl = []

		# ARGUMENTS AT THE SAME OBSERVABLE PREDICATES: candidate unification
		# of argument pairs of one verbal literal (each unordered pair once).
		if not pa.noargineq and "vb" == psuf and 4 == len(p[1]):
			if not same_pred_args.has_key("%s-%s" % (p[1][1], p[1][2])) and not same_pred_args.has_key("%s-%s" % (p[1][2], p[1][1])): ret += [([["c%s %s" % (p[1][1], p[1][2])]], "ARGS_IN_SAME_PREDS", 1)]; same_pred_args["%s-%s" % (p[1][1], p[1][2])] = 1
			if not same_pred_args.has_key("%s-%s" % (p[1][2], p[1][3])) and not same_pred_args.has_key("%s-%s" % (p[1][3], p[1][2])): ret += [([["c%s %s" % (p[1][2], p[1][3])]], "ARGS_IN_SAME_PREDS", 1)]; same_pred_args["%s-%s" % (p[1][2], p[1][3])] = 1
			if not same_pred_args.has_key("%s-%s" % (p[1][1], p[1][3])) and not same_pred_args.has_key("%s-%s" % (p[1][3], p[1][1])): ret += [([["c%s %s" % (p[1][1], p[1][3])]], "ARGS_IN_SAME_PREDS", 1)]; same_pred_args["%s-%s" % (p[1][1], p[1][3])] = 1

		
		for q in set_p:
			if q[0] in ["=", "!="]: continue
			if p[2] == q[2]:        continue

			# Shared atoms: both literals hypothesized.
			# BUGFIX(review): fc_cooc used to be first assigned *after* its use
			# in the FN_DISJOINT_FRAMES factor below, raising NameError on the
			# first pair (and silently reusing the previous pair's value
			# afterwards); it is now defined before any use.
			fc_cooc				 = ["p%d" % p[2], "p%d" % q[2]]
			psuf, qsuf		 = re.findall("-([^-]+)$", p[0]), re.findall("-([^-]+)$", q[0])
			psuf, qsuf		 = (psuf[0] if 0 < len(psuf) else ""), (qsuf[0] if 0 < len(qsuf) else "")

			if "" != p[4] and "" != q[4]:

				# FRAME x FRAME DISJOINTNESS.
				if g_fndisj.has_key("%s-%s" % (p[0], q[0])):
					ret      += [([fc_cooc + ["c%s %s" % (p[1][0], q[1][0])]], "!FN_DISJOINT_FRAMES", -1000)]
				
				# IF THEY ARE EXPLAINING THE SAME THING, JUST IGNORE THEM. (p => q, p => q)
				#if 0 < len(set(p[4].split(",")) & set(q[4].split(","))): continue
				
				# SELF-EXPLANATION IS PROHIBITED. (p => q => p)
				#if repr(q[2]) in p[4].split(","): continue
				
			# Variants of fc_cooc that additionally unify leading arguments.
			if len(q[1]) > 0 and len(p[1]) > 0:
				fc_cooc_vu0		 = fc_cooc + ["c%s %s" % (p[1][0], q[1][0])]
			else:
				fc_cooc_vu0 = fc_cooc

			if len(q[1]) > 1 and len(p[1]) > 1:
				# (the original min(1, len(...)) index is always 1 under this guard)
				fc_cooc_vu1		 = fc_cooc + ["c%s %s" % (p[1][1], q[1][1])]
			else:
				# BUGFIX(review): previously left unassigned here, so later
				# unconditional uses of fc_cooc_vu1 raised NameError on the
				# first pair (or reused a stale value) when either literal had
				# fewer than two arguments; fall back like fc_cooc_vu0 above.
				fc_cooc_vu1 = fc_cooc
			fc_cooc_vuall1 = fc_cooc + (["c%s %s" % (p[1][i], q[1][i]) for i in xrange(1,len(p[1]))] if 1 < len(p[1]) and len(p[1]) == len(q[1]) else [])
			fc_cooc_vuall  = fc_cooc + (["c%s %s" % (p[1][i], q[1][i]) for i in xrange(len(p[1]))] if len(p[1]) == len(q[1]) else [])

			#
			# EXPLANATION FOR p: q's ancestor-id list contains p's id.
			if p[0] != q[0] and repr(p[2]) in q[4].split(","): dnf_expl += [(fc_cooc, "", 1)]

			# PRONOUN COMPATIBILITY.
			if p[0] in "person per".split() and q[0] in "male female".split(): dnf_expl += [(fc_cooc_vu1, "", 1)]
			if "thing" == p[0]  and q[0] in "neuter".split():                  dnf_expl += [(fc_cooc_vu1, "", 1)]

			# nn => PRONOUN
			if p[0] != q[0] and (p[0] in g_prnp or p[0] in g_pnp) and "nn" == qsuf:
				dnf_expl += [(fc_cooc_vu1, "", 1)]
				
			#
			# EXPLANATION BY UNIFICATION.
				
			# BELOW ARE EXPLANATION BY UNIFICATION; AVOID DOUBLE-COUNT.			
			_bothStartsWith = lambda x: p[0].startswith(x) and q[0].startswith(x)
			_samePreds      = lambda: p[0] == q[0]

			# CLASSICAL UNIFICATION.  The (p[5], p[2]) > (q[5], q[2]) ordering
			# counts each unordered pair exactly once.
			if _samePreds() and (p[5] > q[5] or (p[5] == q[5] and p[2] > q[2])):
				# Skip predicates matching any "word-equality prohibited" pattern.
				for cu in g_wep:
					if not pa.nowep and None != cu.search(p[0]): break
				else:
					if not (pa.nosynunify and p[0].startswith("synset")):
						dnf_expl += [(fc_cooc_vuall, "UNIFY_PROPOSITIONS", 1)]

			# UNIFICATION COST.
			if not pa.nocp and p[0] == q[0] and len(p[1]) == len(q[1]) and p[2] > q[2]:
				for i in xrange(len(p[1])):
					if p[1][i] == q[1][i]: continue
					
					# FEATURE MAP CONVERTER: rewrite "<pred>.<argpos>" through
					# the g_fm (matcher, replacement, value) rules.
					fe, fv = "%s.%d" % (p[0], i), 0
					
					for matcher, after, value in g_fm:
						fes, n = matcher.subn(after, fe)
						fv		 = value
												
						if 0 == n: continue

						# ADD AS AN EVIDENCE OF VARIABLE UNIFICATION.
						if not (pa.nosynunify and "SYNSET" in fes):
							
							# VALUE CONVERTER: resolve symbolic values.
							if "$abstlevel" == fv: fv = g_word_abst.get(p[0], 0)
							elif "$wordfreq" == fv: fv = g_word_freq.get(p[0].split("-")[-2], 0)

							ret += [([fc_cooc + ["c%s %s" % (p[1][i], q[1][i])]], fes, float(fv))]

							
			# if p[0].startswith("synset1"):
			# 	syn_dist = len(corpus.wordnet._synset_from_pos_and_offset("n", int(p[0][7:])).hypernym_distances())

			# 	# BOXER AUX x SYNSET (>11)
			# 	if p[0] != q[0] and 10<=syn_dist and q[0] in g_pnp:
			# 		ret += [([fc_cooc_vu1], "PN-%s_SYN-%s_UNIFY_Y" % (q[0], p[0][6:]), 1)]

			# 	# PRONOUN x SYNSET (>11)
			# 	if p[0] != q[0] and 10<=syn_dist and q[0] in g_prnp:
			# 		ret += [([fc_cooc_vu1], "PRO-%s_SYN-%s_UNIFY_Y" % (q[0], p[0][6:]), 1)]

			# BOXER AUX x PRONOUN
			if not pa.noinc and p[0] != q[0] and p[0] in g_pnp and q[0] in g_prnp:
				ret += [([fc_cooc_vu1], "PN-%s_PRO-%s_UNIFY_Y" % (p[0], q[0]), 1)]

			# FRAME x SYNSET: FRAMENET SELECTIONAL RESTRICTION.
			if not pa.nofnsr:
				if p[0].startswith("synset") and q[0].startswith("FN"):
					# NOTE(review): the conditional below is always true under
					# the enclosing if, so only the (p=synset, q=frame)
					# orientation is ever handled.
					syn, fn = (p, q) if p[0].startswith("synset") and q[0].startswith("FN") else (q, p)
					fnsr = g_fnsr.get( "%s-%s" % (syn[0], fn[0]) )
					if None != fnsr: 
						ret += [([fc_cooc + ["c%s %s" % (fn[1][fnsr[3].index("x")], syn[1][1])]], "FN_%s_SEL_RESTR_Y" % fn[0], -1+fnsr[1])]

			# SYMMETRIC FEATURES (emitted once per unordered pair).
			if p[2] > q[2]:
				
				# WORDNET FEATURE.
				if p[0] != q[0] and _bothStartsWith("synset"):
					if g_wnanto.has_key("%s-%s" % (q[0][7:], p[0][7:])) or g_wnanto.has_key("%s-%s" % (p[0][7:], q[0][7:])):
						# Antonymous synsets: penalize unifying their word arguments.
						if not pa.noinc and p[1][1] != q[1][1]: ret += [([fc_cooc_vu1], "WN_ANTONYMOUS_SYNSET_Y", -1)]
					else:
						# Sibling synsets (same parent in the hypernym hierarchy).
						prnt1, prnt2 = g_wnhier.get( p[0][6:] ), g_wnhier.get( q[0][6:] )
						if None != prnt1 and prnt1 == prnt2 and p[1][1] != q[1][1]:
							if not pa.noinc: ret += [([fc_cooc_vu1], "WN_SIBLING_SYNSET_Y", -1)]

				if not pa.noder and len(p[1]) > 1 and len(q[1]) > 1 and p[1][1] != q[1][1] and (g_wnder.has_key("%s-%s" % (p[0], q[0])) or g_wnder.has_key("%s-%s" % (q[0], p[0]))):
					ret += [([fc_cooc_vu1], "WN_DERIVATIONAL_Y", 1)]
					
				# HAND-CRAFTED INCOMPATIBILITY.
				if g_handinc.has_key("%s %s" % (p[0], q[0])) or g_handinc.has_key("%s %s" % (q[0], p[0])):
					if not pa.noinc and p[1][1] != q[1][1]: ret += [([fc_cooc_vu1], "HD_INCOMPATIBLE_Y", -1)]
				
				# try: dnf_expl += [(fc_cooc_vu1, "PRONOUN_%s_SENTDIST_%s" % (p[0], min(1,abs(int(p[1][1].split("x")[0]) - int(q[1][1].split("x")[0])))), 1) ]
				# except ValueError: pass

				# PROPER NAMES THAT DON'T BELONG TO THE SAME SYNSET
				if "nn" == psuf == qsuf and p[0] != q[0]:
					def _isPn(x):
						# Return (is_proper_name, synset_preds) for literal x,
						# judged from the literals sharing its second argument.
						if len(x[1])<2: return False, []

						f_p_pn, synsets = False, []

						for pp in henryext.getLiteralsFromTerm(ctx, x[1][1]):
							if pp[0] in g_pnp: f_p_pn = True
							if pp[0].startswith("synset"): synsets += [pp[0]]

						return f_p_pn, synsets

					pj, qj = _isPn(p), _isPn(q)

					# Two proper names with disjoint synset sets must not corefer.
					if pj[0] and qj[0] and 0 == len(set(pj[1]) & set(qj[1])):
						if not pa.noinc and p[1][1] != q[1][1]: ret += [([fc_cooc_vu1], "DIFFERENT_PROPERNAMES_UNIFIED", -1)]
						
				#
				# CONSTRAINTS

				# ARGUMENT CONSTRAINTS.
				if not pa.noargineq:
					if p[0] == q[0] and len(p[1]) == len(q[1]):
						# Atoms over differing arguments containing "e": arg 0
						# unified ("c") while another is not ("!c") -- presumably
						# forbidding partial unification of event arguments.
						eas = ["%s%s %s" % ("c" if 0==i else "!c", p[1][i], q[1][i]) for i in xrange(len(p[1])) if ("e" in p[1][i] or "e" in q[1][i]) and p[1][i] != q[1][i]]

						if 2 <= len(eas):
							ret += [([fc_cooc + eas], "ARGUMENT_CONSTR", -1)]

					# EVENT-DENOTING VARIBLE CONSTRAINTS.
					if _samePreds() and psuf == qsuf == "vb":
						try:
							ret += [([fc_cooc_vu0 + ["!c%s %s" % (p[1][i], q[1][i])]], "ARGUMENT_CONSTR", -1) for i in xrange(1, len(p[1])) if p[1][i] != q[1][i]]
						except IndexError:
							pass

					# 
					# if p[0] == q[0] and "in" == psuf == qsuf and 2 < len(p[1]) and 2 < len(q[1]):
					# 	ret += [([fc_cooc + ["c%s %s" % (p[1][1], q[1][1]), "!c%s %s" % (p[1][2], q[1][2])]], "ARGUMENT_CONSTR", -1)]

				# MODALITY CONSTRAINTS.
				if not pa.nomodality:
					if psuf == qsuf == "vb" and p[0] == q[0] and p[1][0] != q[1][0]:
						try:
							# Literals attaching a modality predicate (g_mp) or an
							# embedding verb to either event variable.
							ps, qs = [x for x in henryext.getLiteralsFromTerm(ctx, p[1][0]) if (x[0] in g_mp and x[1][1] == p[1][0]) or (x[0].endswith("vb") and x[1][2] == p[1][0])], \
									[x for x in henryext.getLiteralsFromTerm(ctx, q[1][0]) if (x[0] in g_mp and x[1][1] == q[1][0]) or (x[0].endswith("vb") and x[1][2] == q[1][0])]

							if len(ps) > 0 or len(qs) > 0:
								ret += [([fc_cooc + ["c%s %s" % (p[1][i], q[1][i]) for i in xrange(0,len(p[1])) if "u" not in p[1][i] or "u" not in q[1][i]]], "MODALITY_CONSTR", -1)]
						except IndexError:
							pass						

		# CREATE FEATURE FOR EACH DISJUNCTIVE CLAUSE
		# for disj in dnf_expl:
		#  	ret += [([disj[0]], disj[1], -0.1)]
		
		# CREATE FEATURE FOR DNF: reward p if any collected explanation holds.
		ret += [([disj[0] for disj in dnf_expl], "!EXPLAINED_%s" % (predgen), p[5])]
			
	return ret