Example #1
0
					def _isPn(x):
						"""Return (is_proper_name, synset_preds) for literal x.

						Judged from the literals sharing x's second argument:
						is_proper_name is True iff any of them is in g_pnp, and
						synset_preds collects their synset predicate names.
						"""
						# Guard against unary literals before indexing x[1][1]
						# (consistent with the other in-file copy of _isPn).
						if len(x[1]) < 2: return False, []

						f_p_pn, synsets = False, []

						for pp in henryext.getLiteralsFromTerm(ctx, x[1][1]):
							if pp[0] in g_pnp: f_p_pn = True
							if pp[0].startswith("synset"): synsets += [pp[0]]

						return f_p_pn, synsets
Example #2
0
def cbPreprocess( ctx, obs ):
	"""Expand observed literals with WordNet synset and narrative-schema literals.

	Args:
		ctx: opaque henry context handle (passed through to henryext calls).
		obs: iterable of observed literals, unpacked as
			(obp, obargs, obid, obx, obwn).  NOTE(review): field meanings
			inferred from use below -- obp = predicate name, obargs =
			argument list, obid = literal id, obx = colon-separated info
			string, obwn = numeric weight; confirm against the caller.

	Returns:
		list of new literal tuples (predicate, args, ancestor_ids, weight)
		to be added to the observation.
	"""

	ret	= []
	# Base name of the target, e.g. "foo" from "foo.lisp".
	bsn = henryext.getTargetName(ctx).split(".lisp")[0]
	
	# Annotating WordNet predicates.
	if pa.wnannotate:
		es_id = 0  # fresh-variable counter for synset literal args ("s1", "s2", ...)

		for obp, obargs, obid, obx, obwn in obs:

			# Only content words: predicates suffixed -nn/-adj/-vb.
			m = re.search( "-(nn|adj|vb)$", obp )
			if None != m:
				try:
					if len(obp.split("-")) < 2: continue
					# NOTE(review): looks like leftover debug output -- prints the
					# "lemma.pos.sense" synset name that is looked up just below.
					print "%s.%s.%s" % (obp.split("-")[-2], {"nn": "n", "adj": "a", "vb": "v"}.get(obp.split("-")[-1]), g_sen.get("%s-%s" % (bsn, obx.split(":")[2][1:-1]), "01"))
					# Sense number comes from g_sen (keyed "<basename>-<token>",
					# token taken from the third ":"-field of obx with its first
					# and last characters stripped), defaulting to sense "01".
					s = corpus.wordnet.synset("%s.%s.%s" % (obp.split("-")[-2], {"nn": "n", "adj": "a", "vb": "v"}.get(obp.split("-")[-1]), g_sen.get("%s-%s" % (bsn, obx.split(":")[2][1:-1]), "01")))

					# Annotate the synset itself and, if requested, its direct hypernyms.
					for h in [s] + (s.hypernyms() if pa.wnhypannotate else []):
						es_id += 1
						# Predicate name encodes POS (n=1, v=2, a=3) plus the
						# 8-digit synset offset; the second argument picks one
						# argument of the observed literal by POS.
						ret += [("synset%d%08d" % ({"n": 1, "a": 3, "v": 2}.get(h.pos, 0), h.offset), ["s%d" % es_id, obargs[{"n": 1, "a": 1, "v": 0}.get(h.pos, 0)]], [], 0.0)]
					
				# Unknown lemma/sense: skip this literal silently.
				except corpus.reader.WordNetError: continue

	# Narrative schema predicates.
	if pa.ncannotate:
		for obp, obargs, obid, obx, obwn in obs:
			if obp.endswith("-vb"):

				# Search for the narrative schema.
				for i in xrange(1,len(obargs)):
					# Skip arguments containing "u" (presumably unbound
					# variables -- TODO confirm).
					if "u" in obargs[i]: continue
					
					# Noun literals (or "~"-prefixed placeholders) sharing this argument.
					argpred = [x for x in henryext.getLiteralsFromTerm(ctx, obargs[i]) if x[0].endswith("-nn") or x[0].startswith("~")]

					if 0 == len(argpred):
						# No co-occurring noun: schema key is "<verb>-<role>"
						# where arg 1 maps to subject "s" and args 2/3 to object "o".
						ret += _getSchemaLiterals("%s-%s" % (obp.split("-")[-2], {1: "s", 2: "o", 3: "o"}[i]), obargs[i], [obid], obwn)
					else:
						for ap in argpred:
							if ap[0].startswith("~"):
								ret += _getSchemaLiterals("%s-%s" % (obp.split("-")[-2], {1: "s", 2: "o", 3: "o"}[i]), obargs[i], [obid, ap[2]], obwn)
							else:
								# Include the noun lemma in the key and add its weight.
								ret += _getSchemaLiterals("%s-%s,%s" % (obp.split("-")[-2], {1: "s", 2: "o", 3: "o"}[i], ap[0].split("-")[-2]), obargs[i], [obid, ap[2]], obwn+ap[5])
											
	return ret
Example #3
0
def cbScoreFunction( ctx ):
	"""Produce weighted feature factors for scoring the current abduction context.

	Iterates over every literal (and every ordered literal pair) returned by
	henryext.getLiterals and emits factors for: the cost of leaving a literal
	hypothesized, functional-word relations, explanation-by-unification
	evidence (WordNet synsets, FrameNet frames, pronouns, proper names,
	derivational links), pairwise incompatibilities, and hard
	argument/modality constraints.

	Each returned element is (disjunctive_clauses, feature_name, weight),
	where each clause is a conjunction of factor atoms such as "p<id>"
	(literal hypothesized) and "c<x> <y>" / "!c<x> <y>" (variables unified /
	not unified).  NOTE(review): atom syntax inferred from usage -- confirm
	against the henry engine.

	Args:
		ctx: opaque henry context handle (passed through to henryext).

	Returns:
		list of (clauses, feature_name, weight) tuples; empty when
		pa.donothing is set, possibly truncated on timeout.
	"""
	if pa.donothing: return []
	
	ret		= []
	set_p = henryext.getLiterals( ctx )

	# Explicit Non-identity: penalize unifying variable pairs matched by a
	# pattern in g_explnids_list.
	if not pa.noexplident:
		for lfs in g_explnids_list:
			eq, inst = _getMatchingSets( ctx, [_break(lf.replace( "'", "" )) for lf in lfs.split( " & " )] )

			if 0 < len(eq) and "" != eq["x"][0] and "" != eq["y"][0]:
				ret += [([["c%s %s" % (eq["x"][0], eq["y"][0])]], "EXPLICIT_NONIDENT", -1)]
	
	same_pred_args = {}  # de-dup table for ARGS_IN_SAME_PREDS factors
	fr_cache			 = {}  # functional-relation patterns already instantiated
	
	for p in set_p:
		if henryext.isTimeout(ctx): return ret  # best-effort: return what we have so far
		if p[0] in ["=", "!="]: continue

		# COST FOR p.
		# Predicate suffix after the last "-" (e.g. "vb", "nn") is the coarse
		# predicate class used in feature names; suffix-less special families
		# get their own class below.
		psuf		 = re.findall("-([^-]+)$", p[0])
		psuf		 = (psuf[0] if 0 < len(psuf) else "")
		predgen  = psuf

		if "" == predgen:
			if "FN" in p[0]: predgen = p[0]
			if "synset" in p[0]: predgen = "synset"
			if "Script" in p[0]: predgen = "Script"
			if "cause"  in p[0]: predgen = "cause"
			if "entail"  in p[0]: predgen = "entail"
		
		# Base cost for leaving p hypothesized (unexplained).
		ret += [([["p%d" % p[2]]], "!HYPOTHESIZED_%s" % (predgen), -p[5]-0.001)]
		
		# FUNCTIONAL WORDS
		if not pa.nofuncrel:
			for i, fr in enumerate(g_funcrel[p[0]]):
				# NOTE(review): the cache is keyed by pattern index only, so
				# same-index patterns of *different* predicates are skipped
				# too -- confirm this is intended.
				if fr_cache.has_key(i): continue

				fr_cache[i] = 1
				lfs, score = fr

				eq, inst = _getMatchingSets( ctx, [_break(lf.replace( "'", "" )) for lf in lfs.split( " & " )] )

				if 2 == len(inst) and 2 <= len(eq["x2"]):
					ret += [([["!c%s %s" % (eq["x2"][0], eq["x2"][1]), "c%s %s" % (eq["x1"][0], eq["x1"][1])]], "FUNC_REL", -1)]
		
		# CREATE EXPLANATION FACTORS FOR p.
		dnf_expl = []

		# ARGUMENTS AT THE SAME OBSERVABLE PREDICATES: candidate unification
		# of argument pairs of one verbal literal (each unordered pair once).
		if not pa.noargineq and "vb" == psuf and 4 == len(p[1]):
			if not same_pred_args.has_key("%s-%s" % (p[1][1], p[1][2])) and not same_pred_args.has_key("%s-%s" % (p[1][2], p[1][1])): ret += [([["c%s %s" % (p[1][1], p[1][2])]], "ARGS_IN_SAME_PREDS", 1)]; same_pred_args["%s-%s" % (p[1][1], p[1][2])] = 1
			if not same_pred_args.has_key("%s-%s" % (p[1][2], p[1][3])) and not same_pred_args.has_key("%s-%s" % (p[1][3], p[1][2])): ret += [([["c%s %s" % (p[1][2], p[1][3])]], "ARGS_IN_SAME_PREDS", 1)]; same_pred_args["%s-%s" % (p[1][2], p[1][3])] = 1
			if not same_pred_args.has_key("%s-%s" % (p[1][1], p[1][3])) and not same_pred_args.has_key("%s-%s" % (p[1][3], p[1][1])): ret += [([["c%s %s" % (p[1][1], p[1][3])]], "ARGS_IN_SAME_PREDS", 1)]; same_pred_args["%s-%s" % (p[1][1], p[1][3])] = 1

		
		for q in set_p:
			if q[0] in ["=", "!="]: continue
			if p[2] == q[2]:        continue

			# Shared atoms: both literals hypothesized.
			# BUGFIX(review): fc_cooc used to be first assigned *after* its use
			# in the FN_DISJOINT_FRAMES factor below, raising NameError on the
			# first pair (and silently reusing the previous pair's value
			# afterwards); it is now defined before any use.
			fc_cooc				 = ["p%d" % p[2], "p%d" % q[2]]
			psuf, qsuf		 = re.findall("-([^-]+)$", p[0]), re.findall("-([^-]+)$", q[0])
			psuf, qsuf		 = (psuf[0] if 0 < len(psuf) else ""), (qsuf[0] if 0 < len(qsuf) else "")

			if "" != p[4] and "" != q[4]:

				# FRAME x FRAME DISJOINTNESS.
				if g_fndisj.has_key("%s-%s" % (p[0], q[0])):
					ret      += [([fc_cooc + ["c%s %s" % (p[1][0], q[1][0])]], "!FN_DISJOINT_FRAMES", -1000)]
				
				# IF THEY ARE EXPLAINING THE SAME THING, JUST IGNORE THEM. (p => q, p => q)
				#if 0 < len(set(p[4].split(",")) & set(q[4].split(","))): continue
				
				# SELF-EXPLANATION IS PROHIBITED. (p => q => p)
				#if repr(q[2]) in p[4].split(","): continue
				
			# Variants of fc_cooc that additionally unify leading arguments.
			if len(q[1]) > 0 and len(p[1]) > 0:
				fc_cooc_vu0		 = fc_cooc + ["c%s %s" % (p[1][0], q[1][0])]
			else:
				fc_cooc_vu0 = fc_cooc

			if len(q[1]) > 1 and len(p[1]) > 1:
				# (the original min(1, len(...)) index is always 1 under this guard)
				fc_cooc_vu1		 = fc_cooc + ["c%s %s" % (p[1][1], q[1][1])]
			else:
				# BUGFIX(review): previously left unassigned here, so later
				# unconditional uses of fc_cooc_vu1 raised NameError on the
				# first pair (or reused a stale value) when either literal had
				# fewer than two arguments; fall back like fc_cooc_vu0 above.
				fc_cooc_vu1 = fc_cooc
			fc_cooc_vuall1 = fc_cooc + (["c%s %s" % (p[1][i], q[1][i]) for i in xrange(1,len(p[1]))] if 1 < len(p[1]) and len(p[1]) == len(q[1]) else [])
			fc_cooc_vuall  = fc_cooc + (["c%s %s" % (p[1][i], q[1][i]) for i in xrange(len(p[1]))] if len(p[1]) == len(q[1]) else [])

			#
			# EXPLANATION FOR p: q's ancestor-id list contains p's id.
			if p[0] != q[0] and repr(p[2]) in q[4].split(","): dnf_expl += [(fc_cooc, "", 1)]

			# PRONOUN COMPATIBILITY.
			if p[0] in "person per".split() and q[0] in "male female".split(): dnf_expl += [(fc_cooc_vu1, "", 1)]
			if "thing" == p[0]  and q[0] in "neuter".split():                  dnf_expl += [(fc_cooc_vu1, "", 1)]

			# nn => PRONOUN
			if p[0] != q[0] and (p[0] in g_prnp or p[0] in g_pnp) and "nn" == qsuf:
				dnf_expl += [(fc_cooc_vu1, "", 1)]
				
			#
			# EXPLANATION BY UNIFICATION.
				
			# BELOW ARE EXPLANATION BY UNIFICATION; AVOID DOUBLE-COUNT.			
			_bothStartsWith = lambda x: p[0].startswith(x) and q[0].startswith(x)
			_samePreds      = lambda: p[0] == q[0]

			# CLASSICAL UNIFICATION.  The (p[5], p[2]) > (q[5], q[2]) ordering
			# counts each unordered pair exactly once.
			if _samePreds() and (p[5] > q[5] or (p[5] == q[5] and p[2] > q[2])):
				# Skip predicates matching any "word-equality prohibited" pattern.
				for cu in g_wep:
					if not pa.nowep and None != cu.search(p[0]): break
				else:
					if not (pa.nosynunify and p[0].startswith("synset")):
						dnf_expl += [(fc_cooc_vuall, "UNIFY_PROPOSITIONS", 1)]

			# UNIFICATION COST.
			if not pa.nocp and p[0] == q[0] and len(p[1]) == len(q[1]) and p[2] > q[2]:
				for i in xrange(len(p[1])):
					if p[1][i] == q[1][i]: continue
					
					# FEATURE MAP CONVERTER: rewrite "<pred>.<argpos>" through
					# the g_fm (matcher, replacement, value) rules.
					fe, fv = "%s.%d" % (p[0], i), 0
					
					for matcher, after, value in g_fm:
						fes, n = matcher.subn(after, fe)
						fv		 = value
												
						if 0 == n: continue

						# ADD AS AN EVIDENCE OF VARIABLE UNIFICATION.
						if not (pa.nosynunify and "SYNSET" in fes):
							
							# VALUE CONVERTER: resolve symbolic values.
							if "$abstlevel" == fv: fv = g_word_abst.get(p[0], 0)
							elif "$wordfreq" == fv: fv = g_word_freq.get(p[0].split("-")[-2], 0)

							ret += [([fc_cooc + ["c%s %s" % (p[1][i], q[1][i])]], fes, float(fv))]

							
			# if p[0].startswith("synset1"):
			# 	syn_dist = len(corpus.wordnet._synset_from_pos_and_offset("n", int(p[0][7:])).hypernym_distances())

			# 	# BOXER AUX x SYNSET (>11)
			# 	if p[0] != q[0] and 10<=syn_dist and q[0] in g_pnp:
			# 		ret += [([fc_cooc_vu1], "PN-%s_SYN-%s_UNIFY_Y" % (q[0], p[0][6:]), 1)]

			# 	# PRONOUN x SYNSET (>11)
			# 	if p[0] != q[0] and 10<=syn_dist and q[0] in g_prnp:
			# 		ret += [([fc_cooc_vu1], "PRO-%s_SYN-%s_UNIFY_Y" % (q[0], p[0][6:]), 1)]

			# BOXER AUX x PRONOUN
			if not pa.noinc and p[0] != q[0] and p[0] in g_pnp and q[0] in g_prnp:
				ret += [([fc_cooc_vu1], "PN-%s_PRO-%s_UNIFY_Y" % (p[0], q[0]), 1)]

			# FRAME x SYNSET: FRAMENET SELECTIONAL RESTRICTION.
			if not pa.nofnsr:
				if p[0].startswith("synset") and q[0].startswith("FN"):
					# NOTE(review): the conditional below is always true under
					# the enclosing if, so only the (p=synset, q=frame)
					# orientation is ever handled.
					syn, fn = (p, q) if p[0].startswith("synset") and q[0].startswith("FN") else (q, p)
					fnsr = g_fnsr.get( "%s-%s" % (syn[0], fn[0]) )
					if None != fnsr: 
						ret += [([fc_cooc + ["c%s %s" % (fn[1][fnsr[3].index("x")], syn[1][1])]], "FN_%s_SEL_RESTR_Y" % fn[0], -1+fnsr[1])]

			# SYMMETRIC FEATURES (emitted once per unordered pair).
			if p[2] > q[2]:
				
				# WORDNET FEATURE.
				if p[0] != q[0] and _bothStartsWith("synset"):
					if g_wnanto.has_key("%s-%s" % (q[0][7:], p[0][7:])) or g_wnanto.has_key("%s-%s" % (p[0][7:], q[0][7:])):
						# Antonymous synsets: penalize unifying their word arguments.
						if not pa.noinc and p[1][1] != q[1][1]: ret += [([fc_cooc_vu1], "WN_ANTONYMOUS_SYNSET_Y", -1)]
					else:
						# Sibling synsets (same parent in the hypernym hierarchy).
						prnt1, prnt2 = g_wnhier.get( p[0][6:] ), g_wnhier.get( q[0][6:] )
						if None != prnt1 and prnt1 == prnt2 and p[1][1] != q[1][1]:
							if not pa.noinc: ret += [([fc_cooc_vu1], "WN_SIBLING_SYNSET_Y", -1)]

				if not pa.noder and len(p[1]) > 1 and len(q[1]) > 1 and p[1][1] != q[1][1] and (g_wnder.has_key("%s-%s" % (p[0], q[0])) or g_wnder.has_key("%s-%s" % (q[0], p[0]))):
					ret += [([fc_cooc_vu1], "WN_DERIVATIONAL_Y", 1)]
					
				# HAND-CRAFTED INCOMPATIBILITY.
				if g_handinc.has_key("%s %s" % (p[0], q[0])) or g_handinc.has_key("%s %s" % (q[0], p[0])):
					if not pa.noinc and p[1][1] != q[1][1]: ret += [([fc_cooc_vu1], "HD_INCOMPATIBLE_Y", -1)]
				
				# try: dnf_expl += [(fc_cooc_vu1, "PRONOUN_%s_SENTDIST_%s" % (p[0], min(1,abs(int(p[1][1].split("x")[0]) - int(q[1][1].split("x")[0])))), 1) ]
				# except ValueError: pass

				# PROPER NAMES THAT DON'T BELONG TO THE SAME SYNSET
				if "nn" == psuf == qsuf and p[0] != q[0]:
					def _isPn(x):
						# Return (is_proper_name, synset_preds) for literal x,
						# judged from the literals sharing its second argument.
						if len(x[1])<2: return False, []

						f_p_pn, synsets = False, []

						for pp in henryext.getLiteralsFromTerm(ctx, x[1][1]):
							if pp[0] in g_pnp: f_p_pn = True
							if pp[0].startswith("synset"): synsets += [pp[0]]

						return f_p_pn, synsets

					pj, qj = _isPn(p), _isPn(q)

					# Two proper names with disjoint synset sets must not corefer.
					if pj[0] and qj[0] and 0 == len(set(pj[1]) & set(qj[1])):
						if not pa.noinc and p[1][1] != q[1][1]: ret += [([fc_cooc_vu1], "DIFFERENT_PROPERNAMES_UNIFIED", -1)]
						
				#
				# CONSTRAINTS

				# ARGUMENT CONSTRAINTS.
				if not pa.noargineq:
					if p[0] == q[0] and len(p[1]) == len(q[1]):
						# Atoms over differing arguments containing "e": arg 0
						# unified ("c") while another is not ("!c") -- presumably
						# forbidding partial unification of event arguments.
						eas = ["%s%s %s" % ("c" if 0==i else "!c", p[1][i], q[1][i]) for i in xrange(len(p[1])) if ("e" in p[1][i] or "e" in q[1][i]) and p[1][i] != q[1][i]]

						if 2 <= len(eas):
							ret += [([fc_cooc + eas], "ARGUMENT_CONSTR", -1)]

					# EVENT-DENOTING VARIBLE CONSTRAINTS.
					if _samePreds() and psuf == qsuf == "vb":
						try:
							ret += [([fc_cooc_vu0 + ["!c%s %s" % (p[1][i], q[1][i])]], "ARGUMENT_CONSTR", -1) for i in xrange(1, len(p[1])) if p[1][i] != q[1][i]]
						except IndexError:
							pass

					# 
					# if p[0] == q[0] and "in" == psuf == qsuf and 2 < len(p[1]) and 2 < len(q[1]):
					# 	ret += [([fc_cooc + ["c%s %s" % (p[1][1], q[1][1]), "!c%s %s" % (p[1][2], q[1][2])]], "ARGUMENT_CONSTR", -1)]

				# MODALITY CONSTRAINTS.
				if not pa.nomodality:
					if psuf == qsuf == "vb" and p[0] == q[0] and p[1][0] != q[1][0]:
						try:
							# Literals attaching a modality predicate (g_mp) or an
							# embedding verb to either event variable.
							ps, qs = [x for x in henryext.getLiteralsFromTerm(ctx, p[1][0]) if (x[0] in g_mp and x[1][1] == p[1][0]) or (x[0].endswith("vb") and x[1][2] == p[1][0])], \
									[x for x in henryext.getLiteralsFromTerm(ctx, q[1][0]) if (x[0] in g_mp and x[1][1] == q[1][0]) or (x[0].endswith("vb") and x[1][2] == q[1][0])]

							if len(ps) > 0 or len(qs) > 0:
								ret += [([fc_cooc + ["c%s %s" % (p[1][i], q[1][i]) for i in xrange(0,len(p[1])) if "u" not in p[1][i] or "u" not in q[1][i]]], "MODALITY_CONSTR", -1)]
						except IndexError:
							pass						

		# CREATE FEATURE FOR EACH DISJUNCTIVE CLAUSE
		# for disj in dnf_expl:
		#  	ret += [([disj[0]], disj[1], -0.1)]
		
		# CREATE FEATURE FOR DNF: reward p if any collected explanation holds.
		ret += [([disj[0] for disj in dnf_expl], "!EXPLAINED_%s" % (predgen), p[5])]
			
	return ret