# NOTE: Python 2 code. `henryext` and the module-level globals referenced
# below (pa, g_disj, g_fm, g_funcrel, ...) are assumed to be provided by the
# enclosing Henry framework.
import re
from collections import defaultdict

def cbScoreFunction( ctx ):
    ret = []
    set_p = henryext.getLiterals( ctx )

    for p in set_p:
        if henryext.isTimeout(ctx): return ret
        if "=" == p[0]: continue

        if 1 == p[8]:
            ret += [([["p%d" % p[2]]], "$PROHIBITED_%s" % (p[0]), -10000)]
        else:
            # COST FOR p.
            if 4 == p[6]:
                ret += [([["p%d" % p[2]]], "$HYPOTHESIZED_%s" % (p[0]), -10000)] #-p[5]-0.001)]
            else:
                ret += [([["p%d" % p[2]]], "HYPOTHESIZED_%s_%s" % (p[0], p[3]), 1)] #-p[5]-0.001)]
            #ret += [([["p%d" % p[2]]], "!HYPOTHESIZED_%s" % (p[0]), -p[5]-0.001)]

        if "!=" == p[0]: continue

        # CREATE EXPLANATION FACTORS FOR p.
        dnf_expl, expl = [], defaultdict(list)

        for q in set_p:
            if q[0] in ["=", "!="]: continue
            if p[2] == q[2]: continue
            if 4 == q[6]: continue

            if "" != p[4] and "" != q[4]:
                # SELF-EXPLANATION IS PROHIBITED. (p => q => p)
                if repr(q[2]) in p[4].split(","): continue

            fc_cooc = ["p%d" % p[2], "p%d" % q[2]]
            fc_cooc_vuall = fc_cooc + (["c%s %s" % (p[1][i], q[1][i]) for i in xrange(len(p[1]))] if len(p[1]) == len(q[1]) else [])

            # if (2 == p[6] and 3 == q[6]) or (2 == q[6] and 3 == p[6]):
            #     ret += [([fc_cooc], "COOC_%s-%s" % (max(p[0],q[0]), min(p[0],q[0])), 1)]
            # if 3 == p[6] and 3 == q[6]:
            #     ret += [([fc_cooc], "HYPO_COOC_%s-%s" % (max(p[0],q[0]), min(p[0],q[0])), 1)]
            # if p[0] != q[0] and (("inst_" in p[0] and "inst_" not in q[0]) or ("inst_" in q[0] and "inst_" not in p[0])) and p[0] in plan_predicates and q[0] in plan_predicates:
            #     ret += [([fc_cooc], "HYPO_PLANPREDS_%s-%s" % (max(p[0],q[0]), min(p[0],q[0])), 1)]

            #
            # EXPLANATION FOR p.
            if 4 != p[6]:
                if p[0] != q[0] and repr(p[2]) in q[4].split(","):
                    expl[(q[7], q[3])] += ["p%d" % q[2]]

            #
            # EXPLANATION BY UNIFICATION.
            # if "" != p[4] and "" != q[4]:
            #     # IF THEY ARE EXPLAINING THE SAME THING, JUST IGNORE THEM. (p => q, p => q)
            #     if 0 < len(set(p[4].split(",")) & set(q[4].split(","))): continue

            if g_disj.has_key("%s/1\t%s/1" % (p[0], q[0])) or g_disj.has_key("%s/1\t%s/1" % (q[0], p[0])):
                if pa.disjoint:
                    ret += [([fc_cooc_vuall], "DISJOINT_%s-%s" % (max(p[0],q[0]), min(p[0],q[0])), -9999)]
                elif pa.disjointfeature:
                    ret += [([fc_cooc_vuall], "DISJOINT_%s-%s" % (max(p[0],q[0]), min(p[0],q[0])), 1)]

            #
            # BELOW ARE EXPLANATION BY UNIFICATION; AVOID DOUBLE-COUNTING.
            _bothStartsWith = lambda x: p[0].startswith(x) and q[0].startswith(x)
            _samePreds = lambda: p[0] == q[0]

            # CLASSICAL UNIFICATION.
            if _samePreds():
                if 4 == p[6]:
                    dnf_expl += [fc_cooc_vuall]
                elif p[5] > q[5] or (p[5] == q[5] and p[2] > q[2]):
                    dnf_expl += [fc_cooc_vuall]
                    ret += [([fc_cooc_vuall], "UNIFY_%s" % p[0], 1)]

        # CREATE FEATURE FOR THE DNF.
        if 4 == p[6]:
            ret += [(dnf_expl, "$EXPLAINED_BY_UNIF_%s" % (p[0]), 10000)]
        else:
            # ret += [(dnf_expl, "!EXPLAINED_BY_UNIF_%s" % (p[0]), 1)] #p[5])]
            #ret += [(expl.values(), "EXPLAINED_BY_LIT_%s" % (p[0]), 1)] #p[5])]
            # ret += [(dnf_expl+expl.values(), "!EXPLAINED_BY_LIT_%s" % (p[0]), p[5])]
            ret += [(dnf_expl+expl.values(), "EXPLAINED_BY_%s_%s" % (p[0], p[3]), 1)]

            for k, v in expl.iteritems():
                # ret += [([v], "AXIOM_%s" % (k[1] if not pa.genaxiom else "_".join(k[1].split("_")[:-1])), 1)]
                if "" == p[10]: continue
                ret += [([v], "AXIOM_%s" % p[10], 1)]

    return ret
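# --------------------------------------------------------------------------
# How to read the triples returned above -- a sketch inferred from usage, not
# an authoritative description of the henryext interface: each element of
# `ret` is (dnf, feature_name, weight), where `dnf` is a list of conjunctions
# over factor atoms. "p<id>" appears to denote "literal <id> is part of the
# proof", and "c<t1> <t2>" (negated as "!c<t1> <t2>") that terms t1 and t2
# are unified. For example,
#
#     ([["p3", "p5", "cx1 x2"]], "UNIFY_dog", 1)
#
# would make the feature "UNIFY_dog" fire with weight 1 when literals 3 and 5
# are both active and the terms x1 and x2 corefer.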
def cbScoreFunction( ctx ):
    if pa.donothing: return []

    ret = []
    set_p = henryext.getLiterals( ctx )

    # EXPLICIT NON-IDENTITY.
    if not pa.noexplident:
        for lfs in g_explnids_list:
            eq, inst = _getMatchingSets( ctx, [_break(lf.replace( "'", "" )) for lf in lfs.split( " & " )] )
            if 0 < len(eq) and "" != eq["x"][0] and "" != eq["y"][0]:
                ret += [([["c%s %s" % (eq["x"][0], eq["y"][0])]], "EXPLICIT_NONIDENT", -1)]

    same_pred_args = {}
    cp_prior = {}
    fr_cache = {}

    for p in set_p:
        if henryext.isTimeout(ctx): return ret
        if p[0] in ["=", "!="]: continue

        # COST FOR p.
        def _getDepth(x): return 0 if "" == x[4].strip() else len(x[4].split(","))

        psuf = re.findall("-([^-]+)$", p[0])
        psuf = (psuf[0] if 0 < len(psuf) else "")
        predgen = psuf

        if "" == predgen:
            if "FN" in p[0]: predgen = p[0]
            if "synset" in p[0]: predgen = "synset"
            if "Script" in p[0]: predgen = "Script"
            if "cause" in p[0]: predgen = "cause"
            if "entail" in p[0]: predgen = "entail"

        ret += [([["p%d" % p[2]]], "!HYPOTHESIZED_%s" % (predgen), -p[5]-0.001)]

        # FUNCTIONAL WORDS.
        if not pa.nofuncrel:
            for i, fr in enumerate(g_funcrel[p[0]]):
                if fr_cache.has_key(i): continue
                fr_cache[i] = 1

                lfs, score = fr
                eq, inst = _getMatchingSets( ctx, [_break(lf.replace( "'", "" )) for lf in lfs.split( " & " )] )
                if 2 == len(inst) and 2 <= len(eq["x2"]):
                    ret += [([["!c%s %s" % (eq["x2"][0], eq["x2"][1]), "c%s %s" % (eq["x1"][0], eq["x1"][1])]], "FUNC_REL", -1)]

        # CREATE EXPLANATION FACTORS FOR p.
        dnf_expl = []

        # ARGUMENTS AT THE SAME OBSERVABLE PREDICATES.
        if not pa.noargineq and "vb" == psuf and 4 == len(p[1]):
            for a1, a2 in [(p[1][1], p[1][2]), (p[1][2], p[1][3]), (p[1][1], p[1][3])]:
                if not same_pred_args.has_key("%s-%s" % (a1, a2)) and not same_pred_args.has_key("%s-%s" % (a2, a1)):
                    ret += [([["c%s %s" % (a1, a2)]], "ARGS_IN_SAME_PREDS", 1)]
                    same_pred_args["%s-%s" % (a1, a2)] = 1

        for q in set_p:
            if q[0] in ["=", "!="]: continue
            if p[2] == q[2]: continue

            psuf, qsuf = re.findall("-([^-]+)$", p[0]), re.findall("-([^-]+)$", q[0])
            psuf, qsuf = (psuf[0] if 0 < len(psuf) else ""), (qsuf[0] if 0 < len(qsuf) else "")

            fc_cooc = ["p%d" % p[2], "p%d" % q[2]]

            if len(q[1]) > 0 and len(p[1]) > 0:
                fc_cooc_vu0 = fc_cooc + ["c%s %s" % (p[1][0], q[1][0])]
            else:
                fc_cooc_vu0 = fc_cooc

            if len(q[1]) > 1 and len(p[1]) > 1:
                fc_cooc_vu1 = fc_cooc + ["c%s %s" % (p[1][1], q[1][1])]
            else:
                fc_cooc_vu1 = fc_cooc

            fc_cooc_vuall1 = fc_cooc + (["c%s %s" % (p[1][i], q[1][i]) for i in xrange(1, len(p[1]))] if 1 < len(p[1]) and len(p[1]) == len(q[1]) else [])
            fc_cooc_vuall  = fc_cooc + (["c%s %s" % (p[1][i], q[1][i]) for i in xrange(len(p[1]))] if len(p[1]) == len(q[1]) else [])

            if "" != p[4] and "" != q[4]:
                # FRAME x FRAME DISJOINTNESS.
                if g_fndisj.has_key("%s-%s" % (p[0], q[0])):
                    ret += [([fc_cooc + ["c%s %s" % (p[1][0], q[1][0])]], "!FN_DISJOINT_FRAMES", -1000)]

                # IF THEY ARE EXPLAINING THE SAME THING, JUST IGNORE THEM. (p => q, p => q)
                #if 0 < len(set(p[4].split(",")) & set(q[4].split(","))): continue

                # SELF-EXPLANATION IS PROHIBITED. (p => q => p)
                #if repr(q[2]) in p[4].split(","): continue

            #
            # EXPLANATION FOR p.
            if p[0] != q[0] and repr(p[2]) in q[4].split(","):
                dnf_expl += [(fc_cooc, "", 1)]

            # PRONOUN COMPATIBILITY.
            if p[0] in "person per".split() and q[0] in "male female".split(): dnf_expl += [(fc_cooc_vu1, "", 1)]
            if "thing" == p[0] and q[0] in "neuter".split(): dnf_expl += [(fc_cooc_vu1, "", 1)]

            # nn => PRONOUN.
            if p[0] != q[0] and (p[0] in g_prnp or p[0] in g_pnp) and "nn" == qsuf:
                dnf_expl += [(fc_cooc_vu1, "", 1)]

            #
            # EXPLANATION BY UNIFICATION.
            # BELOW ARE EXPLANATION BY UNIFICATION; AVOID DOUBLE-COUNTING.
            _bothStartsWith = lambda x: p[0].startswith(x) and q[0].startswith(x)
            _samePreds = lambda: p[0] == q[0]

            # CLASSICAL UNIFICATION.
            if _samePreds() and (p[5] > q[5] or (p[5] == q[5] and p[2] > q[2])):
                for cu in g_wep:
                    if not pa.nowep and None != cu.search(p[0]): break
                else:
                    if not (pa.nosynunify and p[0].startswith("synset")):
                        dnf_expl += [(fc_cooc_vuall, "UNIFY_PROPOSITIONS", 1)]

            # UNIFICATION COST.
            if not pa.nocp and p[0] == q[0] and len(p[1]) == len(q[1]) and p[2] > q[2]:
                for i in xrange(len(p[1])):
                    if p[1][i] == q[1][i]: continue

                    # FEATURE MAP CONVERTER.
                    fe, fv = "%s.%d" % (p[0], i), 0

                    for matcher, after, value in g_fm:
                        fes, n = matcher.subn(after, fe)
                        fv = value
                        #fv = 0
                        if 0 == n: continue

                        # ADD AS AN EVIDENCE OF VARIABLE UNIFICATION.
                        if not (pa.nosynunify and "SYNSET" in fes):
                            # VALUE CONVERTER.
                            if "$abstlevel" == fv:
                                fv = g_word_abst.get(p[0], 0)
                            elif "$wordfreq" == fv:
                                fv = g_word_freq.get(p[0].split("-")[-2], 0)

                            ret += [([fc_cooc + ["c%s %s" % (p[1][i], q[1][i])]], fes, float(fv))]

            # if p[0].startswith("synset1"):
            #     syn_dist = len(corpus.wordnet._synset_from_pos_and_offset("n", int(p[0][7:])).hypernym_distances())
            #
            #     # BOXER AUX x SYNSET (>11)
            #     if p[0] != q[0] and 10 <= syn_dist and q[0] in g_pnp:
            #         ret += [([fc_cooc_vu1], "PN-%s_SYN-%s_UNIFY_Y" % (q[0], p[0][6:]), 1)]
            #
            #     # PRONOUN x SYNSET (>11)
            #     if p[0] != q[0] and 10 <= syn_dist and q[0] in g_prnp:
            #         ret += [([fc_cooc_vu1], "PRO-%s_SYN-%s_UNIFY_Y" % (q[0], p[0][6:]), 1)]

            # BOXER AUX x PRONOUN.
            if not pa.noinc and p[0] != q[0] and p[0] in g_pnp and q[0] in g_prnp:
                ret += [([fc_cooc_vu1], "PN-%s_PRO-%s_UNIFY_Y" % (p[0], q[0]), 1)]

            # FRAME x SYNSET: FRAMENET SELECTIONAL RESTRICTION.
            if not pa.nofnsr:
                if (p[0].startswith("synset") and q[0].startswith("FN")) or (q[0].startswith("synset") and p[0].startswith("FN")):
                    syn, fn = (p, q) if p[0].startswith("synset") else (q, p)
                    fnsr = g_fnsr.get( "%s-%s" % (syn[0], fn[0]) )
                    if None != fnsr:
                        ret += [([fc_cooc + ["c%s %s" % (fn[1][fnsr[3].index("x")], syn[1][1])]], "FN_%s_SEL_RESTR_Y" % fn[0], -1+fnsr[1])]

            # SYMMETRIC FEATURES.
            if p[2] > q[2]:
                # WORDNET FEATURES.
                if p[0] != q[0] and _bothStartsWith("synset"):
                    if g_wnanto.has_key("%s-%s" % (q[0][7:], p[0][7:])) or g_wnanto.has_key("%s-%s" % (p[0][7:], q[0][7:])):
                        if not pa.noinc and p[1][1] != q[1][1]:
                            ret += [([fc_cooc_vu1], "WN_ANTONYMOUS_SYNSET_Y", -1)]
                    else:
                        prnt1, prnt2 = g_wnhier.get( p[0][6:] ), g_wnhier.get( q[0][6:] )
                        if None != prnt1 and prnt1 == prnt2 and p[1][1] != q[1][1]:
                            if not pa.noinc:
                                ret += [([fc_cooc_vu1], "WN_SIBLING_SYNSET_Y", -1)]

                if not pa.noder and len(p[1]) > 1 and len(q[1]) > 1 and p[1][1] != q[1][1] and \
                        (g_wnder.has_key("%s-%s" % (p[0], q[0])) or g_wnder.has_key("%s-%s" % (q[0], p[0]))):
                    ret += [([fc_cooc_vu1], "WN_DERIVATIONAL_Y", 1)]

                # HAND-CRAFTED INCOMPATIBILITY.
                if g_handinc.has_key("%s %s" % (p[0], q[0])) or g_handinc.has_key("%s %s" % (q[0], p[0])):
                    if not pa.noinc and p[1][1] != q[1][1]:
                        ret += [([fc_cooc_vu1], "HD_INCOMPATIBLE_Y", -1)]

                # try: dnf_expl += [(fc_cooc_vu1, "PRONOUN_%s_SENTDIST_%s" % (p[0], min(1, abs(int(p[1][1].split("x")[0]) - int(q[1][1].split("x")[0])))), 1)]
                # except ValueError: pass

                # PROPER NAMES THAT DO NOT BELONG TO THE SAME SYNSET.
                if "nn" == psuf == qsuf and p[0] != q[0]:
                    def _isPn(x):
                        if len(x[1]) < 2: return False, []
                        f_p_pn, synsets = False, []
                        for pp in henryext.getLiteralsFromTerm(ctx, x[1][1]):
                            if pp[0] in g_pnp: f_p_pn = True
                            if pp[0].startswith("synset"): synsets += [pp[0]]
                        return f_p_pn, synsets

                    pj, qj = _isPn(p), _isPn(q)
                    if pj[0] and qj[0] and 0 == len(set(pj[1]) & set(qj[1])):
                        if not pa.noinc and p[1][1] != q[1][1]:
                            ret += [([fc_cooc_vu1], "DIFFERENT_PROPERNAMES_UNIFIED", -1)]

            #
            # CONSTRAINTS.

            # ARGUMENT CONSTRAINTS.
            if not pa.noargineq:
                if p[0] == q[0] and len(p[1]) == len(q[1]):
                    eas = ["%s%s %s" % ("c" if 0 == i else "!c", p[1][i], q[1][i]) for i in xrange(len(p[1])) if ("e" in p[1][i] or "e" in q[1][i]) and p[1][i] != q[1][i]]
                    if 2 <= len(eas):
                        ret += [([fc_cooc + eas], "ARGUMENT_CONSTR", -1)]

                # EVENT-DENOTING VARIABLE CONSTRAINTS.
                if _samePreds() and psuf == qsuf == "vb":
                    try:
                        ret += [([fc_cooc_vu0 + ["!c%s %s" % (p[1][i], q[1][i])]], "ARGUMENT_CONSTR", -1) for i in xrange(1, len(p[1])) if p[1][i] != q[1][i]]
                    except IndexError:
                        pass

                # if p[0] == q[0] and "in" == psuf == qsuf and 2 < len(p[1]) and 2 < len(q[1]):
                #     ret += [([fc_cooc + ["c%s %s" % (p[1][1], q[1][1]), "!c%s %s" % (p[1][2], q[1][2])]], "ARGUMENT_CONSTR", -1)]

            # MODALITY CONSTRAINTS.
            if not pa.nomodality:
                if psuf == qsuf == "vb" and p[0] == q[0] and p[1][0] != q[1][0]:
                    try:
                        ps, qs = [x for x in henryext.getLiteralsFromTerm(ctx, p[1][0]) if (x[0] in g_mp and x[1][1] == p[1][0]) or (x[0].endswith("vb") and x[1][2] == p[1][0])], \
                                 [x for x in henryext.getLiteralsFromTerm(ctx, q[1][0]) if (x[0] in g_mp and x[1][1] == q[1][0]) or (x[0].endswith("vb") and x[1][2] == q[1][0])]

                        if len(ps) > 0 or len(qs) > 0:
                            ret += [([fc_cooc + ["c%s %s" % (p[1][i], q[1][i]) for i in xrange(0, len(p[1])) if "u" not in p[1][i] or "u" not in q[1][i]]], "MODALITY_CONSTR", -1)]
                    except IndexError:
                        pass

        # CREATE FEATURE FOR EACH DISJUNCTIVE CLAUSE.
        # for disj in dnf_expl:
        #     ret += [([disj[0]], disj[1], -0.1)]

        # CREATE FEATURE FOR THE DNF.
        ret += [([disj[0] for disj in dnf_expl], "!EXPLAINED_%s" % (predgen), p[5])]

    return ret
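# --------------------------------------------------------------------------
# A minimal sketch (hypothetical, not from the original) of how the g_fm
# feature map appears to be consumed by the UNIFICATION COST block above:
# each entry looks like (compiled regex, replacement, value). The regex is
# matched against "<predicate>.<argument index>"; on a hit, the rewritten
# string becomes the feature name and the value its weight (the special
# values "$abstlevel" and "$wordfreq" are resolved from lookup tables).
g_fm_example = [(re.compile(r".*-nn\.1$"), "NN_ARG1_UNIFY", 0.5)]

fe = "dog-nn.1"  # predicate "dog-nn", argument index 1
for matcher, after, value in g_fm_example:
    fes, n = matcher.subn(after, fe)
    if 0 == n: continue
    print fes, value  # -> NN_ARG1_UNIFY 0.5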
def cbScoreFunction( ctx ):
    ret = []
    set_p = henryext.getLiterals( ctx )

    for p in set_p:
        if henryext.isTimeout(ctx): return ret
        if "=" == p[0]: continue

        # COST FOR p.
        ret += [([["p%d" % p[2]]], "!HYPOTHESIZED_%s_%s" % (p[0], p[2]), -p[5]-0.001)]

        if "!=" == p[0]: continue

        # CREATE EXPLANATION FACTORS FOR p.
        dnf_expl, expl = [], defaultdict(list)

        for q in set_p:
            if q[0] in ["=", "!="]: continue
            if p[2] == q[2]: continue

            fc_cooc = ["p%d" % p[2], "p%d" % q[2]]
            fc_cooc_vuall = fc_cooc + (["c%s %s" % (p[1][i], q[1][i]) for i in xrange(len(p[1]))] if len(p[1]) == len(q[1]) else [])

            #
            # EXPLANATION FOR p.
            if repr(p[2]) in q[4].split(","):
                # SELF-EXPLANATION IS PROHIBITED. (p => q => p)
                if repr(q[2]) in p[4].split(","): continue
                expl[q[7]] += ["p%d" % q[2]]

            #
            # EXPLANATION BY UNIFICATION.
            if "" != p[4] and "" != q[4]:
                # IF THEY HAVE AN EXPLAINER-EXPLAINEE RELATIONSHIP, DO NOT UNIFY THEM. (p => ... => q)
                if repr(q[2]) in p[4].split(","): continue
                if repr(p[2]) in q[4].split(","): continue
                #if 0 < len(set(p[4].split(",")) & set(q[4].split(","))): continue

                #
                # BELOW ARE EXPLANATION BY UNIFICATION; AVOID DOUBLE-COUNTING.
                _bothStartsWith = lambda x: p[0].startswith(x) and q[0].startswith(x)
                _samePreds = lambda: p[0] == q[0]

                # CLASSICAL UNIFICATION.
                if _samePreds() and (p[5] > q[5] or (p[5] == q[5] and p[2] > q[2])):
                    # DO NOT UNIFY IF SOME ARGUMENT PAIR IS TWO DISTINCT UPPERCASE-INITIAL TERMS.
                    for i in xrange(len(p[1])):
                        if p[1][i] != q[1][i] and p[1][i][0].isupper() and q[1][i][0].isupper(): break
                    else:
                        dnf_expl += [fc_cooc_vuall]

        # GROUPING BY THE CONJUNCTIONS.
        dnf_expl += expl.values()

        # CREATE FEATURE FOR THE DNF.
        ret += [(dnf_expl, "!EXPLAINED_%s_%s" % (p[0], p[2]), p[5])]

    return ret
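# --------------------------------------------------------------------------
# A hypothetical smoke test for the version above (not part of the original).
# `henryext` is stubbed out, and the literal tuples are laid out the way the
# code indexes them; the field meanings are inferred from usage, so treat
# this as a sketch of the interface rather than the real henryext layout:
#   p[0] predicate, p[1] argument list, p[2] literal id, p[4] comma-separated
#   ids of the literals this one explains, p[5] cost, p[7] conjunction id.
class _StubHenryext:
    def __init__(self, literals): self._literals = literals
    def getLiterals(self, ctx): return self._literals
    def isTimeout(self, ctx): return False

if __name__ == "__main__":
    henryext = _StubHenryext([
        # (pred,  args, id, -, explains, cost, -, conj)
        ( "dog", ["x"],  1, 0, "0",      1.0,  0, 10),
        ( "dog", ["y"],  2, 0, "0",      1.0,  0, 11),
    ])

    for factor in cbScoreFunction(None):
        print factor

    # Expected output -- per-literal costs, then a DNF feature whose clause
    # ["p2", "p1", "cy x"] lets the two dog-literals explain each other by
    # unifying their arguments:
    #   ([['p1']], '!HYPOTHESIZED_dog_1', -1.001)
    #   ([], '!EXPLAINED_dog_1', 1.0)
    #   ([['p2']], '!HYPOTHESIZED_dog_2', -1.001)
    #   ([['p2', 'p1', 'cy x']], '!EXPLAINED_dog_2', 1.0)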