def make_rule(parent, children, fwords, ewords, parent_nt=None, children_nts=None):
    """Build a rule from a parent phrase with its child phrases cut out as gaps.

    A phrase (box) is a sequence ``[fi, fj, ei, ej]``: ``fi:fj`` slices
    ``fwords`` and ``ei:ej`` slices ``ewords``.

    Args:
        parent: box of the phrase the rule is built from.
        children: boxes of the sub-phrases to replace by nonterminals.
        fwords: source-side sentence (sliced, never modified).
        ewords: target-side sentence (sliced, never modified).
        parent_nt: left-hand-side symbol; ``PHRASE_NT`` when ``None``.
        children_nts: one nonterminal per child, in the same order as
            ``children``; all ``PHRASE_NT`` when ``None``.

    Returns:
        A ``Rule`` initialised via ``rule.init(parent_nt, f, e, e2f)``.
        Its ``fpos`` / ``epos`` attributes hold, for every remaining symbol,
        either the sentence index of a terminal or the ``(start, stop)``
        span of the child a nonterminal stands for.
    """
    if parent_nt is None:
        parent_nt = PHRASE_NT
    if children_nts is None:
        children_nts = [PHRASE_NT] * len(children)

    f_start, f_stop, e_start, e_stop = parent
    src_syms = fwords[f_start:f_stop]
    tgt_syms = ewords[e_start:e_stop]

    # Phrase-index -> sentence-index maps; they track word alignment for
    # lexical weighting.
    src_pos = list(range(f_start, f_stop))
    tgt_pos = list(range(e_start, e_stop))

    def carve(symbols, positions, offset, lo, hi, head):
        # Put `head` on the first cell of the child span and blank the rest
        # of the span with None placeholders (removed further down).
        first = lo - offset
        last = hi - offset
        symbols[first] = head
        positions[first] = (lo, hi)
        for k in range(first + 1, last):
            symbols[k] = None
            positions[k] = None

    for var_idx, child in enumerate(children):
        c_fi, c_fj, c_ei, c_ej = child
        nt = children_nts[var_idx]
        carve(src_syms, src_pos, f_start, c_fi, c_fj, nt)
        # The target side also remembers which child the gap belongs to, so
        # the nonterminal permutation can be recovered afterwards.
        carve(tgt_syms, tgt_pos, e_start, c_ei, c_ej, (nt, var_idx))

    # Drop the placeholders.
    src_syms = [sym for sym in src_syms if sym is not None]
    src_pos = [pos for pos in src_pos if pos is not None]
    tgt_pos = [pos for pos in tgt_pos if pos is not None]

    # Split target symbols from the child index each gap carries.
    tgt_out = []
    e2f = []
    for sym in tgt_syms:
        if sym is None:
            continue
        if type(sym) is tuple:
            tgt_out.append(sym[0])
            e2f.append(sym[1])
        else:
            tgt_out.append(sym)

    rule = Rule()
    rule.init(parent_nt, src_syms, tgt_out, e2f)
    rule.fpos = src_pos
    rule.epos = tgt_pos
    return rule
def make_rule(self, a, source_phrase, fwords):
    """Turn an initial phrase plus a French-side pattern into a Rule.

    ``source_phrase`` is a 5-tuple ``(x, i1, j1, i2, j2)``. ``j1:j2`` is the
    slice of ``a.ewords`` covered by the phrase, and ``(i1, i2, j1, j2)`` is
    stored as the rule's ``span``. The label ``x`` is unpacked but not
    used: the left-hand side is always ``PHRASE``.

    ``fwords`` is the French side of the candidate rule. Each item is
    either an ``int`` (an index into ``a.fwords``, i.e. a terminal) or a
    subphrase 5-tuple laid out like ``source_phrase`` (a nonterminal gap).
    The caller's list is copied, not modified.

    Returns ``None`` when

    * the pattern consists of a single nonterminal (trivial rule);
    * ``tight_phrases`` is false and a subphrase's English span lies
      outside the phrase or collides with an earlier gap;
    * ``self.require_aligned_terminal`` is set and no terminal has a truthy
      ``a.faligned`` entry.

    Otherwise returns a ``Rule`` with ``lhs``, ``f``, ``e``, ``e2f``,
    ``fpos``, ``epos`` and ``span`` filled in. ``fpos`` / ``epos`` hold a
    sentence index for each terminal and a ``(start, stop)`` pair for each
    nonterminal. When ``self.keep_word_alignments`` is set,
    ``word_alignments`` lists the ``(f_index, e_index)`` pairs of rule
    terminals for which ``a.aligned[fpos][epos]`` is truthy.
    """
    _label, i1, j1, i2, j2 = source_phrase

    # A pattern that is just one gap carries no information; skip it.
    if len(fwords) == 1 and type(fwords[0]) is not int:
        return None

    f_side = fwords[:]
    f_positions = [None] * len(f_side)  # phrase index -> sentence index
    e_side = a.ewords[j1:j2]
    e_len = j2 - j1
    gap_count = 0
    saw_aligned_terminal = False

    for k, item in enumerate(f_side):
        if type(item) is int:
            # Terminal: swap the sentence index for the actual word.
            if a.faligned[item]:
                saw_aligned_terminal = True
            f_side[k] = a.fwords[item]
            f_positions[k] = item
            continue

        # Nonterminal: a subphrase becomes a gap.
        gap_label, gap_i1, gap_j1, gap_i2, gap_j2 = item
        lo = gap_j1 - j1
        hi = gap_j2 - j1

        if not tight_phrases:
            # The English hole has to fit inside the phrase ...
            if lo < 0 or hi > e_len:
                return None
            # ... and must not touch cells already claimed by another gap.
            if any(
                type(e_side[j]) is tuple or e_side[j] is None
                for j in range(lo, hi)
            ):
                return None

        f_side[k] = gap_label
        f_positions[k] = (gap_i1, gap_i2)
        # The first English cell of the gap carries the variable; the rest
        # become None and are dropped when the English side is rebuilt.
        e_side[lo] = (gap_label, gap_count, gap_j1, gap_j2)
        for j in range(lo + 1, hi):
            e_side[j] = None
        gap_count += 1

    # Optionally insist on at least one aligned French terminal.
    if self.require_aligned_terminal and not saw_aligned_terminal:
        return None

    e_out = []
    e_positions = []
    e2f = []
    for offset in range(e_len):
        token = e_side[offset]
        if token is None:
            continue
        if type(token) is tuple:
            gap_label, gap_idx, gap_j1, gap_j2 = token
            e2f.append(gap_idx)
            e_out.append(gap_label)
            e_positions.append((gap_j1, gap_j2))
        else:
            e_out.append(token)
            e_positions.append(offset + j1)

    r = Rule()
    r.lhs = PHRASE
    r.f = f_side
    r.e = e_out
    r.e2f = e2f
    r.fpos = f_positions
    r.epos = e_positions
    r.span = (i1, i2, j1, j2)

    if self.keep_word_alignments:
        # Only terminal/terminal pairs can be aligned; gaps hold tuples.
        r.word_alignments = [
            (f_idx, e_idx)
            for f_idx, f_at in enumerate(f_positions)
            if type(f_at) is int
            for e_idx, e_at in enumerate(e_positions)
            if type(e_at) is int and a.aligned[f_at][e_at]
        ]
    return r