Example #1
0
 def add(self, r, estcost=0.):
     """Store rule r, with estimated cost estcost, in this container.

     A rule whose r.f has arity 1 and length 1 is logged and put into
     self.unary_rules under the index-cleared first symbol of r.f; the
     pair (that symbol, r.lhs) is also added to self.unary_less_than.

     Every other rule is filed by walking down from self.root along the
     symbols of r.f.  Each node is a two-element list
     [RuleBin or None, dict of child nodes]; variable symbols are
     index-cleared before being used as keys.  The bin of the final node
     is created on first use (counted in self.rulebin_count) and pruned
     after the insertion.

     self.count is incremented in both cases.
     """
     if r.f.arity() == 1 and len(r.f) == 1:
         log.write("unary rule: %s\n" % r)
         unary_key = sym.clearindex(r.f[0])
         unary_bin = self.unary_rules.setdefault(
             unary_key, RuleBin(self.threshold, self.limit))
         unary_bin.add(estcost, r)
         self.unary_less_than.add((unary_key, r.lhs))
     else:
         node = self.root
         for symbol in r.f:
             key = symbol
             if sym.isvar(symbol):
                 key = sym.clearindex(symbol)
             # setdefault hands back the existing child or the new one
             node = node[1].setdefault(key, [None, {}])
         if node[0] is None:
             node[0] = RuleBin(self.threshold, self.limit)
             self.rulebin_count += 1
         rule_bin = node[0]
         rule_bin.add(estcost, r)
         rule_bin.prune()
     self.count += 1
Example #2
0
def parse(n, xrules, rules):
    """
    Build a chart over the sentence and return its items grouped by span.

    n = length of sentence
    xrules = rules with position info, to be assembled into forest;
             looked up by (nonterminal, i, k)
    rules = grammar of rules from all sentences; rules[r] is the rule
            that is actually attached to the forest
    Returns a list of item lists, one for each of the largest
    non-overlapping spans that have a chart entry; within a list, items
    over shorter spans come before items over longer ones.
    N.B. This does not work properly without tight_phrases"""

    # chart[i][k][label] is the forest item for label over (i, k), or None
    chart = [[dict((label, None) for label in nonterminals)
              for col in xrange(n + 1)]
             for row in xrange(n + 1)]

    for length in xrange(1, n + 1):
        for i in xrange(n - length + 1):
            k = i + length
            cell = chart[i][k]

            for x in nonterminals:
                item = forest.Item(x, i, k)

                if x != START:
                    for r in xrules.get((x, i, k), ()):
                        # one antecedent per position of r.f whose fpos
                        # entry is a (start, end) tuple
                        ants = []
                        for fi in xrange(len(r.f)):
                            pos = r.fpos[fi]
                            if type(pos) is tuple:
                                subi, subj = pos
                                label = sym.clearindex(r.f[fi])
                                ants.append(chart[subi][subj][label])
                        if None not in ants:
                            # the reason for the lookup in rules is to
                            # allow duplicate rules to be freed
                            item.derive(ants, rules[r], r.scores[0])
                    # only items with at least one deduction are stored
                    if len(item.deds) > 0:
                        cell[x] = item
                    continue

                # x == START from here on

                # S -> X: this only sees labels already filled in for
                # this span, i.e. those visited before START
                if i == 0:
                    for y in nonterminals:
                        if y != START and cell[y] is not None:
                            item.derive([cell[y]], gluestop[y])

                # S -> S X
                for j in xrange(i, k + 1):
                    prefix = chart[i][j][START]
                    if prefix is None:
                        continue
                    for y in nonterminals:
                        suffix = chart[j][k][y]
                        if suffix is not None:
                            item.derive([prefix, suffix], glue[y])

                if len(item.deds) > 0:
                    cell[x] = item
                    # add an equal share to the first score of the rule
                    # behind each deduction
                    for ded in item.deds:
                        ded.rule.scores = [
                            ded.rule.scores[0] + 1. / len(item.deds)
                        ]

    # find biggest nonoverlapping spans, longest candidates first
    covered = [False] * n
    spans = []
    for length in xrange(n, 0, -1):
        for i in xrange(n - length + 1):
            k = i + length
            has_item = any(chart[i][k][v] is not None
                           for v in reversed(nonterminals))
            # don't let any of the spans overlap
            if has_item and not any(covered[i:k]):
                covered[i:k] = [True] * length
                spans.append((i, k))

    # (an older version simply used spans = [(0, n)], which was buggy)

    # put in topological order: shorter spans before longer ones
    itemlists = []
    for (start, stop) in spans:
        items = [chart[i][i + length][v]
                 for length in xrange(1, stop - start + 1)
                 for i in xrange(start, stop - length + 1)
                 for v in nonterminals
                 if chart[i][i + length][v] is not None]
        if len(items) > 0:
            itemlists.append(items)

    return itemlists
Example #3
0
                ewords[j] = None
            index += 1

    # Require an aligned French word
    if opts.require_aligned_terminal and not flag:
        return None

    epos = []
    new_ewords = []
    for i in xrange(elen):
        if ewords[i] is not None:
            if type(ewords[i]) is tuple:
                (v, ei, ej) = ewords[i]
                # force slash categories to be at left edge of English side
                if force_english_prefix and len(
                        new_ewords) != 0 and sym.clearindex(
                            v) in prefix_labels:
                    return None
                new_ewords.append(v)
                epos.append((ei, ej))
            else:
                new_ewords.append(ewords[i])
                epos.append(i + j1)

    r = XRule(x, rule.Phrase(tuple(fwords)), rule.Phrase(tuple(new_ewords)))
    r.fpos = fpos
    r.epos = epos
    r.span = (i1, i2, j1, j2)

    if opts.keep_word_alignments:
        r.word_alignments = []
        for fi in xrange(len(fpos)):
Example #4
0
def parse(n, xrules, rules):
    """
    Fill a chart for one sentence and collect its items span by span.

    n = length of sentence
    xrules = rules with position info, to be assembled into forest
             (fetched with the key (nonterminal, i, k))
    rules = grammar of rules from all sentences (indexed by rule)
    The result is a list with one list of items per selected span; the
    selected spans are the longest ones that hold an item and do not
    overlap a span selected earlier.
    N.B. This does not work properly without tight_phrases"""

    size = n+1
    chart = []
    for row in xrange(size):
        chart.append([dict((v,None) for v in nonterminals) for col in xrange(size)])

    def antecedents(r):
        # Chart entries for the positions of r.f whose fpos is a tuple.
        found = []
        for fi in xrange(len(r.f)):
            if type(r.fpos[fi]) is tuple:
                (lo, hi) = r.fpos[fi]
                found.append(chart[lo][hi][sym.clearindex(r.f[fi])])
        return found

    def ordinary_item(x, i, k):
        # Item for a non-START label, derived from the xrules of the span.
        item = forest.Item(x,i,k)
        for r in xrules.get((x,i,k), ()):
            ants = antecedents(r)
            if None not in ants:
                # the reason for the lookup in rules is to allow duplicate rules to be freed
                item.derive(ants, rules[r], r.scores[0])
        return item

    def start_item(x, i, k):
        # Item for the START label, derived with the glue rules.
        item = forest.Item(x,i,k)

        # S -> X
        if i == 0:
            for y in nonterminals:
                if y == START:
                    continue
                if chart[i][k][y] is not None:
                    item.derive([chart[i][k][y]], gluestop[y])

        # S -> S X
        for j in xrange(i,k+1):
            for y in nonterminals:
                left = chart[i][j][START]
                right = chart[j][k][y]
                if left is not None and right is not None:
                    item.derive([left, right], glue[y])
        return item

    for l in xrange(1, n+1):
        for i in xrange(n-l+1):
            k = i+l
            for x in nonterminals:
                if x != START:
                    item = ordinary_item(x, i, k)
                    if len(item.deds) != 0:
                        chart[i][k][x] = item
                else:
                    item = start_item(x, i, k)
                    total = len(item.deds)
                    if total > 0:
                        chart[i][k][x] = item
                        # every deduction's rule gets 1/total added to its first score
                        for ded in item.deds:
                            ded.rule.scores = [ded.rule.scores[0] + 1./total]

    # find biggest nonoverlapping spans
    covered = [False] * n
    spans = []
    for l in xrange(n,0,-1):
        for i in xrange(n-l+1):
            k = i+l

            occupied = False
            for v in reversed(nonterminals):
                if chart[i][k][v] is not None:
                    occupied = True
                    break
            if not occupied:
                continue

            # don't let any of the spans overlap
            if True in covered[i:k]:
                continue

            for j in xrange(i,k):
                covered[j] = True
            spans.append((i,k))

    # note: an earlier, buggy version used the single span (0,n) here

    # put in topological order
    itemlists = []
    for (start, stop) in spans:
        items = []
        for l in xrange(1, stop-start+1):
            for i in xrange(start, stop-l+1):
                cell = chart[i][i+l]
                items.extend(cell[v] for v in nonterminals if cell[v] is not None)
        if len(items) > 0:
            itemlists.append(items)

    return itemlists
Example #5
0
            for j in xrange(sub_j1+1,sub_j2):
                ewords[j] = None
            index += 1

    # Require an aligned French word
    if opts.require_aligned_terminal and not flag:
        return None

    epos = []
    new_ewords = []
    for i in xrange(elen):
        if ewords[i] is not None:
            if type(ewords[i]) is tuple:
                (v, ei, ej) = ewords[i]
                # force slash categories to be at left edge of English side
                if force_english_prefix and len(new_ewords) != 0 and sym.clearindex(v) in prefix_labels:
                    return None
                new_ewords.append(v)
                epos.append((ei,ej))
            else:
                new_ewords.append(ewords[i])
                epos.append(i+j1)


    r = XRule(x,rule.Phrase(tuple(fwords)), rule.Phrase(tuple(new_ewords)))
    r.fpos = fpos
    r.epos = epos
    r.span = (i1,i2,j1,j2)

    if opts.keep_word_alignments:
        r.word_alignments = []