def scan(self, item):
    """
    Scan as many consecutive terminals as can be scanned deterministically.

    Starting from the item's dot, each terminal is matched against the arcs leaving the
    current wfsa state. The walk goes on while exactly one arc matches; no intermediate
    items are created along the way.

    :param item: the item whose next symbols are to be scanned
    :returns: False if some terminal has no matching arc (nothing is added to the agenda),
        True otherwise (at least one item has been added to the agenda)
    """
    path = [item.dot]  # wfsa states visited so far; the last one is the current state
    for symbol in item.nextsymbols():
        if not is_terminal(symbol):
            # a nonterminal ends the run of scannable symbols
            break
        arcs = self._wfsa.get_arcs(origin=path[-1], symbol=symbol)
        n_arcs = len(arcs)
        if n_arcs == 0:
            # the symbol cannot be scanned from the current state
            return False
        if n_arcs > 1:
            # nondeterminism: one item per destination, all sharing the states visited so far
            visited = item.inner + tuple(path)
            for destination, _ in arcs:
                self._agenda.add(self.get_item(item.rule, destination, visited))
            return True
        # a single arc: follow it and keep scanning
        destination, _ = arcs[0]
        path.append(destination)
    # the deterministic path ends here: the last state becomes the dot, the others become inner states
    self._agenda.add(self.get_item(item.rule, path[-1], item.inner + tuple(path[:-1])))
    return True
Example #2
0
 def scan(self, item):
     """
     Move the dot of `item` over a run of terminals, for as long as the wfsa is deterministic.

     For each terminal, the arcs leaving the current state with that label are looked up:
     no arc aborts the scan, one arc is followed silently, several arcs produce one agenda
     item per destination and end the scan.

     :param item: the item to scan from
     :returns: False when a terminal cannot be scanned, True when items were added to the agenda
     """
     agenda = self._agenda
     trail = [item.dot]  # states reached so far, starting at the item's dot
     for sym in item.nextsymbols():
         if not is_terminal(sym):
             # bumped into a nonterminal, time to wrap up
             break
         arcs = self._wfsa.get_arcs(origin=trail[-1], symbol=sym)
         fanout = len(arcs)
         if fanout == 0:
             return False
         if fanout == 1:
             # deterministic step: remember the destination, create no intermediate item
             target, _ = arcs[0]
             trail.append(target)
             continue
         # several destinations: branch here and let the agenda take over
         for target, _ in arcs:
             agenda.add(
                 self.get_item(item.rule, target, item.inner + tuple(trail)))
         return True
     # wrap up the deterministic path: the final state is the new dot
     dot = trail.pop()
     agenda.add(self.get_item(item.rule, dot, item.inner + tuple(trail)))
     return True
Example #3
0
 def add(self, rule):
     """
     Register `rule` and index the symbols it mentions.

     The rule is appended to the list of rules and to the list kept for its left-hand side;
     its LHS is recorded as a nonterminal and each RHS symbol is recorded as a terminal or
     a nonterminal according to `is_terminal`.

     :param rule: an object exposing `lhs` and an iterable `rhs`
     """
     lhs = rule.lhs
     self._rules.append(rule)
     self._rules_by_lhs[lhs].append(rule)
     self._nonterminals.add(lhs)
     for symbol in rule.rhs:
         bucket = self._terminals if is_terminal(symbol) else self._nonterminals
         bucket.add(symbol)
 def add(self, rule):
     """
     Store `rule` and update the symbol sets.

     :param rule: an object exposing `lhs` and an iterable `rhs`; the rule is stored both in
         the flat list of rules and under its LHS, and its symbols are added to the sets of
         terminals/nonterminals
     """
     self._rules.append(rule)
     self._rules_by_lhs[rule.lhs].append(rule)
     # the LHS is recorded as a nonterminal without consulting `is_terminal`
     self._nonterminals.add(rule.lhs)
     for symbol in rule.rhs:
         if not is_terminal(symbol):
             self._nonterminals.add(symbol)
             continue
         self._terminals.add(symbol)
Example #5
0
    def do(self, root='[S]', goal='[GOAL]'):
        """
        Exhaust the agenda and return the resulting forest.

        Axioms are first requested for `root` at every initial state of the wfsa. Active items
        are then popped one at a time and dispatched to the complete/scan/predict operations
        until the agenda is empty.

        :param root: the start symbol of the grammar
        :param goal: the goal symbol handed over to `get_cfg`
        :returns: the result of `self.get_cfg(goal, root)`
        :raises ValueError: if `self.axioms` succeeds for none of the initial states
        """
        wfsa = self._wfsa
        wcfg = self._wcfg
        agenda = self._agenda

        # start items of the kind
        # GOAL -> * ROOT, where * is an initial state of the wfsa
        # The list is built eagerly on purpose: `any` over a generator stops at the first truthy
        # result, so the remaining initial states would never be passed to `axioms`.
        # NOTE(review): this matters if `axioms` is what seeds the agenda -- confirm against its definition.
        seeded = [self.axioms(root, start) for start in wfsa.iterinitial()]
        if not any(seeded):
            raise ValueError('No rule for the start symbol %s' % root)

        while agenda:
            item = agenda.pop()  # always returns an active item

            if item.is_complete():
                # get slice variable for the current completed item
                u = self.slice_vars.get(item.rule.lhs, item.start, item.dot)

                # only items whose log-probability is above the threshold determined by the slice
                # variable are processed; nothing else is done with the others
                if item.rule.log_prob > u:
                    # complete root item spanning from a start wfsa state to a final wfsa state
                    spans_input = (item.rule.lhs == root
                                   and wfsa.is_initial(item.start)
                                   and wfsa.is_final(item.dot))
                    # `complete_others` is only tried for items that are not such root items
                    if spans_input or self.complete_others(item):
                        agenda.make_complete(item)
                        agenda.make_passive(item)
                    else:  # a complete state is only kept in case it could potentially complete others
                        agenda.discard(item)
            elif is_terminal(item.next):
                # fire the operation 'scan'
                self.scan(item)
                agenda.discard(item)  # scanning renders incomplete items of this kind useless
            elif not wcfg.can_rewrite(item.next):
                # if the NT does not exist this item is useless
                agenda.discard(item)
            else:
                # try to predict, otherwise try to complete itself
                if not self.prediction(item):
                    self.complete_itself(item)
                agenda.make_passive(item)
        # converts complete items into rules
        logging.debug('Making forest...')
        return self.get_cfg(goal, root)
Example #6
0
 def get_intersected_rule(self, item):
     """
     Build the rule that corresponds to `item`, with every symbol annotated with two positions.

     The LHS is annotated with the item's start and dot; the i-th RHS symbol is annotated with
     the i-th and (i+1)-th entries of the item's inner positions followed by its dot.

     :param item: the item to convert
     :returns: a Rule whose weight is the log-probability of the item's rule plus the weights
         of the wfsa arcs associated with its terminals
     """
     rule = item.rule
     lhs = make_symbol(rule.lhs, item.start, item.dot)
     boundaries = item.inner + (item.dot, )
     rhs = []
     for i, sym in enumerate(rule.rhs):
         rhs.append(make_symbol(sym, boundaries[i], boundaries[i + 1]))
     # wfsa contribution: arc weights are summed, which assumes a log-semiring;
     # each terminal is taken to label an arc from boundaries[i] to boundaries[i + 1]
     wfsa_weight = sum(
         (self._wfsa.arc_weight(boundaries[i], boundaries[i + 1], sym)
          for i, sym in enumerate(rule.rhs) if is_terminal(sym)),
         0.0)
     return Rule(lhs, rhs, rule.log_prob + wfsa_weight)
    def do(self, root='[S]', goal='[GOAL]'):
        """
        Process the agenda until it is empty, then build and return the forest.

        Before the main loop, `self.axioms` is called for `root` at each initial state of the
        wfsa. Each popped item is then handled according to whether it is complete, expects a
        terminal, or expects a nonterminal.

        :param root: the start symbol of the grammar
        :param goal: the goal symbol handed over to `get_cfg`
        :returns: the result of `self.get_cfg(goal, root)`
        :raises ValueError: if `self.axioms` succeeds for none of the initial states
        """
        wfsa = self._wfsa
        wcfg = self._wcfg
        agenda = self._agenda

        # start items of the kind
        # GOAL -> * ROOT, where * is an initial state of the wfsa
        # Every initial state is visited before testing the outcome: `any` over a generator
        # would stop at the first success and skip the remaining initial states.
        # NOTE(review): this matters if `axioms` is what seeds the agenda -- confirm against its definition.
        outcomes = [self.axioms(root, start) for start in wfsa.iterinitial()]
        if not any(outcomes):
            raise ValueError('No rule for the start symbol %s' % root)

        while agenda:
            item = agenda.pop()  # always returns an active item

            if item.is_complete():
                # get slice variable for the current completed item
                u = self.slice_vars.get(item.rule.lhs, item.start, item.dot)

                # check whether the probability of the current completed item is above the threshold
                # determined by the slice variable; nothing else is done with items that are not
                if item.rule.log_prob > u:
                    # complete root item spanning from a start wfsa state to a final wfsa state
                    is_root_item = (item.rule.lhs == root
                                    and wfsa.is_initial(item.start)
                                    and wfsa.is_final(item.dot))
                    # `complete_others` is only tried for items that are not such root items
                    if is_root_item or self.complete_others(item):
                        agenda.make_complete(item)
                        agenda.make_passive(item)
                    else:  # a complete state is only kept in case it could potentially complete others
                        agenda.discard(item)
            elif is_terminal(item.next):
                # fire the operation 'scan'
                self.scan(item)
                agenda.discard(item)  # scanning renders incomplete items of this kind useless
            elif not wcfg.can_rewrite(item.next):
                # if the NT does not exist this item is useless
                agenda.discard(item)
            else:
                # try to predict, otherwise try to complete itself
                if not self.prediction(item):
                    self.complete_itself(item)
                agenda.make_passive(item)
        # converts complete items into rules
        logging.debug('Making forest...')
        return self.get_cfg(goal, root)
 def recursion(derivation, projection, Q, wcfg, counts):
     """
     Enumerate every way of rewriting the symbols in `Q` and count the outcomes.

     Symbols are popped from the left of `Q`. A terminal is prepended to `projection`; a
     nonterminal is replaced by the RHS of each of its rules in turn. Since `extendleft`
     inserts the RHS in reverse, the last symbol of a RHS is the first one to be popped.

     :param derivation: list of the rules applied so far
     :param projection: list of the terminals produced so far
     :param Q: deque of symbols still to be processed (consumed by this call)
     :param wcfg: mapping from a nonterminal to an iterable of its rules
     :param counts: counts['d'] and counts['p'] are incremented at the key of each finished
         derivation and projection, respectively
     """
     if not Q:
         # nothing left to rewrite: record this derivation and its projection
         counts['d'][tuple(derivation)] += 1
         counts['p'][tuple(projection)] += 1
         return
     head = Q.popleft()
     if is_terminal(head):
         recursion(derivation, [head] + projection, Q, wcfg, counts)
         return
     for rule in wcfg[head]:
         # each alternative works on its own copy of the pending symbols
         pending = deque(Q)
         pending.extendleft(rule.rhs)
         recursion(derivation + [rule], projection, pending, wcfg, counts)
Example #9
0
def scan(item, sentence):
    """
    Scan a terminal (compatible with CKY and Earley).

    Inference rule:

        [X -> alpha * x beta, [i ... j]]
        ------------------------------------    sentence[j] == x
        [X -> alpha x * beta, [i ... j + 1]]

    :param item: an active Item whose next symbol is a terminal (checked with an assertion)
    :param sentence: a list/tuple of terminals
    :returns: the item advanced to position j + 1, or None if the dot is already at the end
        of the sentence or the word at the dot differs from the item's next symbol
    """
    assert is_terminal(item.next), 'Only terminal symbols can be scanned, got %s' % item.next
    position = item.dot
    matches = position < len(sentence) and sentence[position] == item.next
    return item.advance(position + 1) if matches else None
Example #10
0
 def recursion(derivation, projection, Q, wcfg, counts):
     """
     Expand the symbols queued in `Q` exhaustively, counting each finished derivation and projection.

     The leftmost queued symbol is processed first: terminals are prepended to `projection`,
     nonterminals are rewritten with each of their rules (the RHS is pushed with `extendleft`,
     hence in reverse order). When the queue is empty, counts['d'] and counts['p'] are
     incremented for the current derivation and projection.

     :param derivation: list of the rules applied so far
     :param projection: list of the terminals produced so far
     :param Q: deque of symbols still to be processed (consumed by this call)
     :param wcfg: mapping from a nonterminal to an iterable of its rules
     :param counts: mapping with the counters 'd' (derivations) and 'p' (projections)
     """
     if not Q:
         for kind, outcome in (('d', derivation), ('p', projection)):
             counts[kind][tuple(outcome)] += 1
         return
     symbol = Q.popleft()
     if not is_terminal(symbol):
         for rule in wcfg[symbol]:
             # branch on a private copy of the queue so that alternatives do not interfere
             branch = deque(Q)
             branch.extendleft(rule.rhs)
             recursion(derivation + [rule], projection, branch, wcfg,
                       counts)
         return
     recursion(derivation, [symbol] + projection, Q, wcfg, counts)
Example #11
0
def scan(item, sentence):
    """
    Advance an item over the word at its dot, if that word is the terminal the item expects.

    Inference rule (compatible with CKY and Earley):

        [X -> alpha * x beta, [i ... j]]
        ------------------------------------    sentence[j] == x
        [X -> alpha x * beta, [i ... j + 1]]

    :param item: an active Item; its next symbol must be a terminal (checked with an assertion)
    :param sentence: a list/tuple of terminals
    :returns: an Item (the result of `item.advance`) or None when nothing can be scanned
    """
    assert is_terminal(
        item.next), 'Only terminal symbols can be scanned, got %s' % item.next
    j = item.dot
    if not (j < len(sentence) and sentence[j] == item.next):
        # either the sentence is exhausted or the word at j is not the expected terminal
        return None
    return item.advance(j + 1)
Example #12
0
    def recursion(derivation, projection, Q, wcfg, scores):
        """
        Enumerate every way of rewriting the symbols in `Q`, keeping the best score per outcome.

        Symbols are popped from the left of `Q`. A terminal is prepended to `projection`; a
        nonterminal is replaced by the RHS of each of its rules in turn (pushed with
        `extendleft`, hence in reverse order). A finished derivation is scored with the sum of
        the `log_prob` of its rules.

        :param derivation: list of the rules applied so far
        :param projection: list of the terminals produced so far
        :param Q: deque of symbols still to be processed (consumed by this call)
        :param wcfg: mapping from a nonterminal to an iterable of its rules
        :param scores: scores['d'] and scores['p'] are raised to the new score whenever the
            value stored for the finished derivation/projection is smaller
        """
        if not Q:
            total = sum(rule.log_prob for rule in derivation)
            for kind, outcome in (('d', derivation), ('p', projection)):
                key = tuple(outcome)
                if scores[kind][key] < total:
                    scores[kind][key] = total
            return
        head = Q.popleft()
        if is_terminal(head):
            recursion(derivation, [head] + projection, Q, wcfg, scores)
            return
        for rule in wcfg[head]:
            # each alternative works on its own copy of the pending symbols
            pending = deque(Q)
            pending.extendleft(rule.rhs)
            recursion(derivation + [rule], projection, pending, wcfg, scores)