def scan(self, item):
    """
    Scan over as many terminals as possible, going only as far as determinism allows.

    Starting from the item's dot, each upcoming terminal is matched against the arcs
    leaving the last state reached. While exactly one arc matches, the walk continues
    without creating intermediate items. Scanning stops at the first nonterminal, at a
    terminal with no matching arc, or at a terminal with several matching arcs.

    :param item: the item whose upcoming symbols are to be scanned
    :returns: False if some terminal could not be scanned (nothing is added to the
        agenda in that case), True otherwise
    """
    path = [item.dot]  # states visited so far; the last one is the current origin
    for symbol in item.nextsymbols():
        if not is_terminal(symbol):
            # scan bumped into a nonterminal symbol, time to wrap up
            break
        arcs = self._wfsa.get_arcs(origin=path[-1], symbol=symbol)
        n_arcs = len(arcs)
        if n_arcs == 0:
            # cannot scan the symbol
            return False
        if n_arcs > 1:
            # nondeterminism: one new item per outgoing arc, all sharing the path so far
            inner = item.inner + tuple(path)
            for destination, _ in arcs:
                self._agenda.add(self.get_item(item.rule, destination, inner))
            return True
        # exactly one arc: the symbol is scanned deterministically
        destination, _ = arcs[0]
        path.append(destination)
    # a deterministic path was followed: its last state is the new dot,
    # the states before it extend the item's inner positions
    self._agenda.add(self.get_item(item.rule, path[-1], item.inner + tuple(path[:-1])))
    return True
def scan(self, item):
    """
    Advance an item over consecutive terminals for as long as the automaton is deterministic.

    No intermediate items are created along a deterministic stretch. If a terminal has
    several matching arcs, one item per arc is added to the agenda and scanning stops.

    :param item: the item to be scanned
    :returns: False when a terminal has no matching arc, True in every other case
    """
    trail = [item.dot]  # the dot followed by every state reached deterministically
    for sym in item.nextsymbols():
        if not is_terminal(sym):
            # a nonterminal ends the scan
            break
        candidates = self._wfsa.get_arcs(origin=trail[-1], symbol=sym)
        fanout = len(candidates)
        if fanout == 1:
            # deterministic step: remember the destination and keep going
            target, _ = candidates[0]
            trail.append(target)
            continue
        if fanout == 0:
            # the symbol cannot be scanned from the current state
            return False
        # several arcs match: create all relevant items and add them to the agenda
        for target, _ in candidates:
            self._agenda.add(
                self.get_item(item.rule, target, item.inner + tuple(trail)))
        return True
    # wrap up the deterministic path: last state becomes the dot, the rest goes to inner
    new_dot = trail[-1]
    crossed = tuple(trail[:-1])
    self._agenda.add(self.get_item(item.rule, new_dot, item.inner + crossed))
    return True
def add(self, rule):
    """
    Register a rule and update the symbol inventories.

    The rule is appended to the list of rules and to the list indexed by its LHS.
    Its LHS is recorded as a nonterminal and every RHS symbol is recorded as a
    terminal or a nonterminal according to `is_terminal`.

    :param rule: an object exposing `lhs` and an iterable `rhs`
    """
    self._rules.append(rule)
    self._rules_by_lhs[rule.lhs].append(rule)
    self._nonterminals.add(rule.lhs)
    for symbol in rule.rhs:
        inventory = self._terminals if is_terminal(symbol) else self._nonterminals
        inventory.add(symbol)
def do(self, root='[S]', goal='[GOAL]'):
    """
    Run the agenda until it is exhausted and return the resulting forest.

    The agenda is first seeded through `self.axioms` for every initial state of the
    wfsa. Items are then popped one at a time: complete items are kept only if their
    rule's log probability is above the corresponding slice variable, incomplete items
    are scanned, predicted or discarded depending on the symbol after the dot.

    :param root: the start symbol of the grammar
    :param goal: the goal symbol, passed along with `root` to `self.get_cfg`
    :returns: the value of `self.get_cfg(goal, root)`
    :raises ValueError: if `self.axioms` does not succeed for any initial state
    """
    wfsa = self._wfsa
    wcfg = self._wcfg
    agenda = self._agenda

    # start items of the kind
    #   GOAL -> * ROOT, where * is an initial state of the wfsa
    # The calls are evaluated eagerly on purpose: `axioms` is the only thing invoked
    # before the loop, so it is relied upon for seeding the agenda. Feeding a lazy
    # generator to `any` would stop at the first success and leave the remaining
    # initial states without start items.
    seeded = [self.axioms(root, start) for start in wfsa.iterinitial()]
    if not any(seeded):
        raise ValueError('No rule for the start symbol %s' % root)

    while agenda:
        item = agenda.pop()  # always returns an active item
        if item.is_complete():
            # get slice variable for the current completed item
            u = self.slice_vars.get(item.rule.lhs, item.start, item.dot)
            # only items whose probability is above the threshold determined by the
            # slice variable are processed; the others get no further bookkeeping
            if item.rule.log_prob > u:
                if item.rule.lhs == root and wfsa.is_initial(item.start) and wfsa.is_final(item.dot):
                    # complete root item spanning from a start wfsa state to a final wfsa state
                    agenda.make_complete(item)
                    agenda.make_passive(item)
                elif self.complete_others(item):
                    agenda.make_complete(item)
                    agenda.make_passive(item)
                else:
                    # a complete state is only kept in case it could potentially complete others
                    agenda.discard(item)
        elif is_terminal(item.next):
            # fire the operation 'scan'
            self.scan(item)
            agenda.discard(item)  # scanning renders incomplete items of this kind useless
        elif not wcfg.can_rewrite(item.next):
            # if the NT does not exist this item is useless
            agenda.discard(item)
        else:
            # try to predict, otherwise try to complete itself
            if not self.prediction(item):
                self.complete_itself(item)
            agenda.make_passive(item)

    # converts complete items into rules
    logging.debug('Making forest...')
    return self.get_cfg(goal, root)
def get_intersected_rule(self, item):
    """
    Build the rule obtained by annotating an item's rule with the states it spans.

    The LHS is annotated with the item's start and dot. The i-th RHS symbol is
    annotated with the i-th and (i+1)-th entries of the item's inner positions
    followed by its dot.

    :param item: an item exposing `rule`, `start`, `dot` and `inner`
    :returns: a Rule whose weight is the original rule's `log_prob` plus the weights
        of the wfsa arcs crossed by the terminals of the RHS
    """
    lhs = make_symbol(item.rule.lhs, item.start, item.dot)
    states = item.inner + (item.dot, )
    rhs = []
    # wfsa contribution (assuming that it is with a log-semiring)
    arcs_weight = 0.0
    for i, sym in enumerate(item.rule.rhs):
        origin = states[i]
        destination = states[i + 1]
        rhs.append(make_symbol(sym, origin, destination))
        if is_terminal(sym):
            # assuming an arc from `origin` to `destination` with label `sym`
            arcs_weight += self._wfsa.arc_weight(origin, destination, sym)
    return Rule(lhs, rhs, item.rule.log_prob + arcs_weight)
def do(self, root='[S]', goal='[GOAL]'):
    """
    Exhaust the agenda and convert the surviving items into a forest.

    Axioms for `root` are requested at every initial state of the wfsa, after which
    items are popped until the agenda is empty. A complete item survives only when its
    rule's log probability exceeds its slice variable. An incomplete item is scanned
    when a terminal follows the dot, and otherwise predicted, completed or discarded.

    :param root: the start symbol of the grammar
    :param goal: the goal symbol, forwarded with `root` to `self.get_cfg`
    :returns: the value of `self.get_cfg(goal, root)`
    :raises ValueError: if no initial state yields axioms for `root`
    """
    wfsa = self._wfsa
    wcfg = self._wcfg
    agenda = self._agenda

    # start items of the kind
    #   GOAL -> * ROOT, where * is an initial state of the wfsa
    # Every initial state must be visited: `any` over a generator short-circuits at
    # the first truthy result, which would skip the `axioms` calls (and whatever they
    # add to the agenda) for all later initial states. Hence the explicit loop.
    has_axioms = False
    for start in wfsa.iterinitial():
        if self.axioms(root, start):
            has_axioms = True
    if not has_axioms:
        raise ValueError('No rule for the start symbol %s' % root)

    while agenda:
        item = agenda.pop()  # always returns an active item

        if item.is_complete():
            # get slice variable for the current completed item
            u = self.slice_vars.get(item.rule.lhs, item.start, item.dot)
            # items not above the threshold determined by the slice variable are
            # left alone: no agenda method is called for them
            if not item.rule.log_prob > u:
                continue
            # a complete root item spans from a start wfsa state to a final wfsa state
            spans_root = (item.rule.lhs == root
                          and wfsa.is_initial(item.start)
                          and wfsa.is_final(item.dot))
            if spans_root or self.complete_others(item):
                agenda.make_complete(item)
                agenda.make_passive(item)
            else:
                # a complete state is only kept in case it could potentially complete others
                agenda.discard(item)
            continue

        if is_terminal(item.next):
            # fire the operation 'scan'
            self.scan(item)
            agenda.discard(item)  # scanning renders incomplete items of this kind useless
        elif not wcfg.can_rewrite(item.next):
            # if the NT does not exist this item is useless
            agenda.discard(item)
        else:
            # try to predict, otherwise try to complete itself
            if not self.prediction(item):
                self.complete_itself(item)
            agenda.make_passive(item)

    # converts complete items into rules
    logging.debug('Making forest...')
    return self.get_cfg(goal, root)
def recursion(derivation, projection, Q, wcfg, counts):
    """
    Enumerate the derivations reachable from the symbols in `Q` and count them.

    Symbols are consumed from the left of `Q`. A terminal is prepended to the
    projection; a nonterminal is expanded with each of its rules in turn. When `Q` is
    exhausted, the counts of the finished derivation and of its projection are
    incremented.

    :param derivation: list of the rules applied so far
    :param projection: list of the terminals collected so far
    :param Q: queue of symbols still to be processed (consumed in place)
    :param wcfg: maps a nonterminal to the rules that rewrite it
    :param counts: mapping with tables under 'd' (derivations) and 'p' (projections)
    """
    if not Q:
        # nothing left to rewrite: record the derivation and its yield
        counts['d'][tuple(derivation)] += 1
        counts['p'][tuple(projection)] += 1
        return

    head = Q.popleft()
    if is_terminal(head):
        recursion(derivation, [head] + projection, Q, wcfg, counts)
        return

    for rule in wcfg[head]:
        # each alternative works on its own copy of the queue; `extendleft` pushes
        # the RHS in reverse order, so its last symbol is processed first, which
        # matches terminals being prepended to the projection
        pending = deque(Q)
        pending.extendleft(rule.rhs)
        recursion(derivation + [rule], projection, pending, wcfg, counts)
def scan(item, sentence):
    """
    Scan a terminal (compatible with CKY and Earley).

    Inference rule:

        [X -> alpha * x beta, [i ... j]]
        ------------------------------------    sentence[j] == x
        [X -> alpha x * beta, [i ... j + 1]]

    :param item: an active Item whose next symbol is a terminal
    :param sentence: a list/tuple of terminals
    :returns: the item advanced by one position, or None if the input does not match
    """
    assert is_terminal(item.next), 'Only terminal symbols can be scanned, got %s' % item.next
    position = item.dot
    # the position must lie within the input before the word there can be compared
    matches = position < len(sentence) and sentence[position] == item.next
    return item.advance(position + 1) if matches else None
def scan(item, sentence):
    """
    Try to move an item's dot over the terminal that follows it.

    Inference rule:

        [X -> alpha * x beta, [i ... j]]
        ------------------------------------    sentence[j] == x
        [X -> alpha x * beta, [i ... j + 1]]

    :param item: an active Item; its next symbol must be a terminal
    :param sentence: a list/tuple of terminals
    :returns: an Item with the dot advanced to j + 1, or None when position j is past
        the end of the sentence or holds a different terminal
    """
    expected = item.next
    assert is_terminal(expected), 'Only terminal symbols can be scanned, got %s' % expected
    j = item.dot
    if not j < len(sentence):
        # ran out of input
        return None
    if not sentence[j] == expected:
        # the input holds a different terminal at this position
        return None
    return item.advance(j + 1)
def recursion(derivation, projection, Q, wcfg, scores):
    """
    Enumerate the derivations reachable from the symbols in `Q`, keeping the best scores.

    Symbols are consumed from the left of `Q`. A terminal is prepended to the
    projection; a nonterminal is expanded with each of its rules in turn. When `Q` is
    exhausted, the derivation is scored by the sum of its rules' `log_prob`, and that
    score replaces the stored one for the derivation and for its projection wherever
    it is strictly larger.

    :param derivation: list of the rules applied so far
    :param projection: list of the terminals collected so far
    :param Q: queue of symbols still to be processed (consumed in place)
    :param wcfg: maps a nonterminal to the rules that rewrite it
    :param scores: mapping with tables under 'd' (derivations) and 'p' (projections)
    """
    if not Q:
        total = sum(rule.log_prob for rule in derivation)
        for table_name, sequence in (('d', derivation), ('p', projection)):
            key = tuple(sequence)
            # keep the maximum seen so far for this key
            if scores[table_name][key] < total:
                scores[table_name][key] = total
        return

    head = Q.popleft()
    if is_terminal(head):
        recursion(derivation, [head] + projection, Q, wcfg, scores)
        return

    for rule in wcfg[head]:
        # each alternative gets its own copy of the queue; `extendleft` pushes the
        # RHS in reverse order, so its last symbol is the next one to be processed
        pending = deque(Q)
        pending.extendleft(rule.rhs)
        recursion(derivation + [rule], projection, pending, wcfg, scores)