Ejemplo n.º 1
0
    def recognize(self, text, token_stream):
        """Parse ``token_stream`` and return the list of accepted texts.

        The tokens returned by ``morph_parser(token_stream)`` are fed one at
        a time to a fresh ``Stack``.  For every token all applicable reduce
        actions are applied first, then all applicable shift actions, and
        finally ``stack.merge()`` is called.  ``token[0]`` is the key looked
        up in ``self.ACTION[state]``; ``token[1]`` is additionally looked up
        wrapped in single quotes (``"'word'"``) as a "raw" key.

        Parsing restarts from scratch on
        ``self.without_first_word(text, tokens)`` when either

        * the stack has no active nodes and ``self.error_detected(...)``
          returns a false value (a true value skips the token instead), or
        * a reduction fails ``self.check_labels``.

        :param text: text being recognized; the current value is appended to
            the results when the parse is accepted.
        :param token_stream: input handed to ``morph_parser``.
        :return: ``self.results``.  It is reset to ``[]`` on entry and the
            method returns as soon as a ``'$'`` token is processed, so it
            holds at most one element.
        """

        def is_reduce(action):
            return action[0] == 'R'

        def is_shift(action):
            return action[0] == 'S'

        self.results = []
        tokens = morph_parser(token_stream)
        # NOTE(review): if the ``for`` below runs out of tokens without a
        # ``break`` or reaching the '$' branch, this loop restarts on the same
        # input -- confirm every token sequence ends with a '$' token.
        while True:
            stack = Stack(self)
            stack.shift(None, None, 0)
            stack.count_active = 1
            prev_tok = INITIAL_TOKEN
            labels_ok = True

            for token in tokens:
                self.debug("\n\n\nNEW ITERATION. Token:", token[1])
                self.debug(token)
                if len(stack.active) == 0:
                    if not self.error_detected(text, tokens, prev_tok, stack.previously_active):
                        text, tokens = self.without_first_word(text, tokens)
                        break
                    else:
                        continue
                prev_tok = token

                # Quoted form of the word, used as the "raw" ACTION key; it
                # depends only on the token, so build it once per token.
                raw_token = "'%s'" % token[1]

                # reduce
                for _index, node in stack.enumerate_active():  # stack.active may grow
                    state = node.data
                    actions = self.ACTION[state]

                    # raw words in quotes
                    # NOTE(review): this loop is not guarded by ``labels_ok``,
                    # so a failure recorded for an earlier node can be
                    # overwritten here -- kept as in the original.
                    if raw_token in actions:
                        for _kind, rule in ifilter(is_reduce, actions[raw_token]):
                            self.debug("- Reduce")
                            self.debug("-- Actions", actions[raw_token])
                            self.debug("-- Raw token", node, rule)
                            labels_ok = self.check_labels(tokens, self.R.labels[rule])
                            if not labels_ok:
                                break
                            stack.reduce(node, rule)

                    # ordinary states
                    if labels_ok:
                        for _kind, rule in ifilter(is_reduce, actions[token[0]]):
                            self.debug("- Reduce")
                            self.debug("-- Actions", actions)
                            self.debug("-- Normal", node, rule)
                            labels_ok = self.check_labels(tokens, self.R.labels[rule])
                            if not labels_ok:
                                break
                            stack.reduce(node, rule)

                    # simulate the end of the sentence
                    if labels_ok:
                        for _kind, rule in ifilter(is_reduce, actions["$"]):
                            self.debug("- Reduce")
                            self.debug("-- Actions", actions)
                            self.debug("-- EOS", node, rule)
                            labels_ok = self.check_labels(tokens, self.R.labels[rule])
                            if not labels_ok:
                                break
                            stack.reduce(node, rule)

                    self.debug("- STACK")
                    if self.debug_mode:
                        stack.dump()

                # the last reduction did not satisfy the labels
                if not labels_ok:
                    self.debug("- Labels not OK")
                    text, tokens = self.without_first_word(text, tokens)
                    break

                # end of input?
                if token[0] == '$':
                    acc = stack.accepts()
                    if acc:
                        self.results.append(text)
                        self.debug("- Found new result:", self.results)
                    else:
                        self.error_detected(text, tokens, token, stack.active)
                    return self.results

                # shift
                stack.count_active = len(stack.active)
                # Iterate by index over the length taken before shifting:
                # states may be removed from the stack, so the long ``for``
                # form really is needed here.
                for node in (stack.active[i] for i in xrange(len(stack.active))):
                    actions = self.ACTION[node.data]

                    # raw words in quotes
                    # The loop target must not be called ``state``: rebinding
                    # the node's state here made the ordinary lookup below use
                    # the shifted-to state instead of the node's own state.
                    if raw_token in actions:
                        for _kind, next_state in ifilter(is_shift, actions[raw_token]):
                            self.debug("- Shift")
                            self.debug("-- Raw", node, token)
                            stack.shift(node, (token,), next_state)

                    # ordinary states
                    for _kind, next_state in ifilter(is_shift, actions[token[0]]):
                        self.debug("- Shift")
                        self.debug("-- Normal", node, token)
                        stack.shift(node, (token,), next_state)

                    self.debug("- Stack:")
                    if self.debug_mode:
                        stack.dump()

                # merge states
                stack.merge()

        # Not reached as written: the ``while True`` above is only left via
        # the ``return`` in the '$' branch.
        return self.results
Ejemplo n.º 2
0
    def recognize(self, text, token_stream):
        """Parse ``token_stream`` and return the list of accepted texts.

        The tokens returned by ``morph_parser(token_stream)`` are fed one at
        a time to a fresh ``Stack``.  For every token all applicable reduce
        actions are applied first, then all applicable shift actions, and
        finally ``stack.merge()`` is called.  ``token[0]`` is the key looked
        up in ``self.ACTION[state]``; ``token[1]`` is additionally looked up
        wrapped in single quotes (``"'word'"``) as a "raw" key.

        Parsing restarts from scratch on
        ``self.without_first_word(text, tokens)`` when either

        * the stack has no active nodes and ``self.error_detected(...)``
          returns a false value (a true value skips the token instead), or
        * a reduction fails ``self.check_labels``.

        :param text: text being recognized; the current value is appended to
            the results when the parse is accepted.
        :param token_stream: input handed to ``morph_parser``.
        :return: ``self.results``.  It is reset to ``[]`` on entry and the
            method returns as soon as a ``'$'`` token is processed, so it
            holds at most one element.
        """

        def is_reduce(action):
            return action[0] == 'R'

        def is_shift(action):
            return action[0] == 'S'

        self.results = []
        tokens = morph_parser(token_stream)
        # NOTE(review): if the ``for`` below runs out of tokens without a
        # ``break`` or reaching the '$' branch, this loop restarts on the same
        # input -- confirm every token sequence ends with a '$' token.
        while True:
            stack = Stack(self)
            stack.shift(None, None, 0)
            stack.count_active = 1
            prev_tok = INITIAL_TOKEN
            labels_ok = True

            for token in tokens:
                self.debug("\n\n\nNEW ITERATION. Token:", token[1])
                self.debug(token)
                if len(stack.active) == 0:
                    if not self.error_detected(text, tokens, prev_tok,
                                               stack.previously_active):
                        text, tokens = self.without_first_word(text, tokens)
                        break
                    else:
                        continue
                prev_tok = token

                # Quoted form of the word, used as the "raw" ACTION key; it
                # depends only on the token, so build it once per token.
                raw_token = "'%s'" % token[1]

                # reduce
                # (stack.active may grow while we iterate)
                for _index, node in stack.enumerate_active():
                    state = node.data
                    actions = self.ACTION[state]

                    # raw words in quotes
                    # NOTE(review): this loop is not guarded by ``labels_ok``,
                    # so a failure recorded for an earlier node can be
                    # overwritten here -- kept as in the original.
                    if raw_token in actions:
                        for _kind, rule in ifilter(is_reduce,
                                                   actions[raw_token]):
                            self.debug("- Reduce")
                            self.debug("-- Actions", actions[raw_token])
                            self.debug("-- Raw token", node, rule)
                            labels_ok = self.check_labels(
                                tokens, self.R.labels[rule])
                            if not labels_ok:
                                break
                            stack.reduce(node, rule)

                    # ordinary states
                    if labels_ok:
                        for _kind, rule in ifilter(is_reduce,
                                                   actions[token[0]]):
                            self.debug("- Reduce")
                            self.debug("-- Actions", actions)
                            self.debug("-- Normal", node, rule)
                            labels_ok = self.check_labels(
                                tokens, self.R.labels[rule])
                            if not labels_ok:
                                break
                            stack.reduce(node, rule)

                    # simulate the end of the sentence
                    if labels_ok:
                        for _kind, rule in ifilter(is_reduce, actions["$"]):
                            self.debug("- Reduce")
                            self.debug("-- Actions", actions)
                            self.debug("-- EOS", node, rule)
                            labels_ok = self.check_labels(
                                tokens, self.R.labels[rule])
                            if not labels_ok:
                                break
                            stack.reduce(node, rule)

                    self.debug("- STACK")
                    if self.debug_mode:
                        stack.dump()

                # the last reduction did not satisfy the labels
                if not labels_ok:
                    self.debug("- Labels not OK")
                    text, tokens = self.without_first_word(text, tokens)
                    break

                # end of input?
                if token[0] == '$':
                    acc = stack.accepts()
                    if acc:
                        self.results.append(text)
                        self.debug("- Found new result:", self.results)
                    else:
                        self.error_detected(text, tokens, token, stack.active)
                    return self.results

                # shift
                stack.count_active = len(stack.active)
                # Iterate by index over the length taken before shifting:
                # states may be removed from the stack, so the long ``for``
                # form really is needed here.
                for node in (stack.active[i]
                             for i in xrange(len(stack.active))):
                    actions = self.ACTION[node.data]

                    # raw words in quotes
                    # The loop target must not be called ``state``: rebinding
                    # the node's state here made the ordinary lookup below use
                    # the shifted-to state instead of the node's own state.
                    if raw_token in actions:
                        for _kind, next_state in ifilter(is_shift,
                                                         actions[raw_token]):
                            self.debug("- Shift")
                            self.debug("-- Raw", node, token)
                            stack.shift(node, (token, ), next_state)

                    # ordinary states
                    for _kind, next_state in ifilter(is_shift,
                                                     actions[token[0]]):
                        self.debug("- Shift")
                        self.debug("-- Normal", node, token)
                        stack.shift(node, (token, ), next_state)

                    self.debug("- Stack:")
                    if self.debug_mode:
                        stack.dump()

                # merge states
                stack.merge()

        # Not reached as written: the ``while True`` above is only left via
        # the ``return`` in the '$' branch.
        return self.results