Example #1
0
    def parse_operator(self, scope):
        """Parse an operator from the stream and create an operator sentence.

        This one does not enforce the shape of operator sentences. It is just
        a bit too complex to do cleanly, so it happens on the define.

        :param scope: The scope that provides the matcher for the sentence.
        :return: A Sentence, may be an operator sentence or might just be
            an expression.
        :raises ParseError: If the tokens do not match a sentence in scope,
            or the stream ends before the operator is closed."""
        # NOTE(review): the first token is consumed and discarded here —
        # confirm this is intentional (parse_expression keeps its first
        # token and seeds the matcher with it).
        next(self._token_stream)
        node = Sentence()
        part_match = scope.new_matcher()
        for token in self._token_stream:
            if isinstance(token, OperToken):
                if part_match.next(token):
                    node.append(token)
                elif part_match.end():
                    self._token_stream.push_back(token)
                    return node
                else:
                    raise ParseError('Sentence not matched.', node)
            elif isinstance(token, FirstToken):
                self._token_stream.push_back(token)
                if part_match.next():
                    node.append(self.parse_expression(scope))
                elif part_match.end():
                    # A single-part sentence collapses to that part.
                    return node if 1 < len(node) else node[0]
                else:
                    raise ParseError('Sentence not matched.', node)
            else:
                self._token_stream.push_back(token)
                if part_match.end():
                    return node
                else:
                    raise ParseError('Sentence not matched.', node)
        # Attach the partial node for consistency with the other errors.
        raise ParseError('Operator not closed', node)
Example #2
0
    def parse_definition(self, outer_scope):
        """Parse a definition from the incoming tokens.

        This is technically a kind of expression, but there are a few special
        rules that may force it to become separate. This is temporary as it
        is just the fastest way I can get this to work. I hope.

        'Define Function or variable name. to be Body. .'

        :param outer_scope: The scope the definition appears within.
        :return: A Sentence for the whole definition.
        :raises ParseError: If the tokens do not form a valid definition."""
        token = next(self._token_stream)
        if 'Define' != token.text:
            raise ParseError('Invalid start of definition: ' + str(token))
        node = Sentence(token)
        ptr = outer_scope.new_matcher()
        if not ptr.next(token):
            # BUG FIX: this previously pushed back `item`, which is not
            # defined until the loop below — a NameError on this path.
            self._token_stream.push_back(token)
            raise ParseError('Sentence not matched.', node)
        inner_scope = None
        for item in self._token_stream:
            if isinstance(item, FirstToken):
                self._token_stream.push_back(item)
                if ptr.next():
                    if inner_scope is None:
                        # First sub-sentence is the signature; it creates
                        # the scope the body is parsed in.
                        signature = self.parse_signature()
                        node.append(signature)
                        inner_scope = outer_scope.new_define_scope(signature)
                    else:
                        node.append(self.parse_expression(inner_scope))
                elif node.ends_with_dot() and ptr.has_end():
                    return node
                else:
                    raise ParseError('Sentence not matched.', node)
            elif isinstance(item, WordToken):
                if ptr.next(item):
                    node.append(item)
                elif node.ends_with_dot() and ptr.has_end():
                    self._token_stream.push_back(item)
                    return node
                else:
                    raise ParseError('Sentence not matched.', node)
            elif isinstance(item, PeriodToken):
                if ptr.has_end():
                    node.append(item)
                    return node
                else:
                    raise ParseError('Sentence not matched.', node)
            elif isinstance(item, ValueToken):
                if ptr.next():
                    node.append(Sentence(item))
                else:
                    raise ParseError('Sentence not matched.', node)
            else:
                raise TypeError('Parser.parse_definition: Unexpected type' +
                                str(type(item)))
        if node.ends_with_dot() and ptr.has_end():
            return node
        raise ParseError('Sentence not matched.', node)
Example #3
0
def into_sentence(prefix: list[int], conn_dict: dict[int, tp.Iterable[str]],
                  var_amount: int, var_type: str, sess) -> Sentence:
    """Generate the variables, then build a Sentence from *prefix*.

    NOTE(review): the Sentence used to generate the variables is discarded
    and a fresh one is filled in — presumably generate() records state in
    *sess*; confirm against the Sentence implementation."""
    scratch = Sentence([], sess)
    names = []
    for _ in range(var_amount):
        var = scratch.generate(var_type)
        scratch.append(var)
        names.append(var)

    result = Sentence([], sess)
    _into_sentence(result, prefix, conn_dict, names)
    return result
Example #4
0
    def parse_expression(self, scope):
        """Parse an expression.

        Expressions may contain other forms as well; an expression is the
        'other' type of sentence. They must match a sentence known to the
        scope.

        :param scope: The scope the expression is being parsed within.
        :return: A Sentence.
        :raises ParseError: If the tokens do not match a sentence in scope."""
        first = next(self._token_stream)
        if isinstance(first, ValueToken):
            # A bare value is already a complete expression.
            return Sentence([first])
        if not isinstance(first, FirstToken):
            raise ParseError('Cannot begin a sentence with \"' +
                             repr(first) + '\"')
        if 'Define' == first.text:
            # Definitions have their own parse; hand the token back.
            self._token_stream.push_back(first)
            return self.parse_definition(scope)
        sentence = Sentence([first])
        matcher = scope.new_matcher()
        if not matcher.next(first):
            self._token_stream.push_back(first)
            raise ParseError('Sentence not matched.', sentence)
        for tok in self._token_stream:
            if isinstance(tok, FirstToken):
                self._token_stream.push_back(tok)
                if matcher.next():
                    sentence.append(self.parse_expression(scope))
                    continue
                if sentence.ends_with_dot() and matcher.has_end():
                    return sentence
                raise ParseError('Sentence not matched.', sentence)
            if isinstance(tok, WordToken):
                if matcher.next(tok):
                    sentence.append(tok)
                    continue
                if sentence.ends_with_dot() and matcher.has_end():
                    self._token_stream.push_back(tok)
                    return sentence
                raise ParseError('Sentence not matched.', sentence)
            if isinstance(tok, PeriodToken):
                if matcher.has_end():
                    sentence.append(tok)
                    return sentence
                raise ParseError('Sentence not matched.', sentence)
            if isinstance(tok, ValueToken):
                if matcher.next():
                    sentence.append(Sentence([tok]))
                    continue
                raise ParseError('Sentence not matched.', sentence)
            raise ValueError('Unknown Token Kind: {}'.format(type(tok)))
        if isinstance(sentence[-1], Sentence) and matcher.has_end():
            return sentence
        raise ParseError('Sentence not matched.', sentence)
Example #5
0
def _into_sentence(s: Sentence, prefix: list[int],
                   conn_dict: dict[int,
                                   tp.Iterable[str]], variables: list[str]):
    """Recursively append a randomly chosen formula onto *s*.

    The head of *prefix* gives the arity of the connective to place at
    this level; arity 0 is a leaf (a variable)."""
    arity = prefix[0]
    if arity == 0:
        # Leaf: pick one of the generated variables.
        s.append(rchoice(variables))
        return
    connective = s.generate(rchoice(conn_dict[arity]))
    if arity == 2:
        # Binary connectives are written infix, inside parentheses.
        s.append('(')
        _into_sentence(s, prefix[1:], conn_dict, variables)
        s.append(connective)
        _into_sentence(s, prefix[1:], conn_dict, variables)
        s.append(')')
    else:
        # Everything else is written prefix, one subterm per operand.
        s.append(connective)
        for _ in range(arity):
            _into_sentence(s, prefix[1:], conn_dict, variables)
Example #6
0
    def parse_signature(self):
        """Take a stream of tokens and create a Signature.

        Signatures have stricter rules than other parts of the language, but
        they are context insensitive and don't have to match definitions.
        This will use tokens from the stream, but may not empty the stream.

        :return: A Sentence representing the signature.
        :raises ParseError: On a bad first token, a ValueToken, or a
            signature that is never closed by a period."""
        head = next(self._token_stream)
        if not isinstance(head, FirstToken):
            raise ParseError('Invalid start of Signature: ' + str(head))
        signature = Sentence([head])
        for tok in self._token_stream:
            if isinstance(tok, FirstToken):
                # A nested signature begins: hand the token back and recurse.
                self._token_stream.push_back(tok)
                signature.append(self.parse_signature())
            elif isinstance(tok, WordToken):
                signature.append(tok)
            elif isinstance(tok, PeriodToken):
                # The period terminates this signature.
                signature.append(tok)
                return signature
            elif isinstance(tok, ValueToken):
                raise ParseError('Parser.parse_signature: ValueToken not '
                                 'allowed in signature.')
            else:
                raise ValueError('Unknown Token Kind: {}'.format(type(tok)))
        raise ParseError('Parser.parse_signature: fell out of the loop.')
Example #7
0
        # Split a 'text_POS' pair into the word text and its POS tag.
        txt, pos = pos_word.split('_')
        pre_set_rhythm = '#0'
        # Walk the segmented words, accumulating character counts until they
        # line up with the end of the current word.
        while start < len(txt_words):
            total_bit += len(txt_words[start])
            if total_bit > len(txt):
                # Overshot the word boundary: back this word out and stop.
                total_bit -= len(txt) + len(txt_words[start])
                break
            elif total_bit == len(txt):
                # Exact boundary: take this word's rhythm mark and advance.
                total_bit = 0
                pre_set_rhythm = rhythm[start]
                start += 1
                break
            # NOTE(review): max() on strings compares lexicographically —
            # assumes '#N' rhythm labels order correctly that way; confirm.
            pre_set_rhythm = max(pre_set_rhythm, rhythm[start])
            start += 1

        sentence.append(txt, pos, pre_set_rhythm)

    # Emit the sentence id, its text, the built sentence, and the syllable
    # line (Python 2 print statements).
    print sent_id, '\t', sent
    # print len(rhythm),rhythm[len(rhythm)-1]
    # print pos_line
    print '\t',
    sentence.show()
    # print sentence.length()
    print '\t', syl_line
    print

    ## Read in the next line
    txt_line = txt_file.readline()
    # seg_line=seg_file.readline()
    pos_line = pos_file.readline()