Ejemplo n.º 1
0
 def __escape_z__():
   InvalidTokenBuilder.__escape_z__()
   WhitespaceTokenBuilder.__escape_z__()
   NewlineTokenBuilder.__escape_z__()
   EscapedStringTokenBuilder.__escape_z__()
   PrefixedStringTokenBuilder.__escape_z__()
   SuffixedStringTokenBuilder.__escape_z__()
   IntegerTokenBuilder.__escape_z__()
   IntegerExponentTokenBuilder.__escape_z__()
   PrefixedIntegerTokenBuilder.__escape_z__()
   SuffixedIntegerTokenBuilder.__escape_z__()
   RealTokenBuilder.__escape_z__()
   RealExponentTokenBuilder.__escape_z__()
   SuffixedRealTokenBuilder.__escape_z__()
   IdentifierTokenBuilder.__escape_z__()
   PrefixedIdentifierTokenBuilder.__escape_z__()
   CaseInsensitiveListTokenBuilder.__escape_z__()
   CaseSensitiveListTokenBuilder.__escape_z__()
   SingleCharacterTokenBuilder.__escape_z__()
   SlashSlashCommentTokenBuilder.__escape_z__()
   SlashStarCommentTokenBuilder.__escape_z__()
   ClassTypeTokenBuilder.__escape_z__()
   HexRealExponentTokenBuilder.__escape_z__()
   NestedCommentTokenBuilder.__escape_z__()
   return 'Escape ?Z'
Ejemplo n.º 2
0
 def __escape_z__():
     InvalidTokenBuilder.__escape_z__()
     WhitespaceTokenBuilder.__escape_z__()
     NewlineTokenBuilder.__escape_z__()
     EscapedStringTokenBuilder.__escape_z__()
     IntegerTokenBuilder.__escape_z__()
     IntegerExponentTokenBuilder.__escape_z__()
     RealTokenBuilder.__escape_z__()
     RealExponentTokenBuilder.__escape_z__()
     CaseInsensitiveListTokenBuilder.__escape_z__()
     CaseSensitiveListTokenBuilder.__escape_z__()
     SingleCharacterTokenBuilder.__escape_z__()
     LeadToEndOfLineTokenBuilder.__escape_z__()
     PrefixedIdentifierTokenBuilder.__escape_z__()
     RegexTokenBuilder.__escape_z__()
     RubyIdentifierTokenBuilder.__escape_z__()
     HereDocTokenBuilder.__escape_z__()
     return 'Escape ?Z'
Ejemplo n.º 3
0
 def __escape_z__():
     InvalidTokenBuilder.__escape_z__()
     WhitespaceTokenBuilder.__escape_z__()
     NewlineTokenBuilder.__escape_z__()
     IntegerTokenBuilder.__escape_z__()
     IntegerExponentTokenBuilder.__escape_z__()
     PrefixedIntegerTokenBuilder.__escape_z__()
     RealTokenBuilder.__escape_z__()
     RealExponentTokenBuilder.__escape_z__()
     IdentifierTokenBuilder.__escape_z__()
     PrefixedIdentifierTokenBuilder.__escape_z__()
     CaseInsensitiveListTokenBuilder.__escape_z__()
     CaseSensitiveListTokenBuilder.__escape_z__()
     LeadToEndOfLineTokenBuilder.__escape_z__()
     BlockTokenBuilder.__escape_z__()
     KeywordTokenBuilder.__escape_z__()
     MatlabStringTokenBuilder.__escape_z__()
     return 'Escape ?Z'
Ejemplo n.º 4
0
 def __escape_z__():
     InvalidTokenBuilder.__escape_z__()
     WhitespaceTokenBuilder.__escape_z__()
     NewlineTokenBuilder.__escape_z__()
     EscapedStringTokenBuilder.__escape_z__()
     PrefixedStringTokenBuilder.__escape_z__()
     PrefixedRawStringTokenBuilder.__escape_z__()
     IntegerTokenBuilder.__escape_z__()
     IntegerExponentTokenBuilder.__escape_z__()
     RealTokenBuilder.__escape_z__()
     RealExponentTokenBuilder.__escape_z__()
     IdentifierTokenBuilder.__escape_z__()
     PrefixedIdentifierTokenBuilder.__escape_z__()
     CaseInsensitiveListTokenBuilder.__escape_z__()
     CaseSensitiveListTokenBuilder.__escape_z__()
     SingleCharacterTokenBuilder.__escape_z__()
     LeadToEndOfLineTokenBuilder.__escape_z__()
     TripleQuoteStringTokenBuilder.__escape_z__()
     RawTripleQuoteCommentTokenBuilder.__escape_z__()
     return 'Escape ?Z'
Ejemplo n.º 5
0
 def __escape_z__():
     InvalidTokenBuilder.__escape_z__()
     WhitespaceTokenBuilder.__escape_z__()
     NewlineTokenBuilder.__escape_z__()
     EscapedStringTokenBuilder.__escape_z__()
     IntegerTokenBuilder.__escape_z__()
     IntegerExponentTokenBuilder.__escape_z__()
     RealTokenBuilder.__escape_z__()
     RealExponentTokenBuilder.__escape_z__()
     PrefixedIntegerTokenBuilder.__escape_z__()
     SuffixedIdentifierTokenBuilder.__escape_z__()
     CaseInsensitiveListTokenBuilder.__escape_z__()
     CaseSensitiveListTokenBuilder.__escape_z__()
     SingleCharacterTokenBuilder.__escape_z__()
     PrefixedIdentifierTokenBuilder.__escape_z__()
     TripleQuoteStringTokenBuilder.__escape_z__()
     SlashSlashCommentTokenBuilder.__escape_z__()
     SlashStarCommentTokenBuilder.__escape_z__()
     SwiftArgumentTokenBuilder.__escape_z__()
     SwiftSymbolTokenBuilder.__escape_z__()
     return 'Escape ?Z'
Ejemplo n.º 6
0
    def __init__(self, code):
        super().__init__()

        operand_types = []

        whitespace_tb = WhitespaceTokenBuilder()
        newline_tb = NewlineTokenBuilder()
        stmt_separator_tb = SingleCharacterTokenBuilder(
            ';', 'statement separator', False)

        integer_tb = IntegerTokenBuilder('_')
        integer_exponent_tb = IntegerExponentTokenBuilder('_')
        real_tb = RealTokenBuilder(True, True, '_')
        real_exponent_tb = RealExponentTokenBuilder(True, True, 'E', '_')
        operand_types.append('number')

        argument_tb = SwiftArgumentTokenBuilder()

        leads = '_'
        extras = '_'
        suffixes = '?'
        identifier_tb = SuffixedIdentifierTokenBuilder(leads, extras, suffixes)
        operand_types.append('identifier')

        attribute_tb = PrefixedIdentifierTokenBuilder('@', 'attribute', False)

        symbol_tb = SwiftSymbolTokenBuilder('.', 'symbol', True)
        operand_types.append('symbol')

        quotes = ['"', "'", "’"]
        string_tb = EscapedStringTokenBuilder(quotes, 10)
        triple_quote_comment_tb = TripleQuoteStringTokenBuilder(quotes)
        slash_slash_comment_tb = SlashSlashCommentTokenBuilder()
        slash_star_comment_tb = SlashStarCommentTokenBuilder()
        operand_types.append('string')

        known_operators = [
            '+', '-', '*', '/', '%', '==', '!=', '>', '<', '>=', '<=', '&&',
            '||', '!', '&', '|', '^', '~', '<<', '>>', '===', '=', '+=', '-=',
            '*=', '/=', '%=', '<<=', '>>=', '&=', '^=', '|=', '...', '..<',
            '?', ':', '.', '++', '--', '->', '??', '\\.', '&+', '&-', '&*'
        ]

        known_operator_tb = CaseSensitiveListTokenBuilder(
            known_operators, 'operator', False)

        self.unary_operators = ['+', '-', '!', '~', '&', '++', '--', ':', '?']

        self.postfix_operators = ['++', '--', ':', '!', '?']

        groupers = ['(', ')', ',', '[', ']', '{', '}']
        group_starts = ['(', '[', ',', '{']
        group_mids = [',']
        group_ends = [')', ']', '}']

        groupers_tb = CaseInsensitiveListTokenBuilder(groupers, 'group', False)

        keywords = [
            'associatedtype', 'class', 'deinit', 'enum', 'extension',
            'fileprivate', 'func', 'import', 'init', 'inout', 'internal',
            'let', 'open', 'operator', 'private', 'protocol', 'public',
            'static', 'struct', 'subscript', 'typealias', 'var', 'break',
            'case', 'continue', 'default', 'defer', 'do', 'else',
            'fallthrough', 'for', 'guard', 'if', 'in', 'repeat', 'return',
            'switch', 'where', 'while', 'as', 'Any', 'catch', 'is', 'rethrows',
            'super', 'throw', 'throws', 'try', 'try?', 'try!', '#available',
            '#colorLiteral', '#column', '#else', '#elseif', '#endif', '#file',
            '#fileLiteral', '#function', '#if', '#imageLiteral', '#line',
            '#selector', '#sourceLocation', 'associativity', 'convenience',
            'dynamic', 'didSet', 'final', 'get', 'infix', 'indirect', 'lazy',
            'left', 'mutating', 'none', 'nonmutating', 'optional', 'override',
            'postfix', 'precedence', 'prefix', 'Protocol', 'required', 'right',
            'set', 'Type', 'unowned', 'weak', 'willSet'
        ]

        keyword_tb = CaseSensitiveListTokenBuilder(keywords, 'keyword', False)

        types = [
            'char',
            'double',
            'float',
            'int',
            'long',
            'short',
        ]

        types_tb = CaseSensitiveListTokenBuilder(types, 'type', True)
        operand_types.append('type')

        values = ['nil', 'Self', 'false', 'true']

        values_tb = CaseSensitiveListTokenBuilder(values, 'value', True)
        operand_types.append('value')

        invalid_token_builder = InvalidTokenBuilder()

        tokenbuilders = [
            newline_tb, whitespace_tb, stmt_separator_tb, integer_tb,
            integer_exponent_tb, real_tb, real_exponent_tb, argument_tb,
            keyword_tb, types_tb, values_tb, known_operator_tb, groupers_tb,
            identifier_tb, attribute_tb, symbol_tb, string_tb,
            slash_slash_comment_tb, slash_star_comment_tb,
            triple_quote_comment_tb, self.unknown_operator_tb,
            invalid_token_builder
        ]

        tokenizer = Tokenizer(tokenbuilders)
        tokens = tokenizer.tokenize(code)
        tokens = Examiner.combine_adjacent_identical_tokens(
            tokens, 'invalid operator')
        self.tokens = Examiner.combine_adjacent_identical_tokens(
            tokens, 'invalid')
        self.convert_keywords_to_identifiers(['.'])

        self.calc_statistics()

        tokens = self.source_tokens()
        tokens = Examiner.join_all_lines(tokens)

        self.calc_token_confidence()
        self.calc_token_2_confidence()

        num_operators = self.count_my_tokens(['operator', 'invalid operator'])
        if num_operators > 0:
            self.calc_operator_confidence(num_operators)
            allow_pairs = []
            self.calc_operator_2_confidence(tokens, num_operators, allow_pairs)
            self.calc_operator_3_confidence(tokens, num_operators, group_ends,
                                            allow_pairs)
            self.calc_operator_4_confidence(tokens, num_operators,
                                            group_starts, allow_pairs)

        self.calc_group_confidence(tokens, group_mids)

        operand_types = ['number', 'string', 'symbol']
        self.calc_operand_n_confidence(tokens, operand_types, 2)
        self.calc_operand_n_confidence(tokens, operand_types, 4)

        self.calc_keyword_confidence()
        self.calc_line_length_confidence(code, self.max_expected_line)
Ejemplo n.º 7
0
    def __init__(self, code):
        super().__init__()

        self.newlines_important = 'parens'

        operand_types = []

        whitespace_tb = WhitespaceTokenBuilder()
        newline_tb = NewlineTokenBuilder()
        stmt_separator_tb = SingleCharacterTokenBuilder(
            ';', 'statement separator', False)

        integer_tb = IntegerTokenBuilder('_')
        integer_exponent_tb = IntegerExponentTokenBuilder('_')
        real_tb = RealTokenBuilder(True, True, '_')
        real_exponent_tb = RealExponentTokenBuilder(True, True, 'E', '_')
        operand_types.append('number')

        identifier_tb = RubyIdentifierTokenBuilder()
        operand_types.append('identifier')

        symbol_tb = PrefixedIdentifierTokenBuilder(':', 'symbol', True)
        operand_types.append('symbol')

        quotes = ['"', "'", "’"]
        string_tb = EscapedStringTokenBuilder(quotes, 10)
        operand_types.append('string')

        regex_tb = RegexTokenBuilder()
        operand_types.append('regex')

        heredoc_tb = HereDocTokenBuilder('<<-')

        hash_comment_tb = LeadToEndOfLineTokenBuilder('#', False, 'comment')

        known_operators = [
            '!', '~', '**', '*', '/', '%', '+', '-', '<<', '>>', '&', '|', '^',
            '<', '<=', '>', '>=', '==', '===', '!=', '=~', '!~', '<=>', '&&',
            '||', '..', '...', '?', ':', '=', '**=', '*=', '/=', '%=', '+=',
            '-=', '<<=', '>>=', '&&=', '&=', '||=', '|=', '^=', 'not', 'and',
            'or', 'in', '.', '.:', '=>', '::', '<<-'
        ]

        known_operator_tb = CaseSensitiveListTokenBuilder(
            known_operators, 'operator', False)

        self.unary_operators = ['+', '-', '!', '~', '&', '*', '**', '<<-']

        self.postfix_operators = ['++', '--']

        groupers = ['(', ')', ',', '[', ']', '{', '}']
        group_starts = ['(', '[', ',', '{']
        group_mids = [',']
        group_ends = [')', ']', '}']

        groupers_tb = CaseInsensitiveListTokenBuilder(groupers, 'group', False)

        keywords = [
            'BEGIN', 'END', 'alias', 'begin', 'break', 'case', 'class', 'def',
            'defined?', 'do', 'else', 'elsif', 'end', 'ensure', 'for', 'if',
            'module', 'next', 'redo', 'rescue', 'retry', 'return', 'then',
            'undef', 'unless', 'until', 'when', 'while', 'yield'
        ]

        keyword_tb = CaseSensitiveListTokenBuilder(keywords, 'keyword', False)

        values = ['nil', 'self', 'true', 'false', 'super']

        values_tb = CaseSensitiveListTokenBuilder(values, 'value', True)
        operand_types.append('value')

        array_markers = ['%w', '%q', '%Q', '%i', '%s', '%x']

        array_marker_tb = CaseSensitiveListTokenBuilder(
            array_markers, 'identifier', True)

        invalid_token_builder = InvalidTokenBuilder()

        tokenbuilders = [
            newline_tb, whitespace_tb, stmt_separator_tb, integer_tb,
            integer_exponent_tb, real_tb, real_exponent_tb, keyword_tb,
            values_tb, symbol_tb, known_operator_tb, groupers_tb, regex_tb,
            identifier_tb, array_marker_tb, string_tb, heredoc_tb,
            hash_comment_tb, self.unknown_operator_tb, invalid_token_builder
        ]

        tokenizer = Tokenizer(tokenbuilders)
        tokens = tokenizer.tokenize(code)
        tokens = Examiner.combine_adjacent_identical_tokens(
            tokens, 'invalid operator')
        self.tokens = Examiner.combine_adjacent_identical_tokens(
            tokens, 'invalid')

        self.convert_bars_to_groups()
        self.convert_keywords_to_identifiers(['.'])
        self.convert_operators_to_identifiers()

        self.calc_statistics()

        tokens = self.source_tokens()
        tokens = Examiner.join_parens_continued_lines(tokens)
        tokens = Examiner.join_operator_continued_lines(
            tokens, self.postfix_operators)

        self.calc_token_confidence()
        self.calc_token_2_confidence()

        num_operators = self.count_my_tokens(['operator', 'invalid operator'])
        if num_operators > 0:
            self.calc_operator_confidence(num_operators)
            allow_pairs = []
            self.calc_operator_2_confidence(tokens, num_operators, allow_pairs)
            self.calc_operator_3_confidence(tokens, num_operators, group_ends,
                                            allow_pairs)
            self.calc_operator_4_confidence(tokens, num_operators,
                                            group_starts, allow_pairs)

        self.calc_group_confidence(tokens, group_mids)

        operand_types_2 = ['number', 'string', 'symbol']
        self.calc_operand_n_confidence(tokens, operand_types_2, 2)
        # self.calc_operand_n_confidence(tokens, operand_types, 4)

        self.calc_keyword_confidence()

        openers = ['begin', 'def', 'do', 'class', 'module']
        closers = ['end']
        self.calc_paired_blockers_confidence(openers, closers)

        self.calc_line_length_confidence(code, self.max_expected_line)
Ejemplo n.º 8
0
    def __init__(self, code):
        super().__init__()

        operand_types = []

        whitespace_tb = WhitespaceTokenBuilder()
        newline_tb = NewlineTokenBuilder()

        integer_tb = IntegerTokenBuilder('_')
        integer_exponent_tb = IntegerExponentTokenBuilder('_')
        real_tb = RealTokenBuilder(False, False, '_')
        real_exponent_tb = RealExponentTokenBuilder(False, False, 'E', '_')
        operand_types.append('number')

        leads = '_'
        extras = '_'
        identifier_tb = IdentifierTokenBuilder(leads, extras)
        operand_types.append('identifier')

        decorator_tb = PrefixedIdentifierTokenBuilder('@', 'decorator', False)
        quotes = ['"', "'", "’"]
        string_tb = EscapedStringTokenBuilder(quotes, 0)
        operand_types.append('string')

        class_type_tb = ClassTypeTokenBuilder()
        operand_types.append('class')

        slash_slash_comment_tb = SlashSlashCommentTokenBuilder()
        slash_star_comment_tb = SlashStarCommentTokenBuilder()

        terminators_tb = SingleCharacterTokenBuilder(';',
                                                     'statement terminator',
                                                     False)

        known_operators = [
            '+', '-', '*', '/', '%', '=', '==', '!=', '>', '>=', '<', '<=',
            '+=', '-=', '*=', '/=', '%=', '&=', '|=', '^=', '<<=', '>>=', '!',
            '&', '|', '~', '<<', '>>', '>>>', '>>>=', '^', '.', '::', '++',
            '--', '&&', '||', '?', '->', 'new'
        ]

        known_operator_tb = CaseSensitiveListTokenBuilder(
            known_operators, 'operator', False)

        self.unary_operators = ['+', '-', '!', '~', '++', '--', 'new']

        self.postfix_operators = ['++', '--']

        groupers = ['(', ')', ',', '[', ']', '{', '}', ':']
        group_starts = ['(', '[', ',', '{']
        group_ends = [')', ']', '}']
        group_mids = [',', ':']

        groupers_tb = CaseInsensitiveListTokenBuilder(groupers, 'group', False)

        keywords = [
            'abstract', 'assert', 'break', 'case', 'catch', 'class', 'const',
            'continue', 'default', 'do', 'else', 'enum', 'extends', 'final',
            'finally', 'for', 'goto', 'if', 'implements', 'import',
            'instanceof', 'interface', 'native', 'package', 'private',
            'protected', 'public', 'return', 'static', 'strictfp', 'super',
            'switch', 'synchronized', 'throw', 'throws', 'transient', 'try',
            'volatile', 'while'
        ]

        keyword_tb = CaseSensitiveListTokenBuilder(keywords, 'keyword', False)

        types = [
            'boolean', 'byte', 'char', 'double', 'float', 'int', 'long',
            'short', 'string', 'void', 'Integer', 'String', 'StringBuilder',
            'File', 'Exception', 'IOException'
        ]

        types_tb = CaseSensitiveListTokenBuilder(types, 'type', True)
        operand_types.append('type')

        values = ['false', 'null', 'this', 'true']

        values_tb = CaseSensitiveListTokenBuilder(values, 'value', True)
        operand_types.append('value')

        invalid_token_builder = InvalidTokenBuilder()

        tokenbuilders = [
            newline_tb, whitespace_tb, terminators_tb, integer_tb,
            integer_exponent_tb, real_tb, real_exponent_tb, keyword_tb,
            types_tb, values_tb, known_operator_tb, groupers_tb, identifier_tb,
            class_type_tb, decorator_tb, string_tb, slash_slash_comment_tb,
            slash_star_comment_tb, self.unknown_operator_tb,
            invalid_token_builder
        ]

        tokenizer = Tokenizer(tokenbuilders)
        tokens = tokenizer.tokenize(code)
        tokens = Examiner.combine_adjacent_identical_tokens(
            tokens, 'invalid operator')
        tokens = Examiner.combine_adjacent_identical_tokens(tokens, 'invalid')
        tokens = Examiner.combine_identifier_colon(
            tokens, ['statement terminator', 'newline'], ['{'],
            ['whitespace', 'comment'])
        self.tokens = tokens
        self.convert_identifiers_to_labels()

        self.convert_keywords_to_identifiers(['::', '.'])
        self.convert_operators_to_identifiers(['::', '.'])

        self.calc_statistics()

        tokens = self.source_tokens()
        tokens = Examiner.join_all_lines(tokens)

        self.calc_token_confidence()
        self.calc_token_2_confidence()

        num_operators = self.count_my_tokens(['operator', 'invalid operator'])
        if num_operators > 0:
            self.calc_operator_confidence(num_operators)
            allow_pairs = []
            self.calc_operator_2_confidence(tokens, num_operators, allow_pairs)
            self.calc_operator_3_confidence(tokens, num_operators, group_ends,
                                            allow_pairs)
            self.calc_operator_4_confidence(tokens, num_operators,
                                            group_starts, allow_pairs)

        self.calc_group_confidence(tokens, group_mids)

        operand_types_2 = ['number', 'string', 'symbol']
        self.calc_operand_n_confidence(tokens, operand_types_2, 2)
        self.calc_operand_n_confidence(tokens, operand_types, 4)

        self.calc_keyword_confidence()

        self.calc_paired_blockers_confidence(['{'], ['}'])
        self.calc_line_length_confidence(code, self.max_expected_line)
Ejemplo n.º 9
0
    def __init__(self, code):
        super().__init__()

        operand_types = []

        whitespace_tb = WhitespaceTokenBuilder()
        newline_tb = NewlineTokenBuilder()

        integer_tb = IntegerTokenBuilder('_')
        integer_exponent_tb = IntegerExponentTokenBuilder('_')
        hex_integer_tb = PrefixedIntegerTokenBuilder(
            '0x', False, '0123456789ABCDEFabcdef_')
        real_tb = RealTokenBuilder(True, True, '_')
        real_exponent_tb = RealExponentTokenBuilder(True, True, 'E', '_')
        operand_types.append('number')

        leads = '_'
        extras = '_'
        identifier_tb = IdentifierTokenBuilder(leads, extras)
        operand_types.append('identifier')

        decorator_tb = PrefixedIdentifierTokenBuilder('@', 'decorator', False)
        operand_types.append('decorator')

        quotes = ['"', "'", "’"]
        string_tb = EscapedStringTokenBuilder(quotes, 0)
        triple_quote_string_tb = TripleQuoteStringTokenBuilder(quotes)
        operand_types.append('string')

        slash_slash_comment_tb = SlashSlashCommentTokenBuilder()
        slash_star_comment_tb = SlashStarCommentTokenBuilder()

        class_tb = ClassTypeTokenBuilder()
        operand_types.append('class')

        terminators_tb = SingleCharacterTokenBuilder(';',
                                                     'statement terminator',
                                                     False)

        known_operators = [
            '+', '-', '*', '/', '%', '=', '+=', '-=', '*=', '/=', '%=', '++',
            '--', '&&', '||', '!', '==', '!=', '===', '!==', '<', '>', '<=',
            '>=', '!!', '?.', '?:', '::', '..', ':', '?', '.'
        ]

        known_operator_tb = CaseSensitiveListTokenBuilder(
            known_operators, 'operator', False)

        self.unary_operators = ['+', '-', '!', '*', '++', '--']

        self.postfix_operators = ['++', '--', ':']

        groupers = ['->', '(', ')', ',', '[', ']', '{', '}']
        group_starts = ['(', '[', ',', '{']
        group_mids = ['->', ',']
        group_ends = [')', ']', '}']

        groupers_tb = CaseInsensitiveListTokenBuilder(groupers, 'group', False)

        keywords = [
            'as', 'as?', 'break', 'class', 'continue', 'do', 'else', 'for',
            'fun', 'if', 'in', '!in', 'is', '!is', 'object', 'package',
            'return', 'super', 'throw', 'try', 'typealias', 'typeof', 'val',
            'var', 'when', 'while', 'by', 'catch', 'constructor', 'delegate',
            'dynamic', 'field', 'file', 'finally', 'get', 'import', 'init',
            'param', 'property', 'receiver', 'set', 'setparam', 'where',
            'actual', 'abstract', 'annotation', 'companion', 'const',
            'crossinline', 'data', 'enum', 'expect', 'external', 'final',
            'infix', 'inline', 'inner', 'internal', 'lateinit', 'noinline',
            'open', 'operator', 'out', 'override', 'private', 'protected',
            'public', 'reified', 'sealed', 'suspend', 'tailrec', 'vararg'
        ]

        keyword_tb = CaseSensitiveListTokenBuilder(keywords, 'keyword', False)

        types = [
            'Byte', 'Short', 'Int', 'Long', 'Float', 'Double', 'Char', 'u',
            'f', 'ul', 'UInt', 'ULong', 'UByte', 'UShort'
        ]

        type_tb = CaseSensitiveListTokenBuilder(types, 'type', True)
        operand_types.append('type')

        values = ['false', 'null', 'this', 'true']

        values_tb = CaseSensitiveListTokenBuilder(values, 'value', True)
        operand_types.append('value')

        invalid_token_builder = InvalidTokenBuilder()

        tokenbuilders = [
            newline_tb, whitespace_tb, terminators_tb, integer_tb,
            integer_exponent_tb, hex_integer_tb, real_tb, real_exponent_tb,
            keyword_tb, type_tb, values_tb, known_operator_tb, groupers_tb,
            identifier_tb, class_tb, decorator_tb, string_tb,
            triple_quote_string_tb, slash_slash_comment_tb,
            slash_star_comment_tb, self.unknown_operator_tb,
            invalid_token_builder
        ]

        tokenizer = Tokenizer(tokenbuilders)
        tokens = tokenizer.tokenize(code)
        tokens = Examiner.combine_adjacent_identical_tokens(
            tokens, 'invalid operator')
        tokens = Examiner.combine_adjacent_identical_tokens(tokens, 'invalid')
        self.tokens = self.combine_numbers_and_adjacent_types(tokens)

        self.calc_statistics()

        tokens = self.source_tokens()
        tokens = Examiner.join_all_lines(tokens)

        self.calc_token_confidence()
        self.calc_token_2_confidence()

        num_operators = self.count_my_tokens(['operator', 'invalid operator'])
        if num_operators > 0:
            self.calc_operator_confidence(num_operators)
            allow_pairs = []
            self.calc_operator_2_confidence(tokens, num_operators, allow_pairs)
            self.calc_operator_3_confidence(tokens, num_operators, group_ends,
                                            allow_pairs)
            self.calc_operator_4_confidence(tokens, num_operators,
                                            group_starts, allow_pairs)

        self.calc_group_confidence(tokens, group_mids)

        operand_types_2 = ['number', 'string', 'symbol']
        self.calc_operand_n_confidence(tokens, operand_types_2, 2)
        self.calc_operand_n_confidence(tokens, operand_types, 4)

        self.calc_keyword_confidence()

        self.calc_paired_blockers_confidence(['{'], ['}'])
        self.calc_line_length_confidence(code, self.max_expected_line)
Ejemplo n.º 10
0
    def __init__(self, code):
        super().__init__()
        self.newlines_important = 'always'

        operand_types = []

        whitespace_tb = WhitespaceTokenBuilder()
        newline_tb = NewlineTokenBuilder()
        stmt_separator_tb = SingleCharacterTokenBuilder(
            ';', 'statement separator', False)

        integer_tb = IntegerTokenBuilder('_')
        integer_exponent_tb = IntegerExponentTokenBuilder('_')
        real_tb = RealTokenBuilder(False, False, '_')
        real_exponent_tb = RealExponentTokenBuilder(False, False, 'E', '_')
        operand_types.append('number')

        leads = '_'
        extras = '_'
        identifier_tb = IdentifierTokenBuilder(leads, extras)
        operand_types.append('identifier')

        decorator_tb = PrefixedIdentifierTokenBuilder('@', 'decorator', False)

        quotes = ['"', "'", "’"]
        string_tb = EscapedStringTokenBuilder(quotes, 0)
        raw_string_tb = PrefixedRawStringTokenBuilder('r', True, quotes)
        byte_string_tb = PrefixedStringTokenBuilder('b', True, quotes)
        unicode_string_tb = PrefixedStringTokenBuilder('u', True, quotes)
        fast_string_tb = PrefixedStringTokenBuilder('f', True, quotes)
        operand_types.append('string')

        triple_quote_comment_tb = TripleQuoteStringTokenBuilder(quotes)
        raw_triple_quote_comment_tb = RawTripleQuoteCommentTokenBuilder()
        hash_comment_tb = LeadToEndOfLineTokenBuilder('#', True, 'comment')

        known_operators = [
            '+', '-', '*', '/', '%', '@', '=', ':=', '==', '>', '>=', '<',
            '<=', '+=', '-=', '*=', '/=', '%=', '&=', '|=', '^=', '<<=', '>>=',
            '&', '|', '~', '<<', '>>', '**', '.', ':', '++', '--', 'and', 'or',
            'in', 'is', 'not'
        ]

        known_operator_tb = CaseSensitiveListTokenBuilder(
            known_operators, 'operator', False)

        self.unary_operators = ['+', '-', 'not', '~', '++', '--', '.']

        self.postfix_operators = ['++', '--', ':']

        self.adjective_operators = ['not']

        self.keyword_postfix = [':']

        groupers = ['(', ')', ',', '[', ']', '{', '}']
        group_starts = ['(', '[', ',', '{']
        group_mids = [',']
        group_ends = [')', ']', '}']

        groupers_tb = CaseInsensitiveListTokenBuilder(groupers, 'group', False)

        continuation_chars = ['\\']
        line_continuation_tb = CaseInsensitiveListTokenBuilder(
            continuation_chars, 'line continuation', False)

        keywords = [
            'as', 'assert', 'break', 'case', 'class', 'continue', 'def', 'del',
            'elif', 'else', 'except', 'finally', 'for', 'from', 'global', 'if',
            'import', 'lambda', 'match', 'nonlocal', 'pass', 'print', 'raise',
            'return', 'try', 'while', 'while', 'with', 'yield'
        ]

        keyword_tb = CaseSensitiveListTokenBuilder(keywords, 'keyword', False)

        values = ['False', 'None', 'True']

        values_tb = CaseSensitiveListTokenBuilder(values, 'value', True)
        operand_types.append('value')

        invalid_token_builder = InvalidTokenBuilder()

        tokenbuilders = [
            newline_tb, whitespace_tb, line_continuation_tb, stmt_separator_tb,
            integer_tb, integer_exponent_tb, real_tb, real_exponent_tb,
            keyword_tb, values_tb, known_operator_tb, groupers_tb,
            identifier_tb, decorator_tb, string_tb, raw_string_tb,
            byte_string_tb, unicode_string_tb, fast_string_tb, hash_comment_tb,
            triple_quote_comment_tb, raw_triple_quote_comment_tb,
            self.unknown_operator_tb, invalid_token_builder
        ]

        tokenizer = Tokenizer(tokenbuilders)
        tokens = tokenizer.tokenize(code)
        tokens = Examiner.combine_adjacent_identical_tokens(
            tokens, 'invalid operator')
        self.tokens = Examiner.combine_adjacent_identical_tokens(
            tokens, 'invalid')

        self.calc_statistics()

        tokens = self.source_tokens()
        tokens = Examiner.join_parens_continued_lines(tokens)
        tokens = Examiner.join_operator_continued_lines(
            tokens, self.postfix_operators)

        self.calc_token_confidence()
        self.calc_token_2_confidence()

        num_operators = self.count_my_tokens(['operator', 'invalid operator'])
        if num_operators > 0:
            self.calc_operator_confidence(num_operators)
            allow_pairs = [['not', 'in']]
            self.calc_operator_2_confidence(tokens, num_operators, allow_pairs)
            self.calc_operator_3_confidence(tokens, num_operators, group_ends,
                                            allow_pairs)
            self.calc_operator_4_confidence(tokens, num_operators,
                                            group_starts, allow_pairs)

        self.calc_group_confidence(tokens, group_mids)

        operand_types_2 = [
            'number', 'string', 'identifier', 'variable', 'symbol'
        ]
        self.calc_operand_n_confidence(tokens, operand_types_2, 2)
        self.calc_operand_n_confidence(tokens, operand_types, 4)

        self.calc_keyword_confidence()

        self.calc_line_format_confidence()
        self.calc_line_length_confidence(code, self.max_expected_line)
Ejemplo n.º 11
0
    def __init__(self, code):
        super().__init__()

        operand_types = []

        whitespace_tb = WhitespaceTokenBuilder()
        newline_tb = NewlineTokenBuilder()

        integer_tb = IntegerTokenBuilder("'")
        integer_exponent_tb = IntegerExponentTokenBuilder("'")
        hex_integer_tb = PrefixedIntegerTokenBuilder('0x', False,
                                                     '0123456789abcdefABCDEF')
        binary_integer_tb = PrefixedIntegerTokenBuilder('0b', False, '01')
        real_tb = RealTokenBuilder(False, False, "'")
        real_exponent_tb = RealExponentTokenBuilder(False, False, 'E', "'")
        operand_types.append('number')

        leads = '_'
        extras = '_'
        identifier_tb = IdentifierTokenBuilder(leads, extras)
        operand_types.append('identifier')

        annotation_tb = PrefixedIdentifierTokenBuilder('@', 'annotation',
                                                       False)
        operand_types.append('annotation')

        symbol_tb = PrefixedIdentifierTokenBuilder('#', 'symbol', True)
        operand_types.append('symbol')

        quotes = ['"', "'", "’"]
        string_tb = EscapedStringTokenBuilder(quotes, 0)
        raw_string_tb = PrefixedRawStringTokenBuilder('r', True, quotes)
        operand_types.append('string')

        class_type_tb = ClassTypeTokenBuilder()
        operand_types.append('class')

        slash_slash_comment_tb = SlashSlashCommentTokenBuilder()
        slash_star_comment_tb = SlashStarCommentTokenBuilder()

        line_continuation_tb = SingleCharacterTokenBuilder(
            '\\', 'line continuation', False)
        terminators_tb = SingleCharacterTokenBuilder(';',
                                                     'statement terminator',
                                                     False)

        known_operators = [
            '+', '-', '*', '/', '~/', '%', '^', '=', '==', '!=', '>', '>=',
            '<', '<=', '+=', '-=', '*=', '/=', '%=', '&=', '|=', '^=', '<<=',
            '>>=', '!', '&', '|', '~', '<<', '>>', '~/=', '||', '&&', '.',
            '..', ':', '?', '??', '??=', 'as', 'is', 'is!', '++', '--', 'new'
        ]

        self.unary_operators = [
            '+', '-', '*', '!', '~', '.', '..'
            '?.', '++', '--', 'new'
        ]

        self.postfix_operators = ['++', '--']

        groupers = ['(', ')', ',', '[', ']', '{', '}']
        group_starts = ['(', '[', ',', '{']
        group_mids = [',']
        group_ends = [')', ']', '}']

        groupers_tb = CaseInsensitiveListTokenBuilder(groupers, 'group', False)

        known_operator_tb = CaseSensitiveListTokenBuilder(
            known_operators, 'operator', False)

        keywords = [
            'abstract', 'assert', 'async', 'await', 'break', 'case', 'catch',
            'class', 'const', 'continue', 'covariant', 'default', 'deferred',
            'do', 'dynamic', 'else', 'enum', 'export', 'extends', 'external',
            'factory', 'final', 'finally', 'for', 'Function', 'get', 'hide',
            'if', 'implements', 'import', 'in', 'interface', 'library',
            'mixin', 'on', 'operator', 'part', 'rethrow', 'return', 'set',
            'show', 'static', 'switch', 'sync', 'throw', 'try', 'typedef',
            'var', 'void', 'while', 'with', 'yield'
        ]

        keyword_tb = CaseSensitiveListTokenBuilder(keywords, 'keyword', False)

        types = ['int', 'double', 'String', 'List', 'bool', 'void']

        types_tb = CaseSensitiveListTokenBuilder(types, 'type', True)
        operand_types.append('type')

        values = ['false', 'true', 'null', 'this', 'super']

        values_tb = CaseSensitiveListTokenBuilder(values, 'value', True)
        operand_types.append('value')

        invalid_token_builder = InvalidTokenBuilder()

        tokenbuilders = [
            newline_tb, whitespace_tb, line_continuation_tb, terminators_tb,
            integer_tb, integer_exponent_tb, hex_integer_tb, binary_integer_tb,
            real_tb, real_exponent_tb, keyword_tb, types_tb, values_tb,
            groupers_tb, known_operator_tb, identifier_tb, annotation_tb,
            symbol_tb, class_type_tb, string_tb, raw_string_tb,
            slash_slash_comment_tb, slash_star_comment_tb,
            self.unknown_operator_tb, invalid_token_builder
        ]

        tokenizer = Tokenizer(tokenbuilders)
        tokens = tokenizer.tokenize(code)
        tokens = Examiner.combine_adjacent_identical_tokens(
            tokens, 'invalid operator')
        tokens = Examiner.combine_adjacent_identical_tokens(tokens, 'invalid')
        tokens = Examiner.combine_identifier_colon(
            tokens, ['statement terminator', 'newline'], ['{'],
            ['whitespace', 'comment'])
        self.tokens = tokens
        self.convert_identifiers_to_labels()

        self.calc_statistics()

        tokens = self.source_tokens()
        tokens = Examiner.join_all_lines(tokens)

        self.calc_token_confidence()
        self.calc_token_2_confidence()

        num_operators = self.count_my_tokens(['operator', 'invalid operator'])
        if num_operators > 0:
            self.calc_operator_confidence(num_operators)
            allow_pairs = []
            self.calc_operator_2_confidence(tokens, num_operators, allow_pairs)
            self.calc_operator_3_confidence(tokens, num_operators, group_ends,
                                            allow_pairs)
            self.calc_operator_4_confidence(tokens, num_operators,
                                            group_starts, allow_pairs)

        self.calc_group_confidence(tokens, group_mids)

        operand_types_2 = ['number', 'symbol']
        self.calc_operand_n_confidence(tokens, operand_types_2, 2)
        self.calc_operand_n_confidence(tokens, operand_types, 4)

        self.calc_keyword_confidence()

        self.calc_paired_blockers_confidence(['{'], ['}'])
        self.calc_line_length_confidence(code, self.max_expected_line)
Ejemplo n.º 12
0
  def __init__(self, code, block_comment_limit):
    super().__init__()

    operand_types = []

    whitespace_tb = WhitespaceTokenBuilder()
    newline_tb = NewlineTokenBuilder()

    integer_tb = IntegerTokenBuilder("'")
    integer_exponent_tb = IntegerExponentTokenBuilder("'")
    hex_integer_tb = PrefixedIntegerTokenBuilder('0x', False, '0123456789abcdefABCDEF_')
    binary_integer_tb = PrefixedIntegerTokenBuilder('0b', False, '01_')
    suffixed_integer_tb = SuffixedIntegerTokenBuilder(['U', 'L', 'LU', 'UL'], False, None)
    real_tb = RealTokenBuilder(False, False, "'")
    suffixed_real_tb = SuffixedRealTokenBuilder(False, False, ['f', 'l', 'i'], False, None)
    real_exponent_tb = RealExponentTokenBuilder(False, False, 'E', "'")
    hex_real_tb = HexRealExponentTokenBuilder()
    operand_types.append('number')

    leads = '_'
    extras = '_'
    identifier_tb = IdentifierTokenBuilder(leads, extras)
    operand_types.append('identifier')

    attribute_tb = PrefixedIdentifierTokenBuilder('@', 'attribute', False)
    operand_types.append('attribute')

    # string suffix: c,w,d
    quotes = ['"', "'", "’"]
    string_tb = EscapedStringTokenBuilder(quotes, 0)
    r_string_tb = PrefixedStringTokenBuilder('r', True, quotes)
    backtick_string_tb = EscapedStringTokenBuilder(['`'], 0)
    x_string_tb = PrefixedStringTokenBuilder('x', True, quotes)
    q_string_tb = PrefixedStringTokenBuilder('q', True, quotes)
    # q{} string
    cwd_string_tb = SuffixedStringTokenBuilder(quotes, 'cwd', False)
    operand_types.append('string')

    class_type_tb = ClassTypeTokenBuilder()
    operand_types.append('class')

    slash_slash_comment_tb = SlashSlashCommentTokenBuilder()
    slash_star_comment_tb = SlashStarCommentTokenBuilder()
    slash_plus_comment_tb = NestedCommentTokenBuilder('/+', '+/', block_comment_limit)

    line_continuation_tb = SingleCharacterTokenBuilder('\\', 'line continuation', False)
    terminators_tb = SingleCharacterTokenBuilder(';', 'statement terminator', False)

    known_operators = [
      '/', '/=', '.', '..', '...', '&', '&=', '&&', '|', '|=', '||',
      '-', '-=', '--', '+', '+=', '++', '<', '<=', '<<', '<<=', '>', '>=',
      '>>=', '>>>=', '>>', '>>>', '!', '!=', '?', ',', ':', '$',
      '=', '==', '*', '*=', '%', '%=', '^', '^=', '^^', '^^=', '~', '~=',
      '@', '=>', '#',
      'new', 'delete',
      'typeof', 'is'
    ]

    self.unary_operators = [
      '+', '-', '*',
      '!', '&', '~',
      '++', '--', ':',
      'new', 'delete',
      'typeof', 'is'
    ]

    self.postfix_operators = [
      '++', '--', '&', ':'
    ]

    groupers = ['(', ')', ',', '[', ']', '{', '}']
    group_starts = ['(', '[', ',', '{']
    group_mids = [',']
    group_ends = [')', ']', '}']

    groupers_tb = CaseInsensitiveListTokenBuilder(groupers, 'group', False)

    known_operator_tb = CaseSensitiveListTokenBuilder(known_operators, 'operator', False)

    keywords = [
      'abstract', 'alias', 'align', 'asm', 'assert', 'auto',
      'body', 'break', 'case', 'cast', 'catch', 'class', 'const', 'continue',
      'debug', 'default', 'delegate', 'deprecated', 'do',
      'else', 'enum', 'export', 'extern',
      'final', 'finally', 'for', 'foreach', 'foreach_reverse', 'function',
      'goto',
      'if', 'immutable', 'import', 'in', 'inout', 'interface', 'invariant',
      'lazy',
      'macro', 'mixin', 'module',
      'nothrow',
      'out', 'override',
      'package', 'pragma', 'private', 'protected', 'public', 'pure',
      'ref', 'return',
      'scope', 'shared', 'static', 'struct', 'switch', 'synchronized',
      'template', 'throw', 'try', 'typeid',
      'union', 'unittest', 'version', 'while', 'with',
      '__gshared', '__traits', '__vector', '__parameters'
]

    keyword_tb = CaseSensitiveListTokenBuilder(keywords, 'keyword', False)

    types = [
      'bool', 'byte', 'cdouble', 'cent', 'cfloat', 'char', 'creal',
      'dchar', 'double', 'float', 'idouble', 'ifloat', 'int', 'ireal',
      'long', 'real', 'short', 'ubyte', 'ucent', 'uint', 'ulong', 'ushort',
      'void', 'wchar'
    ]

    types_tb = CaseSensitiveListTokenBuilder(types, 'type', True)
    operand_types.append('type')

    values = [
      'false', 'null', 'super', 'this', 'true',
      '__FILE__', '__FILE_FULL_PATH__', '__MODULE__', '__LINE__',
      '__FUNCTION__', '__PRETTY_FUNCTION__',
      '__DATE__', '__EOF__', '__TIME__','__TIMESTAMP__',
      '__VENDOR__', '__VERSION__'
    ]

    values_tb = CaseSensitiveListTokenBuilder(values, 'value', True)
    operand_types.append('value')

    invalid_token_builder = InvalidTokenBuilder()

    tokenbuilders = [
      newline_tb,
      whitespace_tb,
      line_continuation_tb,
      terminators_tb,
      integer_tb,
      integer_exponent_tb,
      hex_integer_tb,
      binary_integer_tb,
      suffixed_integer_tb,
      real_tb,
      real_exponent_tb,
      suffixed_real_tb,
      hex_real_tb,
      keyword_tb,
      types_tb,
      values_tb,
      groupers_tb,
      known_operator_tb,
      identifier_tb,
      attribute_tb,
      class_type_tb,
      string_tb,
      r_string_tb,
      x_string_tb,
      backtick_string_tb,
      q_string_tb,
      cwd_string_tb,
      slash_slash_comment_tb,
      slash_star_comment_tb,
      slash_plus_comment_tb,
      self.unknown_operator_tb,
      invalid_token_builder
    ]

    tokenizer = Tokenizer(tokenbuilders)
    tokens = tokenizer.tokenize(code)
    tokens = Examiner.combine_adjacent_identical_tokens(tokens, 'invalid operator')
    tokens = Examiner.combine_adjacent_identical_tokens(tokens, 'invalid')
    tokens = Examiner.combine_identifier_colon(tokens, ['statement terminator', 'newline'], ['{'], ['whitespace', 'comment'])
    self.tokens = tokens
    self.convert_identifiers_to_labels()

    number_suffixes = ['f', 'F', 'i', 'I', 'u', 'U', 'l', 'L', 'ul', 'uL', 'Ul', 'UL', 'lu', 'lU', 'Lu', 'LU']
    tokens = self.combine_tokens_and_adjacent_types(tokens, 'number', 'identifier', number_suffixes)

    string_suffixes = ['c', 'w', 'd']
    self.tokens = self.combine_tokens_and_adjacent_types(tokens, 'string', 'identifier', string_suffixes)

    self.calc_statistics()

    tokens = self.source_tokens()
    tokens = Examiner.join_all_lines(tokens)

    self.calc_token_confidence()
    self.calc_token_2_confidence()

    num_operators = self.count_my_tokens(['operator', 'invalid operator'])
    if num_operators > 0:
      self.calc_operator_confidence(num_operators)
      allow_pairs = []
      self.calc_operator_2_confidence(tokens, num_operators, allow_pairs)
      self.calc_operator_3_confidence(tokens, num_operators, group_ends, allow_pairs)
      self.calc_operator_4_confidence(tokens, num_operators, group_starts, allow_pairs)

    self.calc_group_confidence(tokens, group_mids)

    operand_types_2 = ['number', 'symbol']
    self.calc_operand_n_confidence(tokens, operand_types_2, 2)
    self.calc_operand_n_confidence(tokens, operand_types, 4)

    self.calc_keyword_confidence()

    self.calc_paired_blockers_confidence(['{'], ['}'])
    self.calc_line_length_confidence(code, self.max_expected_line)
Ejemplo n.º 13
0
    def __init__(self, code, block_comment_limit):
        super().__init__()
        self.newlines_important = 'parens'

        operand_types = []

        whitespace_tb = WhitespaceTokenBuilder()
        newline_tb = NewlineTokenBuilder()

        integer_tb = IntegerTokenBuilder(None)
        integer_exponent_tb = IntegerExponentTokenBuilder(None)
        hex_integer_tb = PrefixedIntegerTokenBuilder('0x', False,
                                                     '0123456789abcdefABCDEF')
        real_tb = RealTokenBuilder(False, False, None)
        real_exponent_tb = RealExponentTokenBuilder(False, False, 'E', None)
        imaginary_tb = SuffixedRealTokenBuilder(False, False, ['im', 'cx'],
                                                True, None)
        operand_types.append('number')

        leads = '_'
        extras = '_'
        suffixes = '!'
        identifier_tb = SuffixedIdentifierTokenBuilder(leads, extras, suffixes)
        operand_types.append('identifier')

        symbol_tb = PrefixedIdentifierTokenBuilder(':', 'symbol', True)
        operand_types.append('symbol')

        attribute_tb = PrefixedIdentifierTokenBuilder('@', 'attribute', False)
        operand_types.append('attribute')

        dollar_sign_tb = SingleCharacterTokenBuilder('$', 'identifier', True)

        quotes = ['"', "'", "’"]
        string_tb = EscapedStringTokenBuilder(quotes, 0)
        raw_string_tb = PrefixedRawStringTokenBuilder('raw', True, quotes)
        b_string_tb = PrefixedStringTokenBuilder('b', True, quotes)
        triple_quote_string_tb = TripleQuoteStringTokenBuilder(quotes)
        operand_types.append('string')

        comment_tb = LeadToEndOfLineTokenBuilder('#', True, 'comment')
        nested_comment_tb = NestedCommentTokenBuilder('#=', '=#',
                                                      block_comment_limit)

        line_continuation_tb = SingleCharacterTokenBuilder(
            '\\', 'line continuation', False)
        terminators_tb = SingleCharacterTokenBuilder(';',
                                                     'statement terminator',
                                                     False)

        known_operators = [
            'where', 'in', 'isa', '′', "'", '+', '-', '*', '/', '\\', '^', '%',
            '//', '<<', '>>', '<<<', '>>>', ':', '=', '==', '!=', '===', '!==',
            '+=', '-=', '*=', '/=', '^=', '%=', '<', '>', '<=', '>=', '~', '&',
            '|', '!', '&&', '||', '?', '.', '<:', '>:', '::', '->', '...',
            '..', '∀', '≤', '≥', '⊻', '⊽', '⊼'
        ]

        # 0x391 through 0x3a9 (capital)
        # 0x3b1 through 0x3c9 (small)
        greek_letters = [
            'α', 'β', 'γ', 'δ', 'ε', 'ζ', 'η', 'θ', 'ι', 'κ', 'λ', 'μ', 'ν',
            'ξ', 'ο', 'π', 'ρ', 'ς', 'σ', 'τ', 'υ', 'φ', 'χ', 'ψ', 'ω'
        ]

        greek_letter_tb = CaseSensitiveListTokenBuilder(
            greek_letters, 'identifier', True)

        self.unary_operators = [
            'isa', '+', '-', '~', '!', '.', ':', '::', "'", '<:', '>:', 'in',
            '..'
        ]

        self.postfix_operators = ['...', '′']

        groupers = ['(', ')', ',', '[', ']', '{', '}']
        # group_starts = ['(', '[', ',', '{']
        group_mids = [',']
        group_ends = [')', ']', '}']

        groupers_tb = CaseInsensitiveListTokenBuilder(groupers, 'group', False)

        known_operator_tb = CaseSensitiveListTokenBuilder(
            known_operators, 'operator', False)

        keywords = [
            'baremodule', 'begin', 'break', 'catch', 'const', 'continue', 'do',
            'else', 'elseif', 'end', 'export', 'finally', 'for', 'function',
            'global', 'if', 'import', 'let', 'local', 'macro', 'module',
            'quote', 'return', 'struct', 'try', 'using', 'while', 'abstract',
            'mutable', 'primitive', 'type'
        ]

        keyword_tb = CaseSensitiveListTokenBuilder(keywords, 'keyword', False)

        types = [
            'Int8', 'UInt8', 'Int16', 'UInt16', 'Int32', 'UInt32', 'Int64',
            'UInt64', 'Int128', 'UInt128', 'Float16', 'Float32', 'Float64',
            'Bool', 'Char'
        ]

        types_tb = CaseSensitiveListTokenBuilder(types, 'type', True)
        operand_types.append('type')

        values = ['false', 'true']

        values_tb = CaseSensitiveListTokenBuilder(values, 'value', True)
        operand_types.append('value')

        invalid_token_builder = InvalidTokenBuilder()

        tokenbuilders = [
            newline_tb, whitespace_tb, line_continuation_tb, terminators_tb,
            integer_tb, integer_exponent_tb, hex_integer_tb, real_tb,
            real_exponent_tb, imaginary_tb, keyword_tb, types_tb, values_tb,
            groupers_tb, known_operator_tb, identifier_tb, symbol_tb,
            attribute_tb, dollar_sign_tb, greek_letter_tb, string_tb,
            raw_string_tb, b_string_tb, triple_quote_string_tb, comment_tb,
            nested_comment_tb, self.unknown_operator_tb, invalid_token_builder
        ]

        tokenizer = Tokenizer(tokenbuilders)
        tokens = tokenizer.tokenize(code)
        tokens = Examiner.combine_adjacent_identical_tokens(
            tokens, 'invalid operator')
        tokens = Examiner.combine_adjacent_identical_tokens(tokens, 'invalid')
        tokens = JuliaExaminer.split_symbols_to_operators_identifiers(
            tokens, group_ends)
        self.tokens = tokens
        self.convert_keywords_to_identifiers()

        self.calc_statistics()

        tokens = self.source_tokens()
        tokens = Examiner.join_parens_continued_lines(tokens)
        tokens = Examiner.join_operator_continued_lines(
            tokens, self.postfix_operators)

        self.calc_token_confidence()
        self.calc_token_2_confidence()

        num_operators = self.count_my_tokens(['operator', 'invalid operator'])
        if num_operators > 0:
            self.calc_operator_confidence(num_operators)
            allow_pairs = []
            self.calc_operator_2_confidence(tokens, num_operators, allow_pairs)
            self.calc_operator_3_confidence(tokens, num_operators, group_ends,
                                            allow_pairs)
            # self.calc_operator_4_confidence(tokens, num_operators, group_starts, allow_pairs)

        self.calc_group_confidence(tokens, group_mids)

        operand_types_2 = ['number', 'identifier', 'symbol']
        self.calc_operand_confidence(tokens, operand_types_2)
        self.calc_operand_n_confidence(tokens, operand_types, 4)

        self.calc_keyword_confidence()

        self.calc_paired_blockers_confidence(['{'], ['}'])
        self.calc_line_length_confidence(code, self.max_expected_line)
Ejemplo n.º 14
0
    def __init__(self, code):
        super().__init__()

        operand_types = []

        whitespace_tb = WhitespaceTokenBuilder()
        newline_tb = NewlineTokenBuilder()

        integer_tb = IntegerTokenBuilder("'")
        integer_exponent_tb = IntegerExponentTokenBuilder("'")
        hex_integer_tb = PrefixedIntegerTokenBuilder(
            '0x', False, '0123456789abcdefABCDEF_')
        long_integer_tb = SuffixedIntegerTokenBuilder('L', False, None)
        real_tb = RealTokenBuilder(False, False, "'")
        real_exponent_tb = RealExponentTokenBuilder(False, False, 'E', "'")
        float_real_tb = SuffixedRealTokenBuilder(False, False, ['f'], False,
                                                 None)
        operand_types.append('number')

        leads = '_'
        extras = '_'
        identifier_tb = IdentifierTokenBuilder(leads, extras)
        operand_types.append('identifier')

        symbol_tb = PrefixedIdentifierTokenBuilder("'", 'symbol', True)
        operand_types.append('symbol')

        quotes = ['"']
        string_tb = EscapedStringTokenBuilder(quotes, 0)
        triple_string_tb = TripleQuoteStringTokenBuilder(quotes)
        operand_types.append('string')

        slash_slash_comment_tb = SlashSlashCommentTokenBuilder()
        slash_star_comment_tb = SlashStarCommentTokenBuilder()

        line_continuation_tb = SingleCharacterTokenBuilder(
            '\\', 'line continuation', False)
        terminators_tb = SingleCharacterTokenBuilder(';',
                                                     'statement terminator',
                                                     False)

        known_operators = [
            '+', '-', '*', '/', '%', '&', '|', '^', '<<', '>>', '&&', '||',
            '=', '+=', '-=', '*=', '/=', '%=', '&=', '|=', '^=', '<<=', '>>=',
            '>:', '⇒', '=>', '=', '<%', '<:', '←', '<-', '#', '@', '==', '!=',
            '>', '<', '>=', '<=', '!', '~', '<<<', '>>>', '.', '++', '--',
            'new'
        ]

        self.unary_operators = ['+', '-', '*', '!', '~', '++', '--', 'new']

        self.postfix_operators = ['++', '--']

        groupers = ['(', ')', ',', '[', ']', '{', '}', ':']
        group_starts = ['(', '[', ',', '{']
        group_mids = [',', ':']
        group_ends = [')', ']', '}']

        groupers_tb = CaseInsensitiveListTokenBuilder(groupers, 'group', False)

        known_operator_tb = CaseSensitiveListTokenBuilder(
            known_operators, 'operator', False)

        keywords = [
            'abstract', 'case', 'catch', 'class', 'def', 'do', 'else',
            'extends', 'final', 'finally', 'for', 'forSome', 'if', 'implicit',
            'import', 'lazy', 'match', 'object', 'override', 'package',
            'private', 'protected', 'return', 'sealed', 'then', 'throw',
            'trait', 'try', 'type', 'val', 'var', 'while', 'with', 'yield'
        ]

        keyword_tb = CaseSensitiveListTokenBuilder(keywords, 'keyword', False)

        values = ['false', 'true', 'null', 'this', 'super']

        values_tb = CaseSensitiveListTokenBuilder(values, 'value', True)
        operand_types.append('value')

        invalid_token_builder = InvalidTokenBuilder()

        tokenbuilders = [
            newline_tb, whitespace_tb, line_continuation_tb, terminators_tb,
            integer_tb, integer_exponent_tb, hex_integer_tb, long_integer_tb,
            real_tb, real_exponent_tb, float_real_tb, keyword_tb, values_tb,
            groupers_tb, known_operator_tb, identifier_tb, symbol_tb,
            string_tb, triple_string_tb, slash_slash_comment_tb,
            slash_star_comment_tb, self.unknown_operator_tb,
            invalid_token_builder
        ]

        tokenizer = Tokenizer(tokenbuilders)
        tokens = tokenizer.tokenize(code)
        tokens = Examiner.combine_adjacent_identical_tokens(
            tokens, 'invalid operator')
        self.tokens = Examiner.combine_adjacent_identical_tokens(
            tokens, 'invalid')

        self.calc_statistics()

        tokens = self.source_tokens()
        tokens = Examiner.join_all_lines(tokens)

        self.calc_token_confidence()
        self.calc_token_2_confidence()

        num_operators = self.count_my_tokens(['operator', 'invalid operator'])
        if num_operators > 0:
            self.calc_operator_confidence(num_operators)
            allow_pairs = []
            self.calc_operator_2_confidence(tokens, num_operators, allow_pairs)
            self.calc_operator_3_confidence(tokens, num_operators, group_ends,
                                            allow_pairs)
            self.calc_operator_4_confidence(tokens, num_operators,
                                            group_starts, allow_pairs)

        self.calc_group_confidence(tokens, group_mids)

        operand_types_2 = ['number', 'symbol']
        self.calc_operand_n_confidence(tokens, operand_types_2, 2)
        self.calc_operand_n_confidence(tokens, operand_types, 4)

        self.calc_keyword_confidence()
        self.calc_paired_blockers_confidence(['{'], ['}'])
        self.calc_line_length_confidence(code, self.max_expected_line)
Ejemplo n.º 15
0
    def __init__(self, code, version):
        super().__init__()

        operand_types = []

        whitespace_tb = WhitespaceTokenBuilder()
        newline_tb = NewlineTokenBuilder()

        integer_tb = IntegerTokenBuilder("'")
        integer_exponent_tb = IntegerExponentTokenBuilder("'")
        hex_integer_tb = PrefixedIntegerTokenBuilder('0x', False,
                                                     '0123456789abcdefABCDEF')
        binary_integer_tb = PrefixedIntegerTokenBuilder('0b', False, '01')
        real_tb = RealTokenBuilder(False, False, "'")
        real_exponent_tb = RealExponentTokenBuilder(False, False, 'E', "'")
        operand_types.append('number')

        leads = '_'
        extras = '_'
        identifier_tb = IdentifierTokenBuilder(leads, extras)
        operand_types.append('identifier')

        command_tb = PrefixedIdentifierTokenBuilder('!', 'command', False)

        metaclass_tb = PrefixedIdentifierTokenBuilder('?', 'metaclass', False)

        quotes = ['"', "'", "’"]
        string_tb = MatlabStringTokenBuilder(quotes, False)
        operand_types.append('string')

        line_comment_m_tb = LeadToEndOfLineTokenBuilder('%', False, 'comment')
        line_comment_o_tb = LeadToEndOfLineTokenBuilder('#', False, 'comment')
        block_comment_m_tb = BlockTokenBuilder('%{', '%}', 'comment')
        block_comment_o_tb = BlockTokenBuilder('#{', '#}', 'comment')

        line_continuation_tb = KeywordTokenBuilder('...', 'line continuation')

        known_operators = [
            '+', '-', '.*', '*', './', '/', '\\', '.^', '^', ".'", "'", '=',
            '==', '~=', '>', '>=', '<', '<=', '&', '|', '&&', '||', '~', '@',
            '.', '.?'
        ]

        operators_octave = [
            '++', '--', '+=', '-=', '*=', '/=', '^=', '!', '!=', '**'
        ]

        if version == 'octave':
            known_operators += operators_octave

        self.unary_operators = ['+', '-', '~', '@']

        self.postfix_operators = ["'"]

        groupers = ['(', ')', ',', '[', ']', '{', '}', ';', ':']
        group_starts = ['(', '[', ',', '{']
        # group_mids = [',', ';', ':']
        group_ends = [')', ']', '}']

        groupers_tb = CaseInsensitiveListTokenBuilder(groupers, 'group', False)

        known_operator_tb = CaseSensitiveListTokenBuilder(
            known_operators, 'operator', False)

        keywords = [
            'break', 'case', 'catch', 'classdef', 'continue', 'else', 'elseif',
            'end', 'for', 'function', 'global', 'if', 'otherwise', 'parfor',
            'persistent', 'return', 'spmd', 'switch', 'try', 'while'
        ]

        keywords_octave = ['endfor', 'endif', 'endwhile']

        if version == 'octave':
            keywords += keywords_octave

        keyword_tb = CaseSensitiveListTokenBuilder(keywords, 'keyword', False)

        values = ['inf', 'Nan']

        values_tb = CaseSensitiveListTokenBuilder(values, 'value', True)
        operand_types.append('value')

        invalid_token_builder = InvalidTokenBuilder()

        tokenbuilders = [
            newline_tb, whitespace_tb, line_continuation_tb, integer_tb,
            integer_exponent_tb, hex_integer_tb, binary_integer_tb, real_tb,
            real_exponent_tb, keyword_tb, values_tb, groupers_tb,
            known_operator_tb, identifier_tb, command_tb, metaclass_tb,
            string_tb, line_comment_m_tb, block_comment_m_tb
        ]

        tokenbuilders_2 = [line_comment_o_tb, block_comment_o_tb]

        if version == 'octave':
            tokenbuilders += tokenbuilders_2

        tokenbuilders_9 = [self.unknown_operator_tb, invalid_token_builder]

        tokenbuilders += tokenbuilders_9

        tokenizer = Tokenizer(tokenbuilders)
        tokens = tokenizer.tokenize(code)
        tokens = Examiner.combine_adjacent_identical_tokens(
            tokens, 'invalid operator')
        self.tokens = Examiner.combine_adjacent_identical_tokens(
            tokens, 'invalid')

        self.calc_statistics()

        tokens = self.source_tokens()
        tokens = Examiner.join_all_lines(tokens)

        self.calc_token_confidence()
        self.calc_token_2_confidence()

        num_operators = self.count_my_tokens(['operator', 'invalid operator'])
        if num_operators > 0:
            self.calc_operator_confidence(num_operators)
            allow_pairs = []
            self.calc_operator_2_confidence(tokens, num_operators, allow_pairs)
            self.calc_operator_3_confidence(tokens, num_operators, group_ends,
                                            allow_pairs)
            self.calc_operator_4_confidence(tokens, num_operators,
                                            group_starts, allow_pairs)

        # self.calc_group_confidence(tokens, group_mids)

        # operand_types_2 = ['number']
        # self.calc_operand_n_confidence(tokens, operand_types_2, 2)
        self.calc_operand_n_confidence(tokens, operand_types, 4)

        self.calc_keyword_confidence()

        self.calc_paired_blockers_confidence(['{'], ['}'])
        self.calc_line_length_confidence(code, self.max_expected_line)