Ejemplo n.º 1
0
    def setoptions(self, optionStr):
        optionStr = self.tonormalizedoptionstring(optionStr)
        if optionStr == 'default':
            optionValues = self.__defaultOptionValues
        else:
            optionValues = self.optionValues.copy()
            if optionStr != None and optionStr != '':
                for i in range(0, len(optionStr), 2):
                    name = optionStr[i]
                    value = optionStr[i + 1]
                    assert name in optionValues
                    optionValues[name] = self.__tov(value)
        self.optionValues = optionValues

        if optionValues['k']:
            switch_statement_rule = """
    | r_case +(xcep(colon | semicolon | eof) any) colon
    | r_default colon
    | r_switch (block scan ^)
"""
            simple_statement_removal_rule = ""
        else:
            switch_statement_rule = """
    | +((r_case (id | l_bool | l_char | l_int) | r_default) colon) ((block scan ^) ?(null <- r_break semicolon) | (block <- (insert(LB) *(xcep(r_break | r_case | r_default) ^) insert(RB))) ?(null <- r_break semicolon)) // enclose each case clause by block
    | r_switch (block scan ^)
"""
            simple_statement_removal_rule = """
TEXT scan= (null <- simple_statement)
    | r_class id *((r_extends | r_implements) id *(comma id)) (block scan ^); // recurse into top level of class definition

"""
        patternStr = """TEXT scan=
    preq("&(a-z);") (
        (r_abstract <- "abstract")
        | (r_assert <- "assert")
        | (r_boolean <- "boolean")
        | (r_break <- "break")
        | (r_byte <- "byte")
        | (r_case <- "case")
        | (r_catch <- "catch")
        | (m_charAt <- "charAt")
        | (r_char <- "char")
        | (r_class <- "class")
        | (m_clone <- "clone")
        | (m_compareTo <- "compareTo")
        | (r_continue <- "continue")
        | (r_const <- "const")
        | (r_default <- "default")
        | (m_dispose <- "dispose")
        | (r_double <- "double")
        | (r_do <- "do")
        | (r_else <- "else")
        | (r_enum <- "enum")
        | (m_equals <- "equals")
        | (r_extends <- "extends")
        | (r_false <- "false")
        | (r_finally <- "finally")
        | (r_final <- "final")
        | (r_float <- "float")
        | (r_for <- "for")
        | (m_getClass <- "getClass")
        | (m_get <- "get")
        | (r_goto <- "goto")
        | (m_hashCode <- "hashCode")
        | (m_hasNext <- "hasNext")
        | (r_if <- "if")
        | (r_implements <- "implements")
        | (r_import <- "import")
        | (r_instanceof <- "instanceof")
        | (r_interface <- "interface")
        | (r_int <- "int")
        | (m_iterator <- "iterator")
        | (m_length <- "length")
        | (r_long <- "long")
        | (r_native <- "native")
        | (r_new <- "new")
        | (m_next <- "next")
        | (r_null <- "null")
        | (r_package <- "package")
        | (r_private <- "private")
        | (r_protected <- "protected")
        | (r_public <- "public")
        | (r_return <- "return")
        | (m_run <- "run")
        | (r_short <- "short")
        | (m_size <- "size")
        | (r_static <- "static")
        | (r_strictfp <- "strictfp")
        // | (r_super <- "super") // keyword "super" is treated as an identifier
        | (r_switch <- "switch")
        | (r_synchronized <- "synchronized")
        // | (r_this <- "this") // keyword "this" is treated as an identifier
        | (m_toArray <- "toArray")
        | (m_toString <- "toString")
        | (r_throws <- "throws")
        | (r_throw <- "throw")
        | (r_transient <- "transient")
        | (r_true <- "true")
        | (r_try <- "try")
        | (r_void <- "void")
        | (r_volatile <- "volatile")
        | (r_while <- "while")
    ) xcep("&(a-z);" | "&(A-Z);" | "_" | "&(0-9);")
    | (word <- ("&(a-z);" | "&(A-Z);" | "_" | "$") *("&(a-z);" | "&(A-Z);" | "_" | "$" | "&(0-9);"))
    | (multiline_comment <- "/*" *(xcep("*/") any) "*/")
    | (singleline_comment <- "//" *(xcep(eol) any))
    | (l_string <- "&quot;" *("&bslash;" any | xcep("&quot;" | eol) any) "&quot;")
    | (l_char <- "&squot;" *("&bslash;" any | xcep("&squot;" | eol) any) "&squot;")
    | (l_float <- (
            ((+"&(0-9);" "." *"&(0-9);")|(*"&(0-9);" "." +"&(0-9);")) ?(("e" | "E") ?("-" | "+") +"&(0-9);") ?("f" | "F") // modified by Jan Vlegels, 2007/Apr/23
            | +"&(0-9);" ("e" | "E") ?("-" | "+") +"&(0-9);") ?("f" | "F") 
            | +"&(0-9);" ("f" | "F")
    )
    | (l_int <- (("0x" | "0X") +("&(0-9);" | "&(a-f);" | "&(A-F);") | +"&(0-9);") *("l" | "L"))
    | (semicolon <- ";")
    | (comma <- ",") 
    | (LB <- "{") | (RB <- "}") 
    | (LP <- "(") | (RP <- ")") 
    | (LK <- "[") | (RK <- "]") 
    // 4 char operator
    | (op_signed_rshift_assign <- ">>>=")
    // 3 char operators
    | (op_lshift_assign <- "<<=")
    | (op_rshift_assign <- ">>=")
    | (op_signed_rshift <- ">>>")
    // 2 char operators
    | (op_lshift <- "<<")
    // ">>" will not be recognized, becase this parser can not distinguish ">>" from ">" ">"
    | (op_increment <- "++")
    | (op_decrement <- "--")
    | (op_le <- "<=")
    | (op_ge <- ">=")
    | (op_eq <- "==")
    | (op_ne <- "!=")
    | (op_add_assign <- "+=")
    | (op_sub_assign <- "-=")
    | (op_mul_assign <- "*=")
    | (op_div_assign <- "/=")
    | (op_mod_assign <- "%%=")
    | (op_and_assign <- "&amp;" "=")
    | (op_xor_assign <- "^=")
    | (op_or_assign <- "|=")
    | (op_logical_and <- "&amp;" "&amp;")
    | (op_logical_or <- "||")
    // single char operators
    | (op_star <- "*") // may mean mul or wildcard
    | (op_div <- "/")
    | (op_mod <- "%%")
    | (op_plus <- "+") // may mean add or sign plus
    | (op_minus <- "-") // may mean sub or sign minus
    | (op_amp <- "&amp;") // may mean bitwise
    | (op_logical_neg <- "!")
    | (op_complement <- "~")
    | (op_or <- "|")
    | (op_xor <- "^")
    | (op_assign <- "=")
    | (OL <- "<") // may mean less than or template parameter
    | (OG <- ">") // may mean greater than or template parameter
    | (ques <- "?") | (colon <- ":") | (dot <- ".");

TEXT scan= null <- multiline_comment | singleline_comment | " " | "&t;" | "&f;" | "&v;"| eol;

TEXT scan= (r_int <- r_long | r_short) | (r_double <- r_float) | (l_bool <- r_true | r_false)
    | (l_string <- word dot (word match "getString") LP l_string RP); // support for externalized string

TEXT scan= xcep(LB | RB | LP | RP | LK | RK) any 
    | (block <- LB *^ RB)
    | (param <- LP *^ RP)
    | (index <- LK *^ RK);

TEXT scan= word *(dot word) (template_param <- 
        OL
        ?(ques ((word match "super") | r_extends)) ^
        *((comma | op_amp) ?(ques ((word match "super") | r_extends)) ^)
        OG
    ) 
    | (null <- 
        OL 
        (word *(dot word) ((word match "super") | r_extends) ^ | ^) 
        *((comma | op_amp) (word *(dot word) ((word match "super") | r_extends) ^ | ^)) 
        OG
    )
    | word *(dot word) *index | ques *index
    | (block scan ^) | (param scan ^); // recurse into block, and param

TEXT scan= ?(null <- (word match "this") dot) (id <- word *(dot word xcep(param)) ?template_param)
    | (id <- (word match "this"))
    | (l_string <- l_string +(op_plus l_string))
    | (r_annotation_decl <- ("@" r_interface ))
    | (block scan ^) | (param scan ^) | (index scan ^); // recurse into block, index, and param

// remove package, import
TEXT scan= null <- r_package id semicolon | r_import id ?(dot op_star) semicolon;

TEXT scan=
    id (index match LK RK) op_assign (block scan ^) semicolon // T a[] = { ... };
    | r_new id *(dot id) +(index scan ^)
    | id +((index match LK RK) | (index scan ^))
    | id insert(dot) insert(m_get) (index match (LP <- LK) *(xcep(RK) ^) (RP <- RK))
    | dot (m_length <- m_size (param match LP RP)) 
    | dot m_length (null <- (param match LP RP))
    | (block scan ^) // recurse into block
    | (param scan ^) | (index scan ^); // recurse into expression

TEXT scan= (null <- r_private | r_public | r_protected | r_synchronized | r_final | r_abstract | r_strictfp | r_volatile | r_transient)
    | (null <- "@" id ?param)
    | (null <- r_static xcep(LB))
    | (null <- +(r_extends id *(comma id) | r_implements id *(comma id)))
    | (null <- r_throws id *(comma id))
    | (interface_block <- (def_block <- r_interface id ?(r_extends id *(comma id)) block))
    | (anotation_block <- (def_block <- r_annotation_decl id block))
    | (block scan ^) | (param scan ^); // recurse into block and param

// remove array initialization tables
TEXT scan= op_assign (initialization_block <- preq(block)) (null <- block) semicolon
    | index (initialization_block <- preq(block)) (null <- block)
    | (block scan ^) // recurse into block
    | (param scan ^) | (index scan ^); // recurse into expression

TEXT scan= xcep(id | param | index | l_float | l_int | block) any (null <- op_minus) // remove unary minus
    | (method_like <- m_charAt | m_compareTo | m_dispose | m_equals | m_getClass | m_get | m_hashCode | m_hasNext | m_iterator | m_length | m_next | m_run | m_size | m_toArray | m_toString)
    | ques insert(c_cond) // insert tokens for control-flow complexity counter
    | (block scan ^) // recurse into block
    | (param scan ^) | (index scan ^); // recurse into expression

// remove simple delegations; remove empty method definition; remove getter, setter; remove redundant paren of return statement; remove assertion
TEXT scan=
    (null <- (r_void | r_boolean | r_byte | r_char | r_double | r_float | r_int | r_short | r_object | r_string | id) *index
         (id | method_like) param ((block match LB ?r_return id dot id param semicolon RB) | (block match LB RB)))
    | (null <- (r_boolean | r_byte | r_char | r_double | r_float | r_int | r_short | r_object | r_string | id) *index
         (id | method_like) (param match LP RP) (block match LB r_return id semicolon RB))
    | (null <- r_void (id | method_like) param (block match LB id op_assign id semicolon RB))
    | r_return (param match (null <- LP) *(xcep(RP) any) (null <- RP)) semicolon
    | (null <- r_assert *(xcep(semicolon | eof) any) semicolon)
    | (block scan ^) | (param scan ^); // recurse into block and param

TEXT scan= 
    r_if param ((block scan ^) | (block <- insert(LB) ^ insert(RB))) *(r_else r_if param (block | (block <- insert(LB) ^ insert(RB)))) ?(r_else (block | (block <- insert(LB) ^ insert(RB)))) 
    | r_else (block | (block <- insert(LB) ^ insert(RB)))
    | r_while param ((block scan ^) | (block <- insert(LB) ^ insert(RB)))
    | r_for param ((block scan ^) | (block <- insert(LB) ^ insert(RB)))
    | r_do ((block scan ^) | (block <- insert(LB) ^ insert(RB))) r_while param semicolon
    | r_try (block scan ^) *((r_catch param | r_finally) (block scan ^))
    | (r_catch param | r_finally) (block scan ^)
    %(switch_statement_rule)s
    | *(xcep(block | LB | semicolon | r_if | r_while | r_for | r_do | r_try | r_catch | r_finally | r_switch) any) semicolon
    | (block scan ^) | (param scan ^); // recurse into block and param

TEXT scan= 
    r_if param block *(r_else r_if param block) ?(r_else block)
    | r_else block
    | r_while param block
    | r_for param block
    | r_do block r_while param semicolon
    | r_switch param block
    | r_try block *((r_catch param | r_finally) block)
    | (r_catch param | r_finally) block
    | (simple_statement <- *(xcep(block | LB | semicolon | r_if | r_while | r_for | r_do | r_try | r_catch | r_finally| r_switch | r_case | r_default) any) semicolon)
    | (block scan ^) | (param scan ^); // recurse into block and param

%(simple_statement_removal_rule)s

// enclose class/method/constructor definition by block
TEXT scan= (def_block <- r_class id (block scan ^))
    | (def_block <- r_new id param (block scan ^)) 
    | (def_block <- (r_void | r_int | r_long | r_short | r_double | r_float | r_boolean | r_char | r_byte | id) *(index match LK RK) (id | method_like) param (block scan ^))
    | (def_block <- id param (block scan ^)) // constructor
    | (block scan ^) | (param scan ^); // recurse into block and param

// insert tokens for control-flow complexity counter
TEXT scan= (r_if | r_switch) insert(c_cond) | (r_for | r_while) insert(c_loop)
    | (id | method_like) insert(c_func) (param scan ^)
    | (def_block scan ^) | (block scan ^) | (param scan ^) | (index scan ^) | (simple_statement scan ^);
""" % (locals())
        self.pat = easytorq.Pattern(patternStr)

        fmt = easytorq.CngFormatter()

        # parameter by default
        fmt.addreplace('id', 'id|%s')

        fmt.addreplace('id', 'id|%s')
        fmt.addflatten('block')
        fmt.addreplace('LB', '(brace')
        fmt.addreplace('RB', ')brace')
        fmt.addflatten('word')
        fmt.addflatten('param')
        fmt.addreplace('LP', '(paren')
        fmt.addreplace('RP', ')paren')
        fmt.addflatten('index')
        fmt.addreplace('LK', '(braket')
        fmt.addreplace('RK', ')braket')
        fmt.addflatten('simple_statement')
        fmt.addreplace('semicolon', 'suffix:semicolon')
        fmt.addreplace('colon', 'suffix:colon')
        fmt.addformat('def_block', '(def_block', ')def_block')
        fmt.addflatten('method_like')
        if not optionValues['d']:
            # requires exact match
            fmt.addreplace('l_int', 'l_int=%s')
            fmt.addreplace('l_float', 'l_float=%s')
            fmt.addreplace('l_bool', 'l_bool=%s')
        else:
            # non parameter by default
            fmt.addreplace('l_bool', 'l_bool|%s')
            fmt.addreplace('l_int', 'l_int|%s')
            fmt.addreplace('l_float', 'l_float|%s')

        if optionValues['r']:
            fmt.addnone('interface_block')
        else:
            fmt.addflatten('interface_block')
        fmt.addnone('anotation_block')
        if not optionValues['s']:
            # requires exact match
            fmt.addreplace('l_string', 'l_string=%s')
            fmt.addreplace('l_char', 'l_char=%s')
        else:
            # non parameter by default
            fmt.addreplace('l_string', 'l_string|%s')
            fmt.addreplace('l_char', 'l_char|%s')

        self.fmt = fmt
Ejemplo n.º 2
0
    def setoptions(self, optionStr):
        if optionStr not in ( None, '', 'default' ):
            raise pp.InvalidOptionError, "invalid option: " + optionStr
        patternStr = """TEXT scan= +(xcep(eof) any) | insert(eol) eof; // ensure a line terminates by eol

TEXT match= *(
        (macro_line <- "#" *(xcep(eof | eol) any)) eol
        | ?(label <- +("&(a-z);" | "&(A-Z)" | "&(0-9);" | "_") ":") *(xcep(eof | eol) any) eol
    ) 
    *(xcep(eof) any) eof;

TEXT scan= (comment <- "'" *(xcep(eof | eol) any)) 
    | (comment <- "rem" (" " | "&t;") *(xcep(eof | eol) any))
    | (null <- ((" " | "&t;") "_" *(" " | "&t;") eol)); // Continuation line

TEXT scan=
    preq("&(a-z);" | "&(A-Z);") (
        (r_AddHandler <- "AddHandler" | "addhandler" | "ADDHANDLER")
        | (r_AddressOf <- "AddressOf" | "addressof" | "ADDRESSOF")
        | (r_Alias <- "Alias" | "alias" | "ALIAS")
        | (r_AndAlso <- "AndAlso" | "andalso" | "ANDALSO")
        | (r_And <- "And" | "and" | "AND")
        | (r_Ansi <- "Ansi" | "ansi" | "ANSI")
        | (r_Assembly <- "Assembly" | "assembly" | "ASSEMBLY")
        | (r_As <- "As" | "as" | "AS")
        | (r_Auto <- "Auto" | "auto" | "AUTO")
        | (m_BeginProperty <- "BeginProperty" | "beginproperty" | "BEGINPROPERTY")
        | (r_Begin <- "Begin" | "begin" | "BEGIN")
        | (r_Boolean <- "Boolean" | "boolean" | "BOOLEAN")
        | (r_ByRef <- "ByRef" | "byref" | "BYREF")
        | (r_Byte <- "Byte" | "byte" | "BYTE")
        | (r_ByVal <- "ByVal" | "byval" | "BYVAL")
        | (r_Call <- "Call" | "call" | "CALL")
        | (r_Case <- "Case" | "case" | "CASE")
        | (r_Catch <- "Catch" | "catch" | "CATCH")
        | (r_CBool <- "CBool" | "cbool" | "CBOOL")
        | (r_CByte <- "CByte" | "cbyte" | "CBYTE")
        | (r_CChar <- "CChar" | "cchar" | "CCHAR")
        | (r_CDate <- "CDate" | "cdate" | "CDATE")
        | (r_CDec <- "CDec" | "cdec" | "CDEC")
        | (r_CDbl <- "CDbl" | "cdbl" | "CDBL")
        | (r_Char <- "Char" | "char" | "CHAR")
        | (r_CInt <- "CInt" | "cint" | "CINT")
        | (r_Class <- "Class" | "class" | "CLASS")
        | (r_CLng <- "CLng" | "clng" | "CLNG")
        | (r_CObj <- "CObj" | "cobj" | "COBJ")
        | (r_Const <- "Const" | "const" | "CONST")
        | (r_CShort <- "CShort" | "cshort" | "CSHORT")
        | (r_CSng <- "CSng" | "csng" | "CSNG")
        | (r_CStr <- "CStr" | "cstr" | "CSTR")
        | (r_CType <- "CType" | "ctype" | "CTYPE")
        | (r_Date <- "Date" | "date" | "DATE")
        | (r_Decimal <- "Decimal" | "decimal" | "DECIMAL")
        | (r_Declare <- "Declare" | "declare" | "DECLARE")
        | (r_Default <- "Default" | "default" | "DEFAULT")
        | (r_Delegate <- "Delegate" | "delegate" | "DELEGATE")
        | (r_Dim <- "Dim" | "dim" | "DIM")
        | (r_DirectCast <- "DirectCast" | "directcast" | "DIRECTCAST")
        | (r_Double <- "Double" | "double" | "DOUBLE")
        | (r_Do <- "Do" | "do" | "DO")
        | (r_Each <- "Each" | "each" | "EACH")
        | (r_ElseIf <- "ElseIf" | "elseif" | "ELSEIF")
        | (r_Else <- "Else" | "else" | "ELSE")
        | (m_EndProperty <- "EndProperty" | "endproperty" | "ENDPROPERTY")
        | (r_End <- "End" | "end" | "END")
        | (r_Enum <- "Enum" | "enum" | "ENUM")
        | (r_Erase <- "Erase" | "erase" | "ERASE")
        | (r_Error <- "Error" | "error" | "ERROR")
        | (r_Event <- "Event" | "event" | "EVENT")
        | (r_Exit <- "Exit" | "exit" | "EXIT")
        | (r_False <- "False" | "false" | "FALSE")
        | (r_Finally <- "Finally" | "finally" | "FINALLY")
        | (r_For <- "For" | "for" | "FOR")
        | (r_Friend <- "Friend" | "friend" | "FRIEND")
        | (r_Function <- "Function" | "function" | "FUNCTION")
        | (r_GetType <- "GetType" | "gettype" | "GETTYPE")
        | (r_Get <- "Get" | "get" | "GET")
        | (r_GoSub <- "GoSub" | "gosub" | "GOSUB")
        | (r_GoTo <- "GoTo" | "goto" | "GOTO")
        | (r_Handles <- "Handles" | "handles" | "HANDLES")
        | (r_If <- "If" | "if" | "IF")
        | (r_Implements <- "Implements" | "implements" | "IMPLEMENTS")
        | (r_Imports <- "Imports" | "imports" | "IMPORTS")
        | (r_Inherits <- "Inherits" | "inherits" | "INHERITS")
        | (r_Integer <- "Integer" | "integer" | "INTEGER")
        | (r_Interface <- "Interface" | "interface" | "INTERFACE")
        | (r_In <- "In" | "in" | "IN")
        | (m_IsArray <- "IsArray" | "isarray" | "ISARRAY")
        | (m_IsDate <- "IsDate" | "isdate" | "ISDATE")
        | (m_IsEmpty <- "IsEmpty" | "isempty" | "ISEMPTY")
        | (m_IsNull <- "IsNull" | "isnull" | "ISNULL")
        | (m_IsNumeric <- "IsNumeric" | "isnumeric" | "ISNUMERIC")
        | (m_IsObject <- "IsObject" | "isobject" | "ISOBJECT")
        | (r_Is <- "Is" | "is" | "IS")
        | (r_Let <- "Let" | "let" | "LET")
        | (r_Lib <- "Lib" | "lib" | "LIB")
        | (r_Like <- "Like" | "like" | "LIKE")
        | (r_Long <- "Long" | "long" | "LONG")
        | (r_Loop <- "Loop" | "loop" | "LOOP")
        // | (r_Me <- "Me" | "me" | "ME")
        | (r_Module <- "Module" | "module" | "MODULE")
        | (r_Mod <- "Mod" | "mod" | "MOD")
        | (r_MustInherit <- "MustInherit" | "mustinherit" | "MUSTINHERIT")
        | (r_MustOverride <- "MustOverride" | "mustoverride" | "MUSTOVERRIDE")
        | (r_MyBase <- "MyBase" | "mybase" | "MYBASE")
        | (r_MyClass <- "MyClass" | "myclass" | "MYCLASS")
        | (r_Namespace <- "Namespace" | "namespace" | "NAMESPACE")
        | (r_New <- "New" | "new" | "NEW")
        | (r_Next <- "Next" | "next" | "NEXT")
        | (r_Nothing <- "Nothing" | "nothing" | "NOTHING")
        | (r_NotInheritable <- "NotInheritable" | "notinheritable" | "NOTINHERITABLE")
        | (r_NotOverridable <- "NotOverridable" | "notoverridable" | "NOTOVERRIDABLE")
        | (r_Not <- "Not" | "not" | "NOT")
        | (r_Object <- "Object" | "object" | "OBJECT")
        | (r_On <- "On" | "on" | "ON")
        | (r_Optional <- "Optional" | "optional" | "OPTIONAL")
        | (r_Option <- "Option" | "option" | "OPTION")
        | (r_OrElse <- "OrElse" | "orelse" | "ORELSE")
        | (r_Or <- "Or" | "or" | "OR")
        | (r_Overloads <- "Overloads" | "overloads" | "OVERLOADS")
        | (r_Overridable <- "Overridable" | "overridable" | "OVERRIDABLE")
        | (r_Overrides <- "Overrides" | "overrides" | "OVERRIDES")
        | (r_ParamArray <- "ParamArray" | "paramarray" | "PARAMARRAY")
        | (r_Preserve <- "Preserve" | "preserve" | "PRESERVE")
        | (r_Private <- "Private" | "private" | "PRIVATE")
        | (r_Property <- "Property" | "property" | "PROPERTY")
        | (r_Protected <- "Protected" | "protected" | "PROTECTED")
        | (r_Public <- "Public" | "public" | "PUBLIC")
        | (r_RaiseEvent <- "RaiseEvent" | "raiseevent" | "RAISEEVENT")
        | (r_ReadOnly <- "ReadOnly" | "readonly" | "READONLY")
        | (r_ReDim <- "ReDim" | "redim" | "REDIM")
        | (r_REM <- "REM" | "rem" | "REM")
        | (r_RemoveHandler <- "RemoveHandler" | "removehandler" | "REMOVEHANDLER")
        | (r_Resume <- "Resume" | "resume" | "RESUME")
        | (r_Return <- "Return" | "return" | "RETURN")
        | (r_Select <- "Select" | "select" | "SELECT")
        | (r_Set <- "Set" | "set" | "SET")
        | (r_Shadows <- "Shadows" | "shadows" | "SHADOWS")
        | (r_Shared <- "Shared" | "shared" | "SHARED")
        | (r_Short <- "Short" | "short" | "SHORT")
        | (r_Single <- "Single" | "single" | "SINGLE")
        | (r_Static <- "Static" | "static" | "STATIC")
        | (r_Step <- "Step" | "step" | "STEP")
        | (r_Stop <- "Stop" | "stop" | "STOP")
        | (r_String <- "String" | "string" | "STRING")
        | (r_Structure <- "Structure" | "structure" | "STRUCTURE")
        | (r_Sub <- "Sub" | "sub" | "SUB")
        | (r_SyncLock <- "SyncLock" | "synclock" | "SYNCLOCK")
        | (r_Then <- "Then" | "then" | "THEN")
        | (r_Throw <- "Throw" | "throw" | "THROW")
        | (r_To <- "To" | "to" | "TO")
        | (r_True <- "True" | "true" | "TRUE")
        | (r_Try <- "Try" | "try" | "TRY")
        | (r_TypeOf <- "TypeOf" | "typeof" | "TYPEOF")
        | (r_Type <- "Type" | "type" | "TYPE")
        | (r_Unicode <- "Unicode" | "unicode" | "UNICODE")
        | (r_Until <- "Until" | "until" | "UNTIL")
        | (r_Variant <- "Variant" | "variant" | "VARIANT")
        | (r_Wend <- "Wend" | "wend" | "WEND")
        | (r_When <- "When" | "when" | "WHEN")
        | (r_While <- "While" | "while" | "WHILE")
        | (r_WithEvents <- "WithEvents" | "withevents" | "WITHEVENTS")
        | (r_With <- "With" | "with" | "WITH")
        | (r_WriteOnly <- "WriteOnly" | "writeonly" | "WRITEONLY")
        | (r_Xor <- "Xor" | "xor" | "XOR")
        | (r_GoSub <- "GoSub" | "gosub" | "GOSUB")
        | (r_Let <- "Let" | "let" | "LET")
        | (r_Variant <- "Variant" | "variant" | "VARIANT")
        | (m_MsgBox <- "MsgBox" | "msgbox" | "MSGBOX")
        | (m_Iif <- "Iif" | "iif" | "IIF")
        | (m_InputBox <- "InputBox" | "inputbox" | "INPUTBOX")
    ) xcep("&(a-z);" | "&(A-Z);" | "_" | "&(0-9);")
    | (word <- ("&(a-z);" | "&(A-Z);" | "_") *("&(a-z);" | "&(A-Z);" | "_" | "&(0-9);"))
    | (word <- "[" ("&(a-z);" | "&(A-Z);" | "_") *("&(a-z);" | "&(A-Z);" | "_" | "&(0-9);") "]")
    | (l_char <- "&quot;" *("&quot;&quot;" | xcep("&quot;" | eol) any) "&quot;") ("C" | "c")
    | (l_string <- "&quot;" *("&quot;&quot;" | xcep("&quot;" | eol) any) "&quot;")
    | (l_string <- "#" *(" " | "&t;" | "&(0-9);" | "/" | ":" | "AM" | "am" | "PM" | "pm") "#") // date
    | (l_float <- (
            +"&(0-9);" "." *"&(0-9);" ?(("e" | "E") ?("-" | "+") +"&(0-9);") ?("F" | "f" | "R" | "r" | "D" | "d") 
            | +"&(0-9);" ("e" | "E") ?("-" | "+") +"&(0-9);") ?("F" | "f" | "R" | "r" | "D" | "d") 
            | +"&(0-9);" ("F" | "f" | "R" | "r" | "D" | "d")
    )
    | (l_int <- (
        "&amp;" ("H" | "h") +("&(0-9);" | "&(a-f);" | "&(A-F);") 
        | "&amp;" ("O" | "o") +"&(0-7);"
        | +"&(0-9);") 
        ?("S" | "s" | "I" | "i" | "L"| "l")
    )
    | (LP <- "(") | (RP <- ")") 
    | (op_plus_eq <- "+=")
    | (op_minus_eq <- "-=")
    | (op_mul_eq <- "*=")
    | (op_div_eq <- "/=")
    | (op_intdiv_eq <- "&slash;=")
    | (op_pow_eq <- "^=")
    | (op_append_eq <- "&=")
    | (op_lshift <- "<<")
    | (op_rshift <- ">>")
    | (op_not_eq <- "<>")
    | (op_ge <- ">=")
    | (op_le <- "<=")
    | (op_gt <- ">")
    | (op_lt <- "<")
    | (equal <- "=")
    | (exclamation <- "!")
    | (comma <- ",")
    | (dot <- ".")
    | (colon <- ":")
    | (plus <- "+")
    | (minus <- "-")
    | (op_mult <- "*")
    | (op_div <- "/")
    | (op_intdiv <- "&bslash;")
    | (amp <- "&amp;")
    | (op_power <- "^");

TEXT scan= (null <- macro_line | comment | label | " " | "&t;")
    | (null <- m_BeginProperty *(xcep(m_EndProperty) any) m_EndProperty)
    | insert(statement_terminator) (null <- eol);

TEXT scan= (r_EXIT_LOOP <- r_Exit (r_Do | r_While | r_Loop))
    | (r_Exit_For <- r_Exit r_for)
    | (r_Exit_Function <- r_Exit r_Function)
    | (r_Exit_Property <- r_Exit r_Property)
    | (r_Exit_Sub <- r_Exit r_Sub)
    | (r_Exit_Try <- r_Exit r_Try)
    | (r_On_Error <- r_On r_Error)
    | (r_Resume_Next <- r_Resume r_Next);

TEXT scan= (id <- ?(dot | exclamation) word *((dot | exclamation) word) ?("%" | amp | "@" | exclamation | "#" | "$"))
    | (statement_terminator <- ":")
    | (l_bool <- r_True | r_False)
    | (r_Integer <- r_Long | r_Byte) // type unification
    | (r_Double <- r_Single) // type unification
    | (null <- r_Then | r_Call | r_Let | r_ByRef | r_Dim)
    | (null <- r_Public | r_Private | r_Protected ?r_Friend | r_Friend | r_Overloads | r_Overrides | r_Overridable);

TEXT scan= statement_terminator (null <- +statement_terminator);

TEXT scan= (null <- r_Declare *(xcep(statement_terminator| eof) any) statement_terminator)
    | (null <- r_Interface *(xcep(r_End) any | r_End xcep(r_Interface) any) r_End r_Interface)
    | (null <- r_WithEvents id r_As id statement_terminator);

TEXT scan= 
    // note: the following rule will not identify blocks around if statement, 
    //   becase I can not invent a parsing rule which support both "if...end if" statement 
    //   and "if statement" enclosed in a line, that appears without "end if".
    (block <- r_Begin *^ r_End xcep(r_Class | r_Function | r_Get | r_Interface | r_Module 
        | r_Namespace | r_Property | r_Set | r_Structure | r_Sub | r_Select | r_Type | r_Try | r_With | r_If))
    | (def_block <- r_Class ?(id *statement_terminator ?(null <- r_Inherits id)) (block <- *^) r_End r_Class)
    | (def_block <- r_Structure ?(id *statement_terminator ?(null <- r_Inherits id)) (block <- *^) r_End r_Structure)
    | (block <- r_Namespace ?(id *statement_terminator (block <- *^)) r_End r_Namespace)
    | (block <- r_Do (r_While | r_Until) *^ r_Loop)
    | (block <- r_Do *^ r_Loop ?(r_While  | r_Until))
    | (block <- insert(r_Do) r_While *^ (r_Loop <- r_End r_While | r_Wend)) // While ... End While, While ... Wend --> Do While ... Loop
    | (block <- r_For ?r_Each *^ r_Next)
    | (def_block <- r_Function *^ r_End r_Function)
    | (def_block <- r_Get *^ r_End r_Get)
    | (def_block <- r_Interface *^ r_End r_Interface)
    | (def_block <- r_Module *^ r_End r_Module)
    | (def_block <- r_Property ?(r_Get | r_Let | r_Set) *^ r_End r_Property)
    | (def_block <- r_Set preq(LP) *^ r_End r_Set)
    | (def_block <- r_Sub *^ r_End r_Sub)
    | (block <- r_Select r_Case *^ *(r_Case ?r_Else (block <- *^)) r_End r_Select)
    | (def_block <- r_Type *^ r_End r_Type)
    | (block <- r_Try (block <- *^) *(r_Catch (block <- *^)) r_End r_Try)
    | (block <- r_With ?(null <- id) (block <- *^) r_End r_With)
    | r_EXIT_LOOP | r_Exit_For | r_Exit_Function | r_Exit_Property | r_Exit_Sub | r_Exit_Try
    | r_Exit xcep(eof) any
    | r_End r_If
    | +xcep(eof | r_End | r_Class | r_Do | r_Loop | r_For | r_Next | r_Function | r_Get | r_Interface | r_Module | r_Namespace | 
        r_Property | r_Set | r_Structure | r_Sub | r_Select | r_Type | r_Try | r_With) any
    | xcep(eof | r_End | r_Loop | r_Next) any;

// insert tokens for control-flow complexity counter
TEXT scan= (r_End (r_If | r_Select | r_Loop | r_While))
    | (r_If | r_Select) insert(c_cond) | r_For insert(c_loop) | (r_Do | r_Loop) (r_While | r_Until) insert(c_loop) | r_While insert(c_loop)
    | (block scan ^) | (def_block scan ^); // recurse into block

TEXT scan= ((block scan ^) | (def_block scan ^)) *(null <- statement_terminator);
"""
        self.pat = easytorq.Pattern(patternStr)
        
        fmt = easytorq.CngFormatter()
        
        # parameter by default
        fmt.addreplace('id', 'id|%s')
        
        # non parameter by default
        fmt.addreplace('l_bool', 'l_bool|%s')
        fmt.addreplace('l_char', 'l_char|%s')
        fmt.addreplace('l_int', 'l_int|%s')
        fmt.addreplace('l_float', 'l_float|%s')
        fmt.addreplace('l_string', 'l_string|%s')
        
        fmt.addflatten('word')
        fmt.addreplace('LP', '(paren')
        fmt.addreplace('RP', ')paren')
        fmt.addformat('block', '(block', ')block')
        fmt.addformat('def_block', '(def_block', ')def_block')
        fmt.addreplace('statement_terminator', 'suffix:colon')
        self.fmt = fmt
Ejemplo n.º 3
0
    def setoptions(self, optionStr):
        optionStr = self.tonormalizedoptionstring(optionStr)
        if optionStr == 'default':
            optionValues = self.__defaultOptionValues
        else:
            optionValues = self.optionValues.copy()
            if optionStr != None and optionStr != '':
                for i in range(0, len(optionStr), 2):
                    name = optionStr[i]
                    value = optionStr[i + 1]
                    assert name in optionValues
                    optionValues[name] = self.__tov(value)
        self.optionValues = optionValues

        if optionValues['k']:
            switch_statement_rule = """
    | r_case +(xcep(colon | semicolon | eof) any) colon
    | r_default colon
    | r_switch (block scan ^)
"""
            simple_statement_removal_rule = ""
        else:
            switch_statement_rule = """
    | +((r_case (id | l_bool | l_char | l_int) | r_default) colon) ((block scan ^) ?(null <- r_break semicolon) | (block <- (insert(LB) *(xcep(r_break | r_case | r_default) ^) insert(RB))) ?(null <- r_break semicolon)) // enclose each case clause by block
    | r_switch (block scan ^)
"""
            simple_statement_removal_rule = "(null <- simple_statement) |"

        patternStr = """TEXT scan= 
    preq("&(a-z);") (
        (op_logical_and <- "and")
        | (op_and_assign <- "and_eq")
        | (m_abort <- "abort")
        | (r_auto <- "auto")
        | (r_amp <- "bitand")
        | (m_assert <- "assert")
        | (r_or <- "bitor")
        | (r_bool <- "bool")
        | (r_break <- "break")
        | (r_case <- "case")
        | (r_catch <- "catch")
        | (r_char <- "char")
        | (r_class <- "class")
        | (op_complement <- "compl")
        | (r_const_cast <- "const_cast") | (r_const <- "const")
        | (r_continue <- "continue")
        | (r_default <- "default")
        | (r_delete <- "delete")
        | (r_dynamic_cast <- "dynamic_cast")
        | (r_double <- "double") | (r_do <- "do")
        | (r_else <- "else")
        | (r_enum <- "enum")
        | (m_exit <- "exit")
        | (r_explicit <- "explicit")
        | (r_extern <- "extern")
        | (r_false <- "false")
        | (r_float <- "float")
        | (r_for <- "for")
        | (r_friend <- "friend")
        | (r_goto <- "goto")
        | (r_if <- "if")
        | (r_inline <- "inline")
        | (r_intmax <- "intmax_t")
        | (r_intptr <- "intptr_t")
        | (r_int64 <- ("int64_t" | "int_least64_t" | "int_fast64_t"))
        | (r_int32 <- ("int32_t" | "int_least32_t" | "int_fast32_t"))
        | (r_int16 <- ("int16_t" | "int_least16_t" | "int_fast16_t"))
        | (r_int8 <- ("int8_t" | "int_least8_t" | "int_fast8_t"))
        | (r_int <- "int")
        | (m_longjmp <- "longjmp")
        | (r_long <- "long")
        | (r_mutable <- "mutable")
        | (r_namespace <- "namespace")
        | (r_new <- "new")
        | (op_logical_neg <- "not")
        | (op_ne <- "not_eq")
        | (m_offsetof <- "offsetof")
        | (r_operator <- "operator")
        | (op_logical_or <- "or")
        | (op_or_assign <- "or_eq")
        | (r_private <- "private")
        | (r_protected <- "protected")
        | (m_ptrdiff_t <- "ptrdiff_t")
        | (r_public <- "public")
        | (r_register <- "register")
        | (r_reinterpret_cast <- "reinterpret_cast")
        | (r_restrict <- "restrict")
        | (r_return <- "return")
        | (r_short <- "short")
        | (m_setjmp <- "setjmp")
        | (r_signed <- "signed")
        | (r_sizeof <- "sizeof")
        | (m_size_t <- "size_t")
        | (r_static <- "static")
        | (r_static_cast <- "static_cast")
        | (r_struct <- "struct")
        | (r_switch <- "switch")
        | (r_template <- "template")
        // | (r_this <- "this") // keyword "this" is treated as an identifier
        | (r_throw <- "throw")
        | (r_true <- "true")
        | (r_try <- "try")
        | (r_typedef <- "typedef")
        | (r_typeid <- "typeid")
        | (r_typename <- "typename")
        | (r_union <- "union")
        | (r_unsigned <- "unsigned")
        | (r_uintmax <- "uintmax_t")
        | (r_uintptr <- "uintptr_t")
        | (r_uint64 <- ("uint64_t" | "uint_least64_t" | "uint_fast64_t"))
        | (r_uint32 <- ("uint32_t" | "uint_least32_t" | "uint_fast32_t"))
        | (r_uint16 <- ("uint16_t" | "uint_least16_t" | "uint_fast16_t"))
        | (r_uint8 <- ("uint8_t" | "uint_least8_t" | "uint_fast8_t"))
        | (r_using <- "using")
        | (r_virtual <- "virtual")
        | (r_void <- "void")
        | (r_volatile <- "volatile")
        | (m_wchar_t <- "wchar_t")
        | (r_while <- "while")
        | (op_xor <- "xor")
        | (op_xor_assign <- "xor_eq")
        | (m_assert <- "assert")
    ) xcep("&(a-z);" | "&(A-Z);" | "_" | "&(0-9);")
    | (word <- ("&(a-z);" | "&(A-Z);" | "_") *("&(a-z);" | "&(A-Z);" | "_" | "&(0-9);"))
    | (multiline_comment <- "/*" *(xcep("*/") any) "*/")
    | (singleline_comment <- "//" *(xcep(eol) any) preq(eol))
    | (l_string <- ?"L" "&quot;" *("&bslash;" any | xcep("&quot;" | eol) any) "&quot;")
    | (l_char <- ?"L" "&squot;" *("&bslash;" any | xcep("&squot;" | eol) any) "&squot;")
    | (l_float <- (
            +"&(0-9);" "." *"&(0-9);" ?(("e" | "E") ?("-" | "+") +"&(0-9);") ?("f" | "l" | "F" | "L") 
            | +"&(0-9);" ("e" | "E") ?("-" | "+") +"&(0-9);") ?("f" | "l" | "F" | "L") 
            | +"&(0-9);" ("f" | "F") ? ("l" | "L")
    )
    | (l_int <- (("0x" | "0X") +("&(0-9);" | "&(a-f);" | "&(A-F);") | +"&(0-9);") *("u" | "l" | "U" | "L"))
    | (macro_line <- "#" *("&bslash;" *(" " | "&t;") eol | xcep(eol | eof | "/*" | "//") any | (multiline_comment <- "/*" *(xcep(eof | "*/") any) "*/")) preq(eol | eof | "//"))
    | (semicolon <- ";")
    | (comma <- ",") 
    | (LB <- "{") | (RB <- "}") 
    | (LP <- "(") | (RP <- ")") 
    | (LK <- "[") | (RK <- "]") 
    // 3 char operators
    | (op_lshift_assign <- "<<=")
    | (op_rshift_assign <- ">>=")
    | (op_pointer_to_member_from_pointer <- "->*")
    // 2 char operators
    | (op_scope_resolution <- "::")
    | (op_lshift <- "<<")
    | (op_rshift <- ">>")
    | (op_increment <- "++")
    | (op_decrement <- "--")
    | (op_member_access_from_pointer <- "->")
    | (op_le <- "<=")
    | (op_ge <- ">=")
    | (op_eq <- "==")
    | (op_ne <- "!=")
    | (op_add_assign <- "+=")
    | (op_sub_assign <- "-=")
    | (op_mul_assign <- "*=")
    | (op_div_assign <- "/=")
    | (op_mod_assign <- "%%=")
    | (op_and_assign <- "&amp;" "=")
    | (op_xor_assign <- "^=")
    | (op_or_assign <- "|=")
    | (op_poiner_to_member_from_reference <- ".*")
    | (op_logical_and <- "&amp;" "&amp;")
    | (op_logical_or <- "||")
    // single char operators
    | (op_star <- "*") // may mean mul or indirection
    | (op_div <- "/")
    | (op_mod <- "%%")
    | (op_plus <- "+") // may mean add or sign plus
    | (op_minus <- "-") // may mean sub or sign minus
    | (op_amp <- "&amp;") // may mean bitwise and or indirection
    | (op_logical_neg <- "!")
    | (op_complement <- "~")
    | (op_or <- "|")
    | (op_xor <- "^")
    | (op_assign <- "=")
    | (OL <- "<") // may mean less than or template parameter
    | (OG <- ">") // may mean greater than or template parameter
    | (ques <- "?") | (colon <- ":") | (dot <- ".");

TEXT scan= (null <- macro_line | multiline_comment | singleline_comment | " " | "&t;" | "&f;" | "&bslash;" *(" " | "&t;") eol | eol)
    | (r_int <- (r_intmax | r_intptr | r_int64 | r_int32 | r_int16))
    | (r_int <- (r_uintmax | r_uintptr | r_uint64 | r_uint32 | r_uint16))
    | (r_int <- m_wchar_t)
    | (r_char <- r_int8)
    | (r_char <- r_uint8);

TEXT scan= preq(r_operator) 
    (
        (word <- r_operator comma)
        | (word <- r_operator (op_logical_neg | op_logical_and | op_logical_or))
        | (word <- r_operator (op_ne | op_eq | OG | OL | op_ge | op_le))
        | (word <- r_operator op_mod)
        | (word <- r_operator (op_mod_assign | op_and_assign | op_add_assign | op_mul_assign | op_add_assign | op_sub_assign | op_div_assign | op_lshift_assign | op_assign | op_rshift_assign | op_xor_assign))
        | (word <- r_operator (op_amp | op_star))
        | (word <- r_operator LP RP)
        | (word <- r_operator (op_plus | op_minus))
        | (word <- r_operator (op_increment | op_decrement))
        | (word <- r_operator (op_member_access_from_pointer | op_pointer_to_member_from_pointer))
        | (word <- r_operator op_div)
        | (word <- r_operator (op_lshift | op_rshift))
        | (word <- r_operator LK RK)
        | (word <- r_operator op_xor)
        | (word <- r_operator op_complement)
        | (word <- r_operator (r_delete | r_new))
        | (word <- r_operator r_bool)
    );

TEXT scan=
    (r_int <- (r_signed | r_unsigned)(r_long r_long r_int | r_long r_int | r_short r_int | r_int))
    | (r_int <- (r_signed | r_unsigned)(r_long r_long | r_long | r_short))
    | (r_char <- (r_signed | r_unsigned) r_char)
    | (r_int <- r_signed | r_unsigned)
    | (r_int <- r_long r_long | r_long | r_short)
    | (r_int <- m_size_t | m_ptrdiff_t | wchar_t)
    | (r_float <- r_long r_double | r_double)
    | (l_int <- (word match "NULL"))
    | (l_bool <- r_true | r_false)
    | (l_string <- +l_string)
    | (null <- (r_private | r_public | r_protected) colon)
    | (null <- r_virtual | r_inline | r_static)
    | (word <- op_scope_resolution word *(op_scope_resolution word) ?(op_scope_resolution op_complement word))
    | (word <- word +(op_scope_resolution word) ?(op_scope_resolution op_complement word))
    | (word <- word op_scope_resolution op_complement word);

TEXT scan= xcep(LB | RB | LP | RP | LK | RK) any
    | (block <- LB *^ RB) 
    | (null <- LP op_star) *^ (null <- RP) (op_member_access_from_pointer <- dot) 
    | (index <- LK *^ RK)
    | (param <- (LP (null <- r_void) RP | LP *^ RP));

TEXT scan= xcep(OL | OG | block | param | semicolon) any | (template_param <- OL *^ OG) 
    | (block scan ^) | (param scan ^) | (index scan ^); // recurse into block and param

TEXT scan= ?(null <- (word match "this" op_member_access_from_pointer))
        (id <- word ?(null <- template_param) *((dot | op_member_access_from_pointer) word xcep(param)) ?(null <- template_param))
    | (id <- (word match "this"))
    | (r_const_cast | r_dynamic_cast | r_reinterpret_cast | r_static_cast) (null <- template_param)
    | (block scan ^) | (param scan ^) | (index scan ^); // recurse into block and param

TEXT scan= op_assign (initialization_block <- preq(block)) (null <- block) semicolon
    | (r_class | r_struct) id (null <- colon *(r_public | r_private | r_protected | r_virtual) id *(comma *(r_public | r_private | r_protected | r_virtual) id))
    | (null <- r_enum ?id block)
    | (null <- m_assert param semicolon)
    | r_return (param match (null <- LP) *(xcep(RP) any) (null <- RP)) semicolon
    | (block scan ^); // recurse into block

TEXT scan= xcep(id | param | RK | l_float | l_int | block) any (null <- op_minus)
    | (null <- r_struct | r_union | r_enum) id xcep(block | colon)
    | ques insert(c_cond) // insert tokens for control-flow complexity counter
    | (block scan ^) // recurse into block
    | (param scan ^) | (index scan ^); // recurse into expression

TEXT scan= (value_list <- (l_bool | l_string | l_int | l_char | l_float | id) +(comma (l_string | l_int | l_char | l_float | id) ?comma))
    | (block scan ^);

TEXT scan= 
    r_if param ((block scan ^) | (block <- insert(LB) ^ insert(RB))) *(r_else r_if param ((block scan ^) | (block <- insert(LB) ^ insert(RB)))) ?(r_else ((block scan ^) | (block <- insert(LB) ^ insert(RB)))) 
    | r_else ((block scan ^) | (block <- insert(LB) ^ insert(RB)))
    | r_while param ((block scan ^) | (block <- insert(LB) ^ insert(RB)))
    | r_for param ((block scan ^) | (block <- insert(LB) ^ insert(RB)))
    | r_do (block | (block <- insert(LB) ^ insert(RB))) r_while param semicolon
    | r_try (block scan ^) *(r_catch param (block scan ^))
    | r_catch (block scan ^)
    %(switch_statement_rule)s
    | (r_return | r_break | r_continue | op_assign) *(xcep(block | LB | semicolon) any) semicolon
    | (null <- (r_friend | r_typedef) *(xcep(block | LB | semicolon | r_if | r_while | r_for | r_do | r_try | r_catch | r_switch) any) semicolon)
    | (null <- r_using r_namespace id semicolon)
    | (null <- r_namespace op_eq id semicolon)
    | *(xcep(block | LB | semicolon | r_if | r_while | r_for | r_do | r_try | r_catch | r_switch) any) semicolon
    | (block scan ^); // recurse into block
    
TEXT scan= 
    r_if param block *(r_else r_if param block) ?(r_else block) 
    | r_else block
    | r_while param block
    | r_for param block
    | r_do block r_while param semicolon
    | r_try block *(r_catch param block)
    | r_catch block
    | r_switch param block
    | (simple_statement <- *(xcep(block | LB | semicolon | r_if | r_while | r_for | r_do | r_try | r_catch | r_switch | r_case | r_default) any) semicolon)
    | (block scan ^); // recurse into block
    
TEXT scan= (simple_statement match (r_return | r_continue | r_break | r_throw) +any)
    | %(simple_statement_removal_rule)s
    // mark simple getter/setter/delegation/empty block
    (+(r_void | r_int | r_char | r_float | r_bool | r_class | r_struct | r_enum | r_union | r_const | r_volatile | op_star | op_amp | index | id) 
        param ?r_const ?(r_throw param)
        ?(colon id param *(comma id param)))
        (getter_body <- (block match LB (simple_statement match r_return ?(id op_member_access_from_pointer) id ?param semicolon) RB)
            | (block match LB (simple_statement match ?(id op_member_access_from_pointer) id param semicolon) RB)
            | ( block match LB RB)) 
    | r_namespace id (block scan ^)
    | r_extern l_string (block scan ^) // recurse into extern "C" block
    | (r_struct | r_union) ?id (block scan ^)
    | (r_class | r_struct | r_union) id (block scan ^); // recurse into top level of class definition

// enclose class/method/function definition by block
TEXT scan= (
        ?(null <- +(r_template template_param)) (
            (null <- (r_class | r_struct | r_union) ?id (block match LB RB)) // remove empty structure definition
            | (def_block <- r_class id (block scan ^))
            | (def_block <- (r_struct | r_union) ?id (block scan ^))
            | (null <- +(r_void | r_int | r_char | r_float | r_bool | r_class | r_struct | r_enum | r_union | r_const | r_volatile | op_star | op_amp | index | (id <- op_complement id) | ?r_typename id) insert(c_func) param ?r_const ?(null <- (r_throw param))
                ?(colon id param *(comma id param))
                getter_body)
            | (def_block <- +(r_int | r_char | r_float | r_bool | r_class | r_struct | r_enum | r_union | r_const | r_volatile | op_star | op_amp | index | (id <- op_complement id) | ?r_typename id ) insert(c_func) param ?r_const ?(null <- (r_throw param))
                ?(null <- colon id param *(comma id param)) // remove initialization list of constructor
                (block scan ^))
            | (def_block <- r_void id insert(c_func) param ?r_const ?(null <- (r_throw param)) (block scan ^))
        )
    )
    | (block scan ^);

// insert tokens for control-flow complexity counter
TEXT scan= (r_if | r_switch) insert(c_cond) | (r_for | r_while) insert(c_loop)
    | (def_block scan ^) | (block scan ^); // recurse into block
TEXT scan= (id | r_int | r_char | r_float | r_bool) id (param scan ^) *(comma id ?(param scan ^)) 
    | id insert(c_func) (param scan ^)
    | (def_block scan ^) | (block scan ^) | (simple_statement scan ^) | (param scan ^) | (index scan ^); // recurse into block, simple_statement, param, index
""" % (locals())
        self.pat = easytorq.Pattern(patternStr)

        fmt = easytorq.CngFormatter()

        # parameter by default
        fmt.addreplace('id', 'id|%s')

        # non parameter by default
        fmt.addreplace('l_bool', 'l_bool|%s')
        fmt.addreplace('l_char', 'l_char|%s')
        fmt.addreplace('l_int', 'l_int|%s')
        fmt.addreplace('l_float', 'l_float|%s')
        fmt.addreplace('l_string', 'l_string|%s')

        fmt.addflatten('block')
        fmt.addreplace('LB', '(brace')
        fmt.addreplace('RB', ')brace')
        fmt.addflatten('word')
        fmt.addflatten('param')
        fmt.addreplace('LP', '(paren')
        fmt.addreplace('RP', ')paren')
        fmt.addflatten('index')
        fmt.addreplace('LK', '(braket')
        fmt.addreplace('RK', ')braket')
        fmt.addterminate('macro_line')
        fmt.addflatten('simple_statement')
        fmt.addreplace('semicolon', 'suffix:semicolon')
        fmt.addreplace('colon', 'suffix:colon')
        fmt.addformat('def_block', '(def_block', ')def_block')
        fmt.addflatten('value_list')
        self.fmt = fmt
Ejemplo n.º 4
0
    def setoptions(self, optionStr):
        if optionStr not in (None, '', 'default'):
            raise pp.InvalidOptionError, "invalid option: " + optionStr

        patternStr = """TEXT scan= (
    chars <- +("&(a-z);" | "&(A-Z);" | "&(0-9);")
) | (
    space <- +(
        "&#x(0-20);" 
        | "&#x7f" 
        | "&#x(80-a0);"
        | "&#x(2000-200f);"
        | "&#x3000"
        | eol
    )
) | (
    punct <-
        "&#x(21-2f);"
        | "&#x(3a-3f);"
        | "&#x(5b-5f);"
        | "&#x(7b-7e);"
        | "&#x(a1-bf);"
        | "&#x(2010-205f);"
        | "&#x(20a0-20b5);"
        | "&#x(2190-21ff);"
        | "&#x(2200-22ff);"
        | "&#x(2300-23db);"
        | "&#x(2400-2426);"
        | "&#x(2440-244a);"
        | "&#x(2600-26b1);"
        | "&#x(2701-27be);"
        | "&#x(2a00-2aff);"
        | "&#x(27c0-27ef);"
        | "&#x(27f0-27ff);"
        | "&#x(2900-297f);"
        | "&#x(2980-29ff);"
        | "&#x(2b00-2b13);"
        | "&#x(2500-257f);"
        | "&#x(2580-259f);"
        | "&#x(25a0-25ff);"
        | "&#x(2e00-2e17);"
        | "&#x(3001-303f);"
        | "&#x(4dc0-4dff);"
        | "&#x(fe10-fe19);"
        | "&#x(ff01-ff0f);"
        | "&#x(ff01-ff0f);"
        | "&#x(ff1a-ff1f);"
        | "&#x(ff3b-ff3f);"
        | "&#x(ff5b-ff65);"
        | "&#x(ffe0-ffee);"
        | "&#x(1d300-1d356);"
) | (
        chars <- xcep(eof) any
);

TEXT scan= (null <- space) | (word <- +chars);
"""
        self.pat = easytorq.Pattern(patternStr)

        fmt = easytorq.CngFormatter()
        fmt.addreplace('punct', 't/%s')
        fmt.addreplace('word', 't/%s')
        self.fmt = fmt
Ejemplo n.º 5
0
    def setoptions(self, optionStr):
        optionStr = self.tonormalizedoptionstring(optionStr)
        if optionStr == 'default':
            optionValues = self.__defaultOptionValues
        else:
            optionValues = self.optionValues.copy()
            if optionStr != None and optionStr != '':
                for i in range(0, len(optionStr), 2):
                    name = optionStr[i]
                    value = optionStr[i + 1]
                    assert name in optionValues
                    optionValues[name] = self.__tov(value)
        self.optionValues = optionValues
        
        if optionValues['k']:
            switch_statement_rule = """
    | r_case +(xcep(colon | semicolon | eof) any) colon
    | r_default colon
    | r_switch (block scan ^)
"""
            simple_statement_removal_rule = ""
        else:
            switch_statement_rule = """
    | +((r_case (id | l_bool | l_char | l_int) | r_default) colon) ((block scan ^) ?(null <- r_break semicolon) | (block <- (insert(LB) *(xcep(r_break | r_case | r_default) ^) insert(RB))) ?(null <- r_break semicolon)) // enclose each case clause by block
    | r_switch (block scan ^)
"""
            simple_statement_removal_rule = """
TEXT scan= (null <- simple_statement)
    | r_namespace id (block scan ^)
    | (r_class | r_struct) id (null <- ?(colon *(xcep(semicolon | block) any))) (block scan ^); // recurse into top level of class definition
"""
        
        patternStr = """TEXT scan= 
    preq("&(a-z);") (
        (r_abstract <- "abstract")
        (r_alias <- "alias")
        | (r_as <- "as")
        // | (r_base <- "base") // keyword "base" is treated as an identifier
        | (r_bool <- "bool")
        | (r_break <- "break")
        | (r_byte <- "byte")
        | (r_case <- "case")
        | (r_catch <- "catch")
        | (r_char <- "char")
        | (r_checked <- "checked")
        | (r_class <- "class")
        | (r_const <- "const")
        | (r_continue <- "continue")
        | (r_decimal <- "decimal")
        | (r_default <- "default")
        | (r_delegate <- "delegate")
        | (r_double <- "double")
        | (r_do <- "do")
        | (r_else <- "else")
        | (r_enum <- "enum")
        | (r_event <- "event")
        | (r_explicit <- "explicit")
        | (r_extern <- "extern")
        | (r_false <- "false")
        | (r_finally <- "finally")
        | (r_fixed <- "fixed")
        | (r_float <- "float")
        | (r_foreach <- "foreach")
        | (r_for <- "for")
        | (r_get <- "get")
        | (r_goto <- "goto")
        | (r_if <- "if")
        | (r_implicit <- "implicit")
        | (r_interface <- "interface")
        | (r_internal <- "internal")
        | (r_int <- "int") | (r_in <- "in")
        | (r_is <- "is")
        | (r_lock <- "lock")
        | (r_long <- "long")
        | (r_namespace <- "namespace")
        | (r_new <- "new")
        | (r_null <- "null")
        //| (r_object <- "object") // keyword "object" is treated as an identifier
        | (r_operator <- "operator")
        | (r_out <- "out")
        | (r_override <- "override")
        | (r_params <- "params")
        | (r_partial <- "partial")
        | (r_private <- "private")
        | (r_protected <- "protected")
        | (r_public <- "public")
        | (r_readonly <- "readonly")
        | (r_ref <- "ref")
        | (r_return <- "return")
        | (r_sbyte <- "sbyte")
        | (r_sealed <- "sealed")
        | (r_set <- "set")
        | (r_short <- "short")
        | (r_sizeof <- "sizeof")
        | (r_stackalloc <- "stackalloc")
        | (r_static <- "static")
        | (r_string <- "string")
        | (r_struct <- "struct")
        | (r_switch <- "switch")
        // | (r_this <- "this") // keyword "this" is treated as an identifier
        | (r_throw <- "throw")
        | (r_true <- "true")
        | (r_try <- "try")
        | (r_typeof <- "typeof")
        | (r_uint <- "uint")
        | (r_ulong <- "ulong")
        | (r_unchecked <- "unchecked")
        | (r_unsafe <- "unsafe")
        | (r_ushort <- "ushort")
        | (r_using <- "using")
        // | (r_value <- "value") // keyword "value" is treated as an identifier
        | (r_virtual <- "virtual")
        | (r_void <- "void")
        | (r_volatile <- "volatile")
        | (r_while <- "while")
        | (r_yield <- "yield")
    ) xcep("&(a-z);" | "&(A-Z);" | "_" | "&(0-9);")
    | preq("&(A-Z);") (
        (m_Clone <- "Clone")
        | (m_CompareTo <- "CompareTo")
        | (m_Dispose <- "Dispose")
        | (m_Equals <- "Equals")
        | (m_GetHashCode <- "GetHashCode")
        | (m_GetType <- "GetType")
        | (m_InitializeComponent <- "InitializeComponent")
        | (m_Nullable <- ?"System." "Nullable")
        | (m_ReferenceEquals <- "ReferenceEquals")
        | (m_ToString <- "ToString")
        | (r_object <- "System.Object" | "Object")
        | (r_string <- "System.String" | "String")
        | (r_char <- "System.Char" | "Char")
        | (r_sbyte <- "System.SByte" | "SByte")
        | (r_short <- "System.Int16" | "Int16")
        | (r_ushort <- "System.UInt16" | "UInt16")
        | (r_int <- "System.Int32" | "Int32")
        | (r_uint <- "System.UInt32" | "UInt32")
        | (r_long <- "System.Int64" | "Int64")
        | (r_ulong <- "System.UInt64" | "UInt64")
        | (r_float <- "System.Single" | "Single")
        | (r_double <- "System.Double" | "Double")
        | (r_bool <- "System.Boolean" | "Boolean")
        | (r_decimal <- "System.Decimal" | "Decimal")
    ) xcep("&(a-z);" | "&(A-Z);" | "_" | "&(0-9);")
    | (word <- ?"@" ("&(a-z);" | "&(A-Z);" | "_") *("&(a-z);" | "&(A-Z);" | "_" | "&(0-9);"))
    | (multiline_comment <- "/*" *(+"*" (xcep("/") any) | xcep("*") any) +"*" "/")
    | (singleline_comment <- "//" *(xcep(eol) any))
    | (l_string <- "@" "&quot;" *("&quot;" "&quot;" | xcep("&quot;" | eof) any) "&quot;") // berbatim string
    | (l_string <- "&quot;" *("&bslash;" any | xcep("&quot;" | eol) any) "&quot;")
    | (l_char <- "&squot;" *("&bslash;" any | xcep("&squot;" | "&quot;" | eol) any) "&squot;")
    | (l_float <- (
            +"&(0-9);" "." *"&(0-9);" ?(("e" | "E") ?("-" | "+") +"&(0-9);") ?("f" | "F" | "d" | "D" | "m" | "M") 
            | +"&(0-9);" ("e" | "E") ?("-" | "+") +"&(0-9);") ?("f" | "F" | "d" | "D" | "m" | "M")
            | +"&(0-9);" ("f" | "F" | "d" | "D" | "m" | "M")
    )
    | (l_int <- (("0x" | "0X") +("&(0-9);" | "&(a-f);" | "&(A-F);") | +"&(0-9);") *("l" | "L" | "u" | "U"))
    | (macro_line <- "#" *(xcep(eof | eol) any))
    | (semicolon <- ";")
    | (comma <- ",") 
    | (LB <- "{") | (RB <- "}") 
    | (LP <- "(") | (RP <- ")") 
    | (LK <- "[") | (RK <- "]") 
    // 3 char operators
    | (op_lshift_assign <- "<<=")
    | (op_rshift_assign <- ">>=")
    // 2 char operators
    | (op_lshift <- "<<")
    | (op_rshift <- ">>")
    | (op_increment <- "++")
    | (op_decrement <- "--")
    | (op_le <- "<=")
    | (op_ge <- ">=")
    | (op_eq <- "==")
    | (op_ne <- "!=")
    | (op_add_assign <- "+=")
    | (op_sub_assign <- "-=")
    | (op_mul_assign <- "*=")
    | (op_div_assign <- "/=")
    | (op_mod_assign <- "%%=")
    | (op_and_assign <- "&amp;" "=")
    | (op_xor_assign <- "^=")
    | (op_or_assign <- "|=")
    | (op_logical_and <- "&amp;" "&amp;")
    | (op_logical_or <- "||")
    | (op_lambda <- "=>")
    | (op_namespace_alias_resolution <- "::")
    // single char operators
    | (op_star <- "*") // may mean mul or wildcard
    | (op_div <- "/")
    | (op_mod <- "%%")
    | (op_plus <- "+") // may mean add or sign plus
    | (op_minus <- "-") // may mean sub or sign minus
    | (op_amp <- "&amp;")
    | (op_logical_neg <- "!")
    | (op_complement <- "~")
    | (op_or <- "|")
    | (op_xor <- "^")
    | (op_assign <- "=")
    | (OL <- "<") // may mean less than or template parameter
    | (OG <- ">") // may mean greater than or template parameter
    | (ques <- "?") | (colon <- ":") | (dot <- ".");

TEXT scan= null <- macro_line | multiline_comment | singleline_comment | " " | "&t;" | eol;

TEXT scan= null <- (?r_extern r_alias | r_event | r_delegate xcep(LP | LB | semicolon) any | r_using xcep(LP | semicolon) any) *(xcep(semicolon) any) semicolon;

TEXT match= (null <- +(attribute <- (LK *(xcep(eof | RK) any) RK))) *any | *any;
TEXT scan= (semicolon | RB) (null <- +(attribute <- (LK *(xcep(eof | RK) any) RK))); // remove attribute

TEXT scan= 
    (r_byte <- r_sbyte)
    | (r_int <- r_uint | r_short | r_ushort | r_long | r_ulong) 
    | (r_double <- r_float) 
    | (l_bool <- r_true | r_false);

TEXT scan= xcep(LB | RB | LP | RP) any 
    | (get_set_decl <- LB r_get semicolon ?(r_set semicolon) RB | LB r_set semicolon RB)
    | (block <- LB *^ RB) 
    | (param <- LP *^ RP)
    | (index <- LK *^ RK);

// remove generated code
TEXT scan= (null <- r_void m_InitializeComponent (param match LP RP) block)
    | (null <- r_void m_Dispose (param match LP r_bool (word match "disposing") RP)
        (block match LB r_if (param match LP (word match "disposing") RP) 
            (block match LB r_if (param match LP (word match "components") op_ne r_null RP) 
                (block match LB (word match "components") dot m_Dispose (param match LP RP) semicolon RB)
            RB)
            (word match "base") dot m_Dispose (param match LP (word match "disposing") RP) semicolon
        RB)
    )
    | (block scan ^); // recurse into block

TEXT scan= xcep(OL | OG | block | param) any | (template_param <- OL *^ OG) 
    | (block scan ^) | (param scan ^); // recurse into block and param

TEXT scan= ?(null <- (word match "this") dot) (id <- word *((dot | op_namespace_alias_resolution) word) ?template_param ?ques)
    | (id <- (word match "this"))
    | (string_litral <- l_string +(op_plus l_string))
    | (block scan ^) | (param scan ^); // recurse into block and param

TEXT scan= (null <- m_Nullable OL) id (null <- OG)
    | ques insert(c_cond) // insert tokens for control-flow complexity counter
    | (block scan ^) | (param scan ^); // recurse into block and param

// remove simple delegations; remove empty method definition; remove redundant paren of return statement
TEXT scan=
    (null <- (r_void | r_int | r_long | r_short | r_double | r_float | r_bool | r_char | r_byte | r_decimal | r_string | r_object | id) *(index match LK RK)
         id param ((block match LB ?r_return id dot id param semicolon RB) | (block match LB RB)))
    | r_return (param match (null <- LP) *(xcep(RP) any) (null <- RP)) semicolon
    | op_assign (initialization_block <- preq(block)) (null <- block) semicolon
    | index (initialization_block <- preq(block)) (null <- block)
    | (null <- r_enum id ?(colon any) (initialization_block <- preq(block)) (null <- block))
    | (null <- r_private | r_public | r_protected | r_internal | r_override | r_virtual | r_sealed | r_unsafe | r_static | r_partial)
    | (null <- r_get) (null <- (block match LB r_return (id | l_string | l_char | l_int | l_float | l_bool) semicolon RB)) // simple getter
    | (null <- r_set) (null <- (block match LB id op_assign id semicolon RB)) // simple setter
    | (null <- r_interface id ?(colon id *(comma id)) block)
    | (block scan ^) | (param scan ^); // recurse into block and param

TEXT scan= 
    r_if param ((block scan ^) | (block <- insert(LB) ^ insert(RB))) *(r_else r_if param ((block scan ^) | (block <- insert(LB) ^ insert(RB)))) ?(r_else ((block scan ^) | (block <- insert(LB) ^ insert(RB)))) 
    | r_else ((block scan ^) | (block <- insert(LB) ^ insert(RB)))
    | r_while param ((block scan ^) | (block <- insert(LB) ^ insert(RB)))
    | (r_for | r_foreach) param ((block scan ^) | (block <- insert(LB) ^ insert(RB)))
    | r_do ((block scan ^)| (block <- insert(LB) ^ insert(RB))) r_while param semicolon
    | r_try (block scan ^) *((r_catch ?param | r_finally) (block scan ^))
    | (r_catch ?param | r_finally) (block scan ^)
    %(switch_statement_rule)s
    | *(xcep(block | LB | semicolon | r_if | r_while | r_for | r_foreach | r_do | r_try | r_catch | r_finally | r_switch) any) semicolon
    | (block scan ^); // recurse into block

TEXT scan= 
    r_if param block *(r_else r_if param block) ?(r_else block) 
    | r_else block
    | r_while param block
    | (r_for | r_foreach) param block
    | r_do block r_while param semicolon
    | r_try block *((r_catch ?param | r_finally) block)
    | (r_catch ?param | r_finally) block
    | r_switch param block
    | +(r_using param) block
    | r_delegate ?param block
    | (simple_statement <- *(xcep(block | LB | semicolon | r_if | r_while | r_for | r_foreach | r_do | r_try | r_catch | r_finally | r_switch | r_case | r_default | r_using | r_delegate) any) semicolon)
    | (null <- (r_int | r_long | r_short | r_double | r_float | r_bool | r_char | r_byte | r_decimal | r_string | r_object | id) *(index match LK RK) id (block match LB RB)) // property without any getter/setter
    | (block scan ^); // recurse into block

%(simple_statement_removal_rule)s

// enclose class/method definition by block
TEXT scan= (def_block <- (r_class | r_struct) id (block scan ^))
    | (def_block <- (r_void | r_int | r_long | r_short | r_double | r_float | r_bool | r_char | r_byte | r_decimal | r_string | r_object | id) 
        (
            (id param (block scan ^))
            | (id (block scan ^))
        )
    )
    | (def_block <- (r_get | r_set) (block scan ^))
    | (block scan ^);

// insert tokens for control-flow complexity counter
TEXT scan= (r_if | r_switch) insert(c_cond) | (r_for | r_while | r_foreach) insert(c_loop)
    | id insert(c_func) (param scan ^)
    | (r_get | r_set) insert(c_func) (block scan ^)
    | (def_block scan ^) | (block scan ^) | (param scan ^) | (index scan ^) | (simple_statement scan ^);
""" % (locals())

        self.pat = easytorq.Pattern(patternStr)
        
        fmt = easytorq.CngFormatter()
        
        # parameter by default
        fmt.addreplace('id', 'id|%s')
        
        # non parameter by default
        fmt.addreplace('l_bool', 'l_bool|%s')
        fmt.addreplace('l_char', 'l_char|%s')
        fmt.addreplace('l_int', 'l_int|%s')
        fmt.addreplace('l_float', 'l_float|%s')
        fmt.addreplace('l_string', 'l_string|%s')
        
        fmt.addflatten('block')
        fmt.addreplace('LB', '(brace')
        fmt.addreplace('RB', ')brace')
        fmt.addflatten('word')
        fmt.addflatten('param')
        fmt.addreplace('LP', '(paren')
        fmt.addreplace('RP', ')paren')
        fmt.addflatten('index')
        fmt.addreplace('LK', '(braket')
        fmt.addreplace('RK', ')braket')
        fmt.addflatten('simple_statement')
        fmt.addreplace('semicolon', 'suffix:semicolon')
        fmt.addreplace('colon', 'suffix:colon')
        fmt.addformat('def_block', '(def_block', ')def_block')
        self.fmt = fmt