Esempio n. 1
0
def extract_comments(filename):
    """Extracts a list of comments from the given Go source file.

    Comments are represented with the Comment class found in the common module.
    Go comments come in two forms, single and multi-line comments.
        - Single-line comments begin with '//' and continue to the end of line.
        - Multi-line comments begin with '/*' and end with '*/' and can span
            multiple lines of code. If a multi-line comment does not terminate
            before EOF is reached, then an exception is raised.
    Go comments are not allowed to start in a string or rune literal. This
    module makes sure to watch out for those:
        - Interpreted strings ("...") and rune literals ('...') honour
            backslash escapes.
        - Raw strings (`...`) do not: a backslash inside them is an ordinary
            character and the next backtick always ends the literal.

    https://golang.org/ref/spec#Comments

    Args:
        filename: String name of the file to extract comments from.
    Returns:
        Python list of common.Comment in the order that they appear in the file.
    Raises:
        common.FileError: File was unable to be open or read.
        common.UnterminatedCommentError: Encountered an unterminated multi-line
            comment.
    """
    try:
        # Read the whole file at once instead of one character per read call.
        with open(filename, 'r') as source_file:
            code = source_file.read()
    except OSError as exception:
        raise common.FileError(str(exception)) from exception

    # Parser states:
    #   0: plain code            1: saw '/'
    #   2: in '//' comment       3: in '/* */' comment
    #   4: in '/* */' comment, previous char was '*'
    #   5: in string/rune literal
    #   6: in string/rune literal, previous char was a backslash
    state = 0
    current_comment = []
    comments = []
    line_counter = 1
    comment_start = 1
    string_char = ''
    literal_openers = ('"', "'", '`')
    for char in code:
        if state == 0:
            # Waiting for comment start character or beginning of
            # string or rune literal.
            if char == '/':
                state = 1
            elif char in literal_openers:
                string_char = char
                state = 5
        elif state == 1:
            # Found comment start character, classify next character and
            # determine if single or multi-line comment.
            if char == '/':
                state = 2
            elif char == '*':
                comment_start = line_counter
                state = 3
            elif char in literal_openers:
                # The '/' was a division operator immediately followed by a
                # literal (e.g. a/"//"); the literal must still be skipped.
                string_char = char
                state = 5
            else:
                state = 0
        elif state == 2:
            # In single-line comment, read characters until EOL.
            if char == '\n':
                comment = common.Comment(''.join(current_comment),
                                         line_counter)
                comments.append(comment)
                current_comment = []
                state = 0
            else:
                current_comment.append(char)
        elif state == 3:
            # In multi-line comment, add characters until '*' is
            # encountered.
            if char == '*':
                state = 4
            else:
                current_comment.append(char)
        elif state == 4:
            # In multi-line comment with asterisk found. Determine if
            # comment is ending.
            if char == '/':
                comment = common.Comment(''.join(current_comment),
                                         comment_start,
                                         multiline=True)
                comments.append(comment)
                current_comment = []
                state = 0
            else:
                current_comment.append('*')
                # Care for multiple '*' in a row: stay in this state so the
                # newest '*' can still pair with a following '/'.
                if char != '*':
                    current_comment.append(char)
                    state = 3
        elif state == 5:
            # In string literal, expect literal end or escape character.
            if char == string_char:
                state = 0
            elif char == '\\' and string_char != '`':
                # Raw (backtick) strings have no escape sequences.
                state = 6
        elif state == 6:
            # In string literal, escaping current char.
            state = 5
        if char == '\n':
            line_counter += 1

    # EOF.
    if state in (3, 4):
        raise common.UnterminatedCommentError()
    if state == 2:
        # Was in single-line comment. Create comment.
        comment = common.Comment(''.join(current_comment), line_counter)
        comments.append(comment)
    return comments
Esempio n. 2
0
 def testTwoStringsFollowedByComment(self):
     """Two adjacent empty string literals do not hide the comment after them."""
     source = r'"""" // foo'
     found = c_parser.extract_comments(source)
     expected = [common.Comment(' foo', 1)]
     self.assertEqual(found, expected)
 def testSimpleMain(self):
     """A line comment ahead of a small program is the only comment found."""
     source = "// this is a comment\nint main() {\nreturn 0;\n}\n"
     found = self.ExtractComments(source)
     # Characters 2..19 are the comment text that follows the '//' marker.
     wanted = common.Comment(source[2:20], 1, multiline=False)
     self.assertEqual(found, [wanted])
Esempio n. 4
0
def extract_comments(code):
    """Collects every comment found in a piece of Javascript source code.

    Each comment is returned as a common.Comment. Two comment forms are
    recognised:
        - '//' comments, which run to the end of the line.
        - '/* ... */' comments, which may span several lines and are reported
            with the line on which they start.
    Comment markers that appear inside single- or double-quoted strings are
    ignored; a backslash inside a string escapes the following character.

    Args:
        code: String containing code to extract comments from.
    Returns:
        Python list of common.Comment in the order that they appear in the code.
    Raises:
        common.UnterminatedCommentError: A '/*' comment was still open when the
            end of the code was reached.
    """
    mode = 'code'
    pieces = []
    found = []
    line_no = 1
    block_start_line = 1
    quote = ''
    for ch in code:
        if mode == 'code':
            # Plain code: look for a possible comment opener or a string.
            if ch == '/':
                mode = 'slash'
            elif ch == '"' or ch == "'":
                quote = ch
                mode = 'string'
        elif mode == 'slash':
            # One '/' seen: the next character decides what it meant.
            if ch == '/':
                mode = 'line'
            elif ch == '*':
                block_start_line = line_no
                mode = 'block'
            else:
                mode = 'code'
        elif mode == 'line':
            # Inside a '//' comment: collect text up to the newline.
            if ch != '\n':
                pieces.append(ch)
            else:
                found.append(common.Comment(''.join(pieces), line_no))
                pieces = []
                mode = 'code'
        elif mode == 'block':
            # Inside a '/* */' comment: a '*' may begin the terminator.
            if ch == '*':
                mode = 'star'
            else:
                pieces.append(ch)
        elif mode == 'star':
            # Inside a '/* */' comment, right after a '*'.
            if ch == '/':
                found.append(common.Comment(''.join(pieces),
                                            block_start_line,
                                            multiline=True))
                pieces = []
                mode = 'code'
            else:
                # The pending '*' was ordinary text after all.
                pieces.append('*')
                if ch != '*':
                    pieces.append(ch)
                    mode = 'block'
                # Otherwise remain here: the new '*' may pair with a '/'.
        elif mode == 'string':
            # Inside a quoted string: wait for the matching quote.
            if ch == quote:
                mode = 'code'
            elif ch == '\\':
                mode = 'escape'
        else:
            # Escaped character inside a string: skip it.
            mode = 'string'
        if ch == '\n':
            line_no += 1

    # End of input.
    if mode in ('block', 'star'):
        raise common.UnterminatedCommentError()
    if mode == 'line':
        # The last line was a '//' comment without a trailing newline.
        found.append(common.Comment(''.join(pieces), line_no))
    return found
Esempio n. 5
0
 def testSingleLineComment(self):
     """Input that is only a '//' comment yields that comment's text."""
     source = '// single line comment'
     found = self.ExtractComments(source)
     # Everything after the two-character '//' marker is the comment body.
     wanted = common.Comment(source[2:], 1, multiline=False)
     self.assertEqual(found, [wanted])
Esempio n. 6
0
 def testMultiLineCommentWithStars(self):
     """A block comment made only of asterisks keeps its inner asterisks."""
     source = "/***************/"
     found = self.ExtractComments(source)
     # Strip the opening '/*' and closing '*/' to get the expected body.
     wanted = common.Comment(source[2:-2], 1, multiline=True)
     self.assertEqual(found, [wanted])
Esempio n. 7
0
 def testDoubleComment(self):
     """A second '#' inside a comment does not start another comment."""
     source = '# this is not # another comment'
     found = ruby_parser.extract_comments(source)
     # The body is everything after the first '#'.
     wanted = common.Comment(source[1:], 1, multiline=False)
     self.assertEqual(found, [wanted])
Esempio n. 8
0
 def testMultiLineCommentWithStars(self):
     """A block comment made only of asterisks keeps its inner asterisks."""
     source = "/***************/"
     found = js_parser.extract_comments(source)
     # Strip the opening '/*' and closing '*/' to get the expected body.
     wanted = common.Comment(source[2:-2], 1, multiline=True)
     self.assertEqual(found, [wanted])
def extract_comments(code):
    """Extracts a list of comments from the given shell script.

    Comments are represented with the Comment class found in the common module.
    Shell script comments only come in one form, single-line. Single line
    comments start with an unquoted or unescaped '#' and continue on until the
    end of the line. A quoted '#' is one that is located within a pair of
    matching single or double quote marks. An escaped '#' is one that is
    immediately preceded by a backslash.

    Inside double quotes a backslash escapes the next character. Inside single
    quotes a backslash has no special meaning, so the next single quote always
    ends the string.

    Args:
        code: String containing code to extract comments from.
    Returns:
        Python list of common.Comment in the order that they appear in the code.
    """
    # Parser states:
    #   0: plain code            1: in comment
    #   2: in quoted string      3: in double-quoted string, after backslash
    #   4: in plain code, after backslash
    state = 0
    string_char = ''
    current_comment = []
    comments = []
    line_counter = 1
    for char in code:
        if state == 0:
            # Waiting for comment start character, beginning of string,
            # or escape character.
            if char == '#':
                state = 1
            elif char in ('"', "'"):
                string_char = char
                state = 2
            elif char == '\\':
                state = 4
        elif state == 1:
            # Found comment start character. Read comment until EOL.
            if char == '\n':
                comment = common.Comment(''.join(current_comment),
                                         line_counter)
                comments.append(comment)
                current_comment = []
                state = 0
            else:
                current_comment.append(char)
        elif state == 2:
            # In string literal, wait for string end or escape char.
            if char == string_char:
                state = 0
            elif char == '\\' and string_char == '"':
                # Only double-quoted strings honour backslash escapes.
                state = 3
        elif state == 3:
            # Escaping current char, inside of string.
            state = 2
        elif state == 4:
            # Escaping current char, outside of string.
            state = 0
        if char == '\n':
            line_counter += 1

    # EOF.
    if state == 1:
        # Was in single line comment. Create comment.
        comment = common.Comment(''.join(current_comment), line_counter)
        comments.append(comment)
    return comments
Esempio n. 10
0
def extract_comments(filename):
    """Extracts a list of comments from the given shell script.

    Comments are represented with the Comment class found in the common module.
    Shell script comments only come in one form, single-line. Single line
    comments start with an unquoted or unescaped '#' and continue on until the
    end of the line. A quoted '#' is one that is located within a pair of
    matching single or double quote marks. An escaped '#' is one that is
    immediately preceded by a backslash.

    Inside double quotes a backslash escapes the next character. Inside single
    quotes a backslash has no special meaning, so the next single quote always
    ends the string.

    Args:
        filename: String name of the file to extract comments from.
    Returns:
        Python list of common.Comment in the order that they appear in the file.
    Raises:
        common.FileError: File was unable to be open or read.
    """
    try:
        # Read the whole file at once instead of one character per read call.
        with open(filename, 'r') as source_file:
            code = source_file.read()
    except OSError as exception:
        raise common.FileError(str(exception)) from exception

    # Parser states:
    #   0: plain code            1: in comment
    #   2: in quoted string      3: in double-quoted string, after backslash
    #   4: in plain code, after backslash
    state = 0
    string_char = ''
    current_comment = []
    comments = []
    line_counter = 1
    for char in code:
        if state == 0:
            # Waiting for comment start character, beginning of string,
            # or escape character.
            if char == '#':
                state = 1
            elif char in ('"', "'"):
                string_char = char
                state = 2
            elif char == '\\':
                state = 4
        elif state == 1:
            # Found comment start character. Read comment until EOL.
            if char == '\n':
                comment = common.Comment(''.join(current_comment),
                                         line_counter)
                comments.append(comment)
                current_comment = []
                state = 0
            else:
                current_comment.append(char)
        elif state == 2:
            # In string literal, wait for string end or escape char.
            if char == string_char:
                state = 0
            elif char == '\\' and string_char == '"':
                # Only double-quoted strings honour backslash escapes.
                state = 3
        elif state == 3:
            # Escaping current char, inside of string.
            state = 2
        elif state == 4:
            # Escaping current char, outside of string.
            state = 0
        if char == '\n':
            line_counter += 1

    # EOF.
    if state == 1:
        # Was in single line comment. Create comment.
        comment = common.Comment(''.join(current_comment), line_counter)
        comments.append(comment)
    return comments
Esempio n. 11
0
 def testOtherCommentedComment(self):
     """A '#' comment whose text starts with '//' is reported as one comment."""
     source = '''<?php
 #// double comment'''
     found = php_parser.extract_comments(source)
     expected = [common.Comment('// double comment', 2)]
     self.assertEqual(found, expected)
Esempio n. 12
0
 def testMultiLineCommentWithStars(self):
     """A block comment made only of asterisks keeps its inner asterisks."""
     source = """<?php
 /***************/"""
     found = php_parser.extract_comments(source)
     # The comment sits on the second line of the input.
     wanted = common.Comment("*************", 2, multiline=True)
     self.assertEqual(found, [wanted])
Esempio n. 13
0
 def testMultiLineComment(self):
     """A block comment spanning two lines is reported at its starting line."""
     source = '''<?php
 /* multiline\ncomment */'''
     found = php_parser.extract_comments(source)
     wanted = common.Comment(' multiline\ncomment ', 2, multiline=True)
     self.assertEqual(found, [wanted])
Esempio n. 14
0
 def testSimpleMain(self):
     """A line comment ahead of a small program is the only comment found."""
     source = "// this is a comment\nint main() {\nreturn 0;\n}\n"
     found = c_parser.extract_comments(source)
     # Characters 2..19 are the comment text that follows the '//' marker.
     wanted = common.Comment(source[2:20], 1, multiline=False)
     self.assertEqual(found, [wanted])
Esempio n. 15
0
 def testDifferentLiteralsSeparatedByComment(self):
     """A double-quoted string inside a comment stays part of that comment."""
     source = r''''This is' # "a comment"'''
     found = ruby_parser.extract_comments(source)
     # Index 10 is the '#'; the comment body starts right after it.
     wanted = common.Comment(source[11:], 1, multiline=False)
     self.assertEqual(found, [wanted])
Esempio n. 16
0
 def testMultiLineComment(self):
     """A block comment spanning two lines is returned with its newline."""
     source = '/* multiline\ncomment */'
     found = js_parser.extract_comments(source)
     # Strip the opening '/*' and closing '*/' to get the expected body.
     wanted = common.Comment(source[2:-2], 1, multiline=True)
     self.assertEqual(found, [wanted])
Esempio n. 17
0
 def testComment(self):
     """Input that is only a '#' comment yields that comment's text."""
     source = '# comment'
     found = ruby_parser.extract_comments(source)
     # The body is everything after the leading '#'.
     wanted = common.Comment(source[1:], 1, multiline=False)
     self.assertEqual(found, [wanted])
Esempio n. 18
0
 def testSingleLineComment(self):
     """Input that is only a '//' comment yields that comment's text."""
     source = '// single line comment'
     found = js_parser.extract_comments(source)
     # Everything after the two-character '//' marker is the comment body.
     wanted = common.Comment(source[2:], 1, multiline=False)
     self.assertEqual(found, [wanted])
Esempio n. 19
0
 def testMultilineComment(self):
     """An HTML comment containing a newline is reported as multiline."""
     source = '<!--multi-line\ncomment-->'
     found = html_parser.extract_comments(source)
     wanted = common.Comment('multi-line\ncomment', 1, multiline=True)
     self.assertEqual(found, [wanted])
Esempio n. 20
0
 def testMultiLineComment(self):
     """A block comment spanning two lines is returned with its newline."""
     source = '/* multiline\ncomment */'
     found = self.ExtractComments(source)
     # Strip the opening '/*' and closing '*/' to get the expected body.
     wanted = common.Comment(source[2:-2], 1, multiline=True)
     self.assertEqual(found, [wanted])
Esempio n. 21
0
 def testComment(self):
     """An HTML comment on a single line is reported as not multiline."""
     source = '<!--comment-->'
     found = html_parser.extract_comments(source)
     wanted = common.Comment('comment', 1, multiline=False)
     self.assertEqual(found, [wanted])
 def testEscapedDoubleQuote(self):
     """A backslash-escaped double quote does not open a string literal."""
     source = '\\"# this is a comment'
     found = ruby_parser.extract_comments(source)
     # Skip the backslash, the quote and the '#' to reach the comment body.
     wanted = common.Comment(source[3:], 1, multiline=False)
     self.assertEqual(found, [wanted])
Esempio n. 23
0
 def testEscapedSingleQuote(self):
     """A backslash-escaped single quote does not open a string literal."""
     source = "\\'# this is a comment"
     found = python_parser.extract_comments(source)
     # Skip the backslash, the quote and the '#' to reach the comment body.
     wanted = common.Comment(source[3:], 1, multiline=False)
     self.assertEqual(found, [wanted])