def extract_comments(filename):
    """Extracts a list of comments from the given Go source file.

    Comments are represented with the Comment class found in the common
    module. Go comments come in two forms, single and multi-line comments.
        - Single-line comments begin with '//' and continue to the end of
          line.
        - Multi-line comments begin with '/*' and end with '*/' and can span
          multiple lines of code. If a multi-line comment does not terminate
          before EOF is reached, then an exception is raised.

    Go comments are not allowed to start in a string or rune literal, so the
    scanner tracks interpreted strings ("..."), rune literals ('...') and raw
    strings (backticks). Backslash escapes are honoured in interpreted
    strings and rune literals only; inside a raw string a backslash is an
    ordinary character and the literal ends at the next backtick.
    https://golang.org/ref/spec#Comments

    Args:
        filename: String name of the file to extract comments from.
    Returns:
        Python list of common.Comment in the order that they appear in the
        file.
    Raises:
        common.FileError: File was unable to be open or read.
        common.UnterminatedCommentError: Encountered an unterminated
            multi-line comment.
    """
    # Read the whole file up front so only the I/O is guarded by the
    # OSError handler and the scanner does not issue one read() per char.
    try:
        with open(filename, 'r') as source_file:
            code = source_file.read()
    except OSError as exception:
        raise common.FileError(str(exception))

    # Scanner states:
    #   0: plain code          1: saw '/'
    #   2: in '//' comment     3: in '/* */' comment
    #   4: in '/* */' comment, previous char was '*'
    #   5: in string/rune      6: in string/rune, previous char was '\'
    state = 0
    current_comment = ''
    comments = []
    line_counter = 1
    comment_start = 1
    string_char = ''
    for char in code:
        if state == 0:
            # Waiting for comment start character or beginning of
            # string or rune literal.
            if char == '/':
                state = 1
            elif char in ('"', "'", '`'):
                string_char = char
                state = 5
        elif state == 1:
            # Found comment start character, classify next character and
            # determine if single or multi-line comment.
            if char == '/':
                state = 2
            elif char == '*':
                comment_start = line_counter
                state = 3
            else:
                state = 0
        elif state == 2:
            # In single-line comment, read characters until EOL.
            if char == '\n':
                comments.append(common.Comment(current_comment, line_counter))
                current_comment = ''
                state = 0
            else:
                current_comment += char
        elif state == 3:
            # In multi-line comment, add characters until '*' is
            # encountered.
            if char == '*':
                state = 4
            else:
                current_comment += char
        elif state == 4:
            # In multi-line comment with asterisk found. Determine if
            # comment is ending.
            if char == '/':
                comments.append(
                    common.Comment(
                        current_comment, comment_start, multiline=True))
                current_comment = ''
                state = 0
            else:
                current_comment += '*'
                # Care for multiple '*' in a row: stay in state 4 so the
                # next char can still close the comment.
                if char != '*':
                    current_comment += char
                    state = 3
        elif state == 5:
            # In string literal, expect literal end or escape character.
            if char == string_char:
                state = 0
            elif char == '\\' and string_char != '`':
                # Raw (backtick) strings have no escape sequences, so a
                # backslash there must not hide the closing backtick.
                state = 6
        elif state == 6:
            # In string literal, escaping current char.
            state = 5
        # Newlines are counted in every state, including inside literals
        # and comments, so reported line numbers stay aligned with the file.
        if char == '\n':
            line_counter += 1

    # EOF.
    if state in (3, 4):
        raise common.UnterminatedCommentError()
    if state == 2:
        # Was in single-line comment. Create comment.
        comments.append(common.Comment(current_comment, line_counter))
    return comments
def testTwoStringsFollowedByComment(self):
    """A '//' comment that follows two back-to-back "" literals is found."""
    source = r'"""" // foo'
    wanted = [common.Comment(' foo', 1)]
    extracted = c_parser.extract_comments(source)
    self.assertEqual(extracted, wanted)
def testSimpleMain(self):
    """The leading '//' comment of a small program is the only result."""
    source = "// this is a comment\nint main() {\nreturn 0;\n}\n"
    # Characters 2..19 are the comment text that follows the '//' marker.
    wanted = [common.Comment(source[2:20], 1, multiline=False)]
    extracted = self.ExtractComments(source)
    self.assertEqual(extracted, wanted)
def extract_comments(code):
    """Extracts a list of comments from the given Javascript source code.

    Comments are represented with the Comment class found in the common
    module. Javascript comments come in two forms, single and multi-line
    comments.
        - Single-line comments begin with '//' and continue to the end of
          line.
        - Multi-line comments begin with '/*' and end with '*/' and can span
          multiple lines of code. If a multi-line comment does not terminate
          before EOF is reached, then an exception is raised.

    This module takes quoted strings into account when extracting comments
    from source code: text inside single-quoted, double-quoted and backtick
    (template literal) strings is never treated as a comment. Regular
    expression literals and '${...}' substitutions inside template literals
    are not parsed specially.

    Args:
        code: String containing code to extract comments from.
    Returns:
        Python list of common.Comment in the order that they appear in the
        code.
    Raises:
        common.UnterminatedCommentError: Encountered an unterminated
            multi-line comment.
    """
    # Scanner states:
    #   0: plain code          1: saw '/'
    #   2: in '//' comment     3: in '/* */' comment
    #   4: in '/* */' comment, previous char was '*'
    #   5: in string           6: in string, previous char was '\'
    state = 0
    current_comment = ''
    comments = []
    line_counter = 1
    comment_start = 1
    string_char = ''
    for char in code:
        if state == 0:
            # Waiting for comment start character or beginning of
            # string. Backticks open template literals, which may
            # legitimately contain '//' (e.g. URLs).
            if char == '/':
                state = 1
            elif char in ('"', "'", '`'):
                string_char = char
                state = 5
        elif state == 1:
            # Found comment start character, classify next character and
            # determine if single or multi-line comment.
            if char == '/':
                state = 2
            elif char == '*':
                comment_start = line_counter
                state = 3
            else:
                state = 0
        elif state == 2:
            # In single-line comment, read characters until EOL.
            if char == '\n':
                comments.append(common.Comment(current_comment, line_counter))
                current_comment = ''
                state = 0
            else:
                current_comment += char
        elif state == 3:
            # In multi-line comment, add characters until '*' is
            # encountered.
            if char == '*':
                state = 4
            else:
                current_comment += char
        elif state == 4:
            # In multi-line comment with asterisk found. Determine if
            # comment is ending.
            if char == '/':
                comments.append(
                    common.Comment(
                        current_comment, comment_start, multiline=True))
                current_comment = ''
                state = 0
            else:
                current_comment += '*'
                # Care for multiple '*' in a row: stay in state 4 so the
                # next char can still close the comment.
                if char != '*':
                    current_comment += char
                    state = 3
        elif state == 5:
            # In string literal, expect literal end or escape character.
            if char == string_char:
                state = 0
            elif char == '\\':
                state = 6
        elif state == 6:
            # In string literal, escaping current char.
            state = 5
        # Newlines are counted in every state so that line numbers stay
        # correct across multi-line comments and strings.
        if char == '\n':
            line_counter += 1

    # EOF.
    if state in (3, 4):
        raise common.UnterminatedCommentError()
    if state == 2:
        # Was in single-line comment. Create comment.
        comments.append(common.Comment(current_comment, line_counter))
    return comments
def testSingleLineComment(self):
    """Input that is only a '//' comment yields that one comment on line 1."""
    source = '// single line comment'
    # Everything after the two-character '//' marker is the comment text.
    wanted = [common.Comment(source[2:], 1, multiline=False)]
    extracted = self.ExtractComments(source)
    self.assertEqual(extracted, wanted)
def testMultiLineCommentWithStars(self):
    """A block comment made up only of '*' keeps its inner stars as text."""
    source = "/***************/"
    # Strip the opening '/*' and closing '*/' to get the expected body.
    wanted = [common.Comment(source[2:-2], 1, multiline=True)]
    extracted = self.ExtractComments(source)
    self.assertEqual(extracted, wanted)
def testDoubleComment(self):
    """A second '#' inside a comment stays part of the same comment text."""
    source = '# this is not # another comment'
    # Everything after the first '#' is expected as a single comment.
    wanted = [common.Comment(source[1:], 1, multiline=False)]
    extracted = ruby_parser.extract_comments(source)
    self.assertEqual(extracted, wanted)
def testMultiLineCommentWithStars(self):
    """A block comment made up only of '*' keeps its inner stars as text."""
    source = "/***************/"
    # Strip the opening '/*' and closing '*/' to get the expected body.
    wanted = [common.Comment(source[2:-2], 1, multiline=True)]
    extracted = js_parser.extract_comments(source)
    self.assertEqual(extracted, wanted)
def extract_comments(code):
    """Extracts a list of comments from the given shell script.

    Comments are represented with the Comment class found in the common
    module. Shell script comments only come in one form, single-line. Single
    line comments start with an unquoted or unescaped '#' and continue on
    until the end of the line. A quoted '#' is one that is located within a
    pair of matching single or double quote marks. An escaped '#' is one
    that is immediately preceded by a backslash.

    Inside double quotes a backslash escapes the next character. Inside
    single quotes a backslash is an ordinary character, so the string ends
    at the very next single quote.

    Args:
        code: String containing code to extract comments from.
    Returns:
        Python list of common.Comment in the order that they appear in the
        code.
    """
    # Scanner states:
    #   0: plain code          1: in comment
    #   2: in quoted string    3: in string, previous char was '\'
    #   4: plain code, previous char was '\'
    state = 0
    string_char = ''
    current_comment = ''
    comments = []
    line_counter = 1
    for char in code:
        if state == 0:
            # Waiting for comment start character, beginning of string,
            # or escape character.
            if char == '#':
                state = 1
            elif char in ('"', "'"):
                string_char = char
                state = 2
            elif char == '\\':
                state = 4
        elif state == 1:
            # Found comment start character. Read comment until EOL.
            if char == '\n':
                comments.append(common.Comment(current_comment, line_counter))
                current_comment = ''
                state = 0
            else:
                current_comment += char
        elif state == 2:
            # In string literal, wait for string end or escape char.
            if char == string_char:
                state = 0
            elif char == '\\' and string_char == '"':
                # Only double-quoted strings have backslash escapes; in a
                # single-quoted string the backslash must not hide the
                # closing quote.
                state = 3
        elif state == 3:
            # Escaping current char, inside of string.
            state = 2
        elif state == 4:
            # Escaping current char, outside of string.
            state = 0
        # Newlines are counted in every state so line numbers stay correct
        # across multi-line strings.
        if char == '\n':
            line_counter += 1

    # EOF.
    if state == 1:
        # Was in single line comment. Create comment.
        comments.append(common.Comment(current_comment, line_counter))
    return comments
def extract_comments(filename):
    """Extracts a list of comments from the given shell script.

    Comments are represented with the Comment class found in the common
    module. Shell script comments only come in one form, single-line. Single
    line comments start with an unquoted or unescaped '#' and continue on
    until the end of the line. A quoted '#' is one that is located within a
    pair of matching single or double quote marks. An escaped '#' is one
    that is immediately preceded by a backslash.

    Inside double quotes a backslash escapes the next character. Inside
    single quotes a backslash is an ordinary character, so the string ends
    at the very next single quote.

    Args:
        filename: String name of the file to extract comments from.
    Returns:
        Python list of common.Comment in the order that they appear in the
        file.
    Raises:
        common.FileError: File was unable to be open or read.
    """
    # Read the whole file up front so only the I/O is guarded by the
    # OSError handler and the scanner does not issue one read() per char.
    try:
        with open(filename, 'r') as source_file:
            code = source_file.read()
    except OSError as exception:
        raise common.FileError(str(exception))

    # Scanner states:
    #   0: plain code          1: in comment
    #   2: in quoted string    3: in string, previous char was '\'
    #   4: plain code, previous char was '\'
    state = 0
    string_char = ''
    current_comment = ''
    comments = []
    line_counter = 1
    for char in code:
        if state == 0:
            # Waiting for comment start character, beginning of string,
            # or escape character.
            if char == '#':
                state = 1
            elif char in ('"', "'"):
                string_char = char
                state = 2
            elif char == '\\':
                state = 4
        elif state == 1:
            # Found comment start character. Read comment until EOL.
            if char == '\n':
                comments.append(common.Comment(current_comment, line_counter))
                current_comment = ''
                state = 0
            else:
                current_comment += char
        elif state == 2:
            # In string literal, wait for string end or escape char.
            if char == string_char:
                state = 0
            elif char == '\\' and string_char == '"':
                # Only double-quoted strings have backslash escapes; in a
                # single-quoted string the backslash must not hide the
                # closing quote.
                state = 3
        elif state == 3:
            # Escaping current char, inside of string.
            state = 2
        elif state == 4:
            # Escaping current char, outside of string.
            state = 0
        # Newlines are counted in every state so line numbers stay correct
        # across multi-line strings.
        if char == '\n':
            line_counter += 1

    # EOF.
    if state == 1:
        # Was in single line comment. Create comment.
        comments.append(common.Comment(current_comment, line_counter))
    return comments
def testOtherCommentedComment(self):
    """A '#' comment on line 2 keeps a following '//' as comment text."""
    # The line break after '<?php' is spelled as an explicit '\n' so the
    # line-2 expectation does not depend on how this file is laid out.
    code = '<?php\n#// double comment'
    comments = php_parser.extract_comments(code)
    self.assertEqual(comments, [common.Comment('// double comment', 2)])
def testMultiLineCommentWithStars(self):
    """A block comment of only '*' on line 2 keeps its inner stars."""
    # The line break after '<?php' is spelled as an explicit '\n' so the
    # line-2 expectation does not depend on how this file is laid out.
    code = '<?php\n/***************/'
    comments = php_parser.extract_comments(code)
    expected = [common.Comment("*************", 2, multiline=True)]
    self.assertEqual(comments, expected)
def testMultiLineComment(self):
    """A block comment starting on line 2 and spanning a newline is kept."""
    # The line break after '<?php' is spelled as an explicit '\n' so the
    # line-2 expectation does not depend on how this file is laid out.
    code = '<?php\n/* multiline\ncomment */'
    comments = php_parser.extract_comments(code)
    expected = [common.Comment(' multiline\ncomment ', 2, multiline=True)]
    self.assertEqual(comments, expected)
def testSimpleMain(self):
    """The leading '//' comment of a small program is the only result."""
    source = "// this is a comment\nint main() {\nreturn 0;\n}\n"
    # Characters 2..19 are the comment text that follows the '//' marker.
    wanted = [common.Comment(source[2:20], 1, multiline=False)]
    extracted = c_parser.extract_comments(source)
    self.assertEqual(extracted, wanted)
def testDifferentLiteralsSeparatedByComment(self):
    """A '#' after a single-quoted string starts a comment that may itself
    contain a double-quoted string."""
    source = r''''This is' # "a comment"'''
    # Index 10 is the '#', so the comment text begins at index 11.
    wanted = [common.Comment(source[11:], 1, multiline=False)]
    extracted = ruby_parser.extract_comments(source)
    self.assertEqual(extracted, wanted)
def testMultiLineComment(self):
    """A block comment spanning a newline is reported at its first line."""
    source = '/* multiline\ncomment */'
    # Strip the opening '/*' and closing '*/' to get the expected body.
    wanted = [common.Comment(source[2:-2], 1, multiline=True)]
    extracted = js_parser.extract_comments(source)
    self.assertEqual(extracted, wanted)
def testComment(self):
    """Input that is only a '#' comment yields that one comment on line 1."""
    source = '# comment'
    # Everything after the leading '#' is the comment text.
    wanted = [common.Comment(source[1:], 1, multiline=False)]
    extracted = ruby_parser.extract_comments(source)
    self.assertEqual(extracted, wanted)
def testSingleLineComment(self):
    """Input that is only a '//' comment yields that one comment on line 1."""
    source = '// single line comment'
    # Everything after the two-character '//' marker is the comment text.
    wanted = [common.Comment(source[2:], 1, multiline=False)]
    extracted = js_parser.extract_comments(source)
    self.assertEqual(extracted, wanted)
def testMultilineComment(self):
    """An HTML comment containing a newline is expected with multiline=True."""
    source = '<!--multi-line\ncomment-->'
    wanted = [common.Comment('multi-line\ncomment', 1, multiline=True)]
    extracted = html_parser.extract_comments(source)
    self.assertEqual(extracted, wanted)
def testMultiLineComment(self):
    """A block comment spanning a newline is reported at its first line."""
    source = '/* multiline\ncomment */'
    # Strip the opening '/*' and closing '*/' to get the expected body.
    wanted = [common.Comment(source[2:-2], 1, multiline=True)]
    extracted = self.ExtractComments(source)
    self.assertEqual(extracted, wanted)
def testComment(self):
    """An HTML comment on one line is expected with multiline=False."""
    source = '<!--comment-->'
    wanted = [common.Comment('comment', 1, multiline=False)]
    extracted = html_parser.extract_comments(source)
    self.assertEqual(extracted, wanted)
def testEscapedDoubleQuote(self):
    """A backslash-escaped '"' does not open a string, so the '#' after it
    still starts a comment."""
    source = '\\"# this is a comment'
    # Skip the backslash, the quote and the '#' to reach the comment text.
    wanted = [common.Comment(source[3:], 1, multiline=False)]
    extracted = ruby_parser.extract_comments(source)
    self.assertEqual(extracted, wanted)
def testEscapedSingleQuote(self):
    """A backslash-escaped "'" does not open a string, so the '#' after it
    still starts a comment."""
    source = "\\'# this is a comment"
    # Skip the backslash, the quote and the '#' to reach the comment text.
    wanted = [common.Comment(source[3:], 1, multiline=False)]
    extracted = python_parser.extract_comments(source)
    self.assertEqual(extracted, wanted)