def test_extract_comments_empty_multiline(self):
        '''Find empty comments

        Opens a block with triple double-quotation marks, feeds zero to
        three blank lines and then closes the block.  Every line is given
        to a newly created extractor together with the matcher and
        is_comment values returned for the previous line.
        '''
        delimiter = '"""'

        def feed(text, matcher, is_comment):
            # Parse one line and check that no tail and no comment text
            # come back; only the parsing state is handed to the caller.
            tail, comments, comment, matcher, is_comment = \
                tested.CommentExtractor([]).extract_comments_in_line(
                    text, None, matcher, is_comment)
            self.assertEqual(tail, '')
            self.assertEqual(comments, [])
            self.assertEqual(comment, '')
            return matcher, is_comment

        for blank_count in range(4):
            # Opening delimiter
            matcher, is_comment = feed(delimiter, None, False)
            self.assertIsNotNone(matcher)
            self.assertTrue(is_comment)

            # Blank lines inside the block
            for _ in range(blank_count):
                matcher, is_comment = feed('', matcher, is_comment)
                self.assertIsNotNone(matcher)
                self.assertTrue(is_comment)

            # Closing delimiter
            matcher, is_comment = feed(delimiter, matcher, is_comment)
            self.assertIsNone(matcher)
            self.assertTrue(is_comment)
    def test_extract_multiline1(self):
        '''Find comments in lines beginning with triple single-quotation marks

        Four lines are parsed in turn, each with its own extractor.  The
        matcher and is_comment values returned for one line are passed on
        to the next line, and all returned comments are gathered in one
        list that is checked after the last line.
        '''
        collected = []

        def scan(text, leading_comment, matcher, is_comment):
            # Parse text and then each returned tail, 10 calls at most,
            # until the tail is empty.
            parser = tested.CommentExtractor([])
            for _ in range(10):
                text, found, _, matcher, is_comment = \
                    parser.extract_comments_in_line(
                        text, leading_comment, matcher, is_comment)
                collected.extend(found)
                if not text:
                    break
            return found, matcher, is_comment

        # Line that opens the comment
        found, matcher, is_comment = scan("'''1st\n", None, None, False)
        self.assertEqual(found, [])
        self.assertIsNotNone(matcher)
        self.assertTrue(is_comment)

        # Lines inside the comment, one of them looking like a string literal
        inner_lines = (("2nd line\n", '1st'),
                       ("'string'\n", '1st 2nd line'))
        for text, leading_comment in inner_lines:
            found, matcher, is_comment = scan(
                text, leading_comment, matcher, is_comment)
            self.assertEqual(found, [])
            self.assertIsNotNone(matcher)
            self.assertTrue(is_comment)

        # Line that closes the comment and carries code and a # comment
        _, matcher, _ = scan("last''' code # comment\n",
                             '1st 2nd line string', matcher, is_comment)
        self.assertEqual(collected, ['last', '', 'comment'])
        self.assertIsNone(matcher)
    def test_extract_comments_hatch(self):
        '''Find comments after #

        Each row holds a line, the leading comment passed to the extractor
        and the comments expected from the whole line.
        '''
        table = (
            ('', None, []),
            ('#word ', None, ['word']),
            ('# word ', None, ['word']),
            ('## word ', None, ['word']),
            ('### word ', None, ['word']),
            ('## # of e', None, ['# of e']),
            ('code # word', None, ['word']),
            ('code # word', 'a', ['', 'word']),
        )
        parser = tested.CommentExtractor([])

        for source, leading, wanted in table:
            remaining, matcher, in_comment = source, None, False
            found = []
            # Parse the line and then each returned tail, 10 calls at most
            for _ in range(10):
                remaining, part, _, matcher, in_comment = \
                    parser.extract_comments_in_line(
                        remaining, leading, matcher, in_comment)
                found.extend(part)
                if not remaining:
                    break

            self.assertEqual(found, wanted)
            self.assertIsNone(matcher)
    def test_format_comments_not_empty(self):
        '''Format non-empty comments

        Each row holds the comments, the leading comment, the line header,
        the current line header and the expected formatted string.  For
        every row the returned leading comment must be None and the
        returned line header must be empty.
        '''
        table = (
            ([''], '', 'line 1:', 'line 2:',
             'line 1:\n\n'),
            (['first'], '', 'line 1:', 'line 2:',
             'line 1:first\n\n'),
            (['second'], 'first', 'line 1:', 'line 2:',
             'line 1:first second\n\n'),
            (['second', 'third'], 'first', 'line 1:', 'line 2:',
             'line 1:first second\n\nline 2:third\n\n'),
            ([''], 'first', 'line 1:', 'line 2:',
             'line 1:first\n\n'),
            (['second'], 'first', 'line 1:', 'line 1:',
             'line 1:first second\n\n'),
            (['second'], 'first', 'line 1:', 'line 2:',
             'line 1:first second\n\n'),
            (['second', 'third'], 'first', 'line 1:', 'line 2:',
             'line 1:first second\n\nline 2:third\n\n'),
        )
        formatter = tested.CommentExtractor([])

        for row in table:
            comments, leading, header, current_header, wanted = row
            formatted, leading_after, header_after = formatter.format_comments(
                comments, leading, header, current_header)
            self.assertEqual(formatted, wanted)
            self.assertIsNone(leading_after)
            self.assertEqual(header_after, '')
    def test_extract_comments_multiline2(self):
        '''Find comments in lines beginning with triple double-quotation marks

        Two lines are parsed with the same extractor.  The second line
        closes the first comment, contains a string literal and opens
        another comment that is still open at the end of the line.
        '''
        parser = tested.CommentExtractor([])
        collected = []

        def scan(text, leading, matcher, in_comment):
            # Parse text and then each returned tail, 10 calls at most,
            # until the tail is empty.
            for _ in range(10):
                text, found, pending, matcher, in_comment = \
                    parser.extract_comments_in_line(
                        text, leading, matcher, in_comment)
                collected.extend(found)
                if not text:
                    break
            return found, pending, matcher, in_comment

        found, pending, matcher, in_comment = scan(
            '"""1st\n', None, None, False)
        self.assertEqual(found, [])
        self.assertEqual(pending, '1st')
        self.assertIsNotNone(matcher)
        self.assertTrue(in_comment)

        _, pending, matcher, in_comment = scan(
            '2nd line""" "string" """next\n', '1st', matcher, in_comment)
        self.assertEqual(collected, ['2nd line'])
        self.assertEqual(pending, 'next')
        self.assertIsNotNone(matcher)
        self.assertTrue(in_comment)
    def test_extract_delimiter_comment1(self):
        '''Find comments opened by triple quotation marks

        Each case pairs a line with the tail expected behind the first
        triple quotation marks in it.  extract_delimiter must return a
        matcher and report that a comment begins.
        '''
        cases = [["'''", ''],
                 ["pre'''comment", 'comment'],
                 ["pre'''comment'''post", "comment'''post"],
                 ['pre"""comment', 'comment'],
                 ['pre"""comment"""post', 'comment"""post'],
                 ['pre"""comment' + "'''post", "comment'''post"],
                 ["pre'''comment" + '"""post', 'comment"""post']]

        extractor = tested.CommentExtractor([])
        for line, expected_tail in cases:
            matcher, tail, is_comment = extractor.extract_delimiter(line)
            self.assertIsNotNone(matcher)
            self.assertEqual(tail, expected_tail)
            self.assertTrue(is_comment)
 def test_extract_delimiter_invalid(self):
     '''Cannot find delimiters

     For a line without quotation marks, extract_delimiter must return
     no matcher, the unchanged line as the tail and a false is_comment.
     '''
     finder = tested.CommentExtractor([])
     for text in ('', 'a'):
         result = finder.extract_delimiter(text)
         matcher, tail, is_comment = result
         self.assertIsNone(matcher)
         self.assertEqual(tail, text)
         self.assertFalse(is_comment)
    def test_format_comments_empty(self):
        '''Format empty comments

        With an empty comment list, format_comments must return an empty
        string and hand back the leading comment and the line header it
        was given.
        '''
        formatter = tested.CommentExtractor([])
        result = formatter.format_comments([], 'comment', 'line 1:', 'line 2:')
        formatted, leading_after, header_after = result

        self.assertEqual(formatted, '')
        self.assertEqual(leading_after, 'comment')
        self.assertEqual(header_after, 'line 1:')
    def test_format_comment_to_next_continued(self):
        '''Format comments followed by its previous line

        Each row holds the leading comment and the matcher passed in and
        the leading comment expected back.  The comment 'next' and the
        line header 'line 1:' are the same for all rows.
        '''
        table = (
            (None, None, 'next'),
            ('', None, 'next'),
            ('first', None, 'first next'),
        )
        formatter = tested.CommentExtractor([])

        for leading_before, matcher, leading_wanted in table:
            formatted, leading_after, header_after = \
                formatter.format_comment_to_next(
                    'next', leading_before, matcher, 'line 1:')
            self.assertEqual(formatted, '')
            self.assertEqual(leading_after, leading_wanted)
            self.assertEqual(header_after, 'line 1:')
 def test_extract_comments_triple(self):
     '''Find comments after triple quotation marks

     A single call on a line that opens a comment must return the text
     behind the delimiter as the tail, no comments yet, a matcher and a
     true is_comment.
     '''
     table = (
         ('"""word', 'word'),
         ("code ''' word", ' word'),
     )
     parser = tested.CommentExtractor([])
     for source, wanted_tail in table:
         result = parser.extract_comments_in_line(source, None, None, False)
         tail, comments, comment, matcher, is_comment = result
         self.assertEqual(tail, wanted_tail)
         self.assertEqual(comments, [])
         self.assertEqual(comment, '')
         self.assertIsNotNone(matcher)
         self.assertTrue(is_comment)
    def test_parse_line_multiline_quotes(self):
        '''Parse a comment in multiple lines

        Feeds a blank line and then a comment enclosed in triple
        single-quotation marks to parse_line, one line per call.  The
        state returned by a call is passed to the next call and checked
        after every line.
        '''
        errstream = MockOutputStream()
        state = tested.CommentExtractorState()
        filename = 'F'
        extractor = tested.CommentExtractor([])

        # Columns: line, expected line header, expected comments so far,
        # expected leading comment, whether the matcher must be None,
        # expected is_comment
        cases = [['',
                  '', '', None, True, False],
                 ["'''",
                  'F : 2\n', '', None, False, True],
                 ['First', 'F : 2\n', '', 'First', False, True],
                 ['Second', 'F : 2\n', '', 'First Second', False, True],
                 ["'''", '', 'F : 2\nFirst Second\n\n', None, True, True]]

        # Line numbers start at 1
        for line_number, case in enumerate(cases, start=1):
            line, expected_line_header, expected_comments, \
                expected_leading_comment, expect_no_matcher, \
                expected_is_comment = case
            state = extractor.parse_line(line, errstream, line_number, filename, state)
            self.assertEqual(errstream.message, '')
            self.assertEqual(state.exit_status, tested.EXIT_STATUS_SUCCESS)
            self.assertEqual(state.line_header, expected_line_header)
            self.assertEqual(state.all_comments, expected_comments)

            if expected_leading_comment is None:
                self.assertIsNone(state.leading_comment)
            else:
                self.assertEqual(state.leading_comment, expected_leading_comment)

            if expect_no_matcher:
                self.assertIsNone(state.matcher)
            else:
                self.assertIsNotNone(state.matcher)

            self.assertEqual(state.is_comment, expected_is_comment)
    def test_extract_delimiter_comment2(self):
        '''Find string literals including triple double-quotation marks

        The line holds a single-quoted string with triple
        double-quotation marks inside it.  extract_delimiter must not
        report a comment, and the returned matcher applied to the
        returned tail must yield the string body, the quotation mark
        twice and the text behind the string as groups 1 to 4.
        '''
        parser = tested.CommentExtractor([])
        source = ''.join(("pre'", '1"""2', "'post"))
        matcher, tail, is_comment = parser.extract_delimiter(source)
        self.assertIsNotNone(matcher)
        self.assertFalse(is_comment)

        found = re.match(matcher, tail)
        self.assertIsNotNone(found)
        wanted_groups = ('1"""2', "'", "'", 'post')
        for number, wanted in enumerate(wanted_groups, start=1):
            self.assertEqual(found.group(number), wanted)
    def test_parse_line_multiline_mixed(self):
        '''Parse comments in mixed style

        Feeds a comment enclosed in triple double-quotation marks that
        ends on a line with a # comment, followed by a code line with a #
        comment, to parse_line, one line per call.  The state returned by
        a call is passed to the next call and checked after every line.
        '''
        errstream = MockOutputStream()
        state = tested.CommentExtractorState()
        filename = 'F'
        extractor = tested.CommentExtractor([])

        # Columns: line, expected line header, expected comments so far,
        # expected leading comment, whether the matcher must be None,
        # expected is_comment
        cases = [['"""1st',
                  'F : 1\n', '', '1st', False, True],
                 ['2nd',
                  'F : 1\n', '', '1st 2nd', False, True],
                 ['3rd""" # 4th', '', 'F : 1\n1st 2nd 3rd\n\nF : 3\n4th\n\n', None, True, False],
                 ['code # 5th', '', 'F : 1\n1st 2nd 3rd\n\nF : 3\n4th\n\nF : 4\n5th\n\n', None, True, False]]

        # Line numbers start at 1
        for line_number, case in enumerate(cases, start=1):
            line, expected_line_header, expected_comments, \
                expected_leading_comment, expect_no_matcher, \
                expected_is_comment = case
            state = extractor.parse_line(line, errstream, line_number, filename, state)
            self.assertEqual(errstream.message, '')
            self.assertEqual(state.exit_status, tested.EXIT_STATUS_SUCCESS)
            self.assertEqual(state.line_header, expected_line_header)
            self.assertEqual(state.all_comments, expected_comments)

            if expected_leading_comment is None:
                self.assertIsNone(state.leading_comment)
            else:
                self.assertEqual(state.leading_comment, expected_leading_comment)

            if expect_no_matcher:
                self.assertIsNone(state.matcher)
            else:
                self.assertIsNotNone(state.matcher)

            self.assertEqual(state.is_comment, expected_is_comment)
 def test_extract_comments_string_literal(self):
     '''Find string literals

     A single call on a line that opens a string literal must return
     the text behind the opening quotation mark as the tail, no
     comments, a matcher and a false is_comment.
     '''
     table = (
         ('"word"', 'word"'),
         ("pre, 'word' post", "word' post"),
         ('"' + "'''" + '"', "'''" + '"'),
         ("'" + '"""' + "'", '"""' + "'"),
     )
     parser = tested.CommentExtractor([])
     for source, wanted_tail in table:
         result = parser.extract_comments_in_line(source, None, None, False)
         tail, comments, comment, matcher, is_comment = result
         self.assertEqual(tail, wanted_tail)
         self.assertEqual(comments, [])
         self.assertEqual(comment, '')
         self.assertIsNotNone(matcher)
         self.assertFalse(is_comment)
    def test_extract_delimiter_string1(self):
        '''Find string literals including quotation marks

        Each row pairs a line with the tail expected behind the first
        quotation mark.  extract_delimiter must return a matcher and must
        not report a comment, even when triple quotation marks of the
        other kind follow.
        '''
        parser = tested.CommentExtractor([])

        table = (
            ("'", ''),
            ("pre'string", 'string'),
            ('pre"string', 'string'),
            ('"' + "'''", "'''"),
            ('+ "' + "'''" + '" +', "'''" + '" +'),
            ("'" + '"""', '"""'),
            ("'" + '"""' + "'", '"""' + "'"),
        )
        for source, wanted_tail in table:
            result = parser.extract_delimiter(source)
            matcher, tail, is_comment = result
            self.assertIsNotNone(matcher)
            self.assertEqual(tail, wanted_tail)
            self.assertFalse(is_comment)
    def test_parse_lines(self):
        '''Parse complex cases for comments

        Each row pairs the lines of an input stream with the comment
        string expected from parse_lines for a source named 'Source'.
        Every row must finish with the success exit status.
        '''
        table = (
            ([''], ''),
            (['#comment'], 'Source : 1\ncomment\n\n'),
            (['', '#1st', '#2nd', '', '#3rd', '', '#4th'],
             'Source : 2\n1st 2nd\n\nSource : 5\n3rd\n\n'),
            (['', '"""comment"""'], 'Source : 2\ncomment\n\n'),
            (['', "'''", 'comment', "'''"], 'Source : 2\ncomment\n\n'),
            (['', "'''a", 'b', "c'''"], 'Source : 2\na b c\n\n'),
            (['', "'''a", 'b', "c''' # d"],
             'Source : 2\na b c\n\nSource : 4\nd\n\n'),
            (['', "'''a", 'b', "c''' '''d'''"],
             'Source : 2\na b c\n\nSource : 4\nd\n\n'),
            (['', '/*', '1st', '2nd', "*/ '''d'''"],
             'Source : 2\n1st 2nd\n\nSource : 5\nd\n\n'),
        )
        parser = tested.CommentExtractor([])

        for source_lines, wanted in table:
            # Fresh streams for every row
            errstream = MockOutputStream()
            instream = MockInputStream(source_lines)
            result = parser.parse_lines(errstream, instream, 'Source')
            comments, status = result
            self.assertEqual(comments, wanted)
            self.assertEqual(status, tested.EXIT_STATUS_SUCCESS)
    def test_format_comment_to_next_new(self):
        '''Format comments leading to its next line

        format_comment_to_next is called with an empty comment.  Each row
        holds the leading comment, the matcher and the line header passed
        in, followed by the expected comment string, leading comment and
        line header.
        '''
        table = (
            (None, None, '',
             '', None, ''),
            ('', None, '',
             '', '', ''),
            ('leading', None, 'line 1:',
             'line 1:leading\n\n', None, ''),
            ('leading', 1, 'line 1:',
             'line 1:leading\n\n', None, 'line 1:'),
        )
        formatter = tested.CommentExtractor([])

        for row in table:
            leading_before, matcher, header_before = row[:3]
            wanted_string, wanted_leading, wanted_header = row[3:]
            formatted, leading_after, header_after = \
                formatter.format_comment_to_next(
                    '', leading_before, matcher, header_before)
            self.assertEqual(formatted, wanted_string)
            self.assertEqual(leading_after, wanted_leading)
            self.assertEqual(header_after, wanted_header)
    def test_extract_delimiter_string2(self):
        '''Find string literals

        Builds PRE<q>STRING<q>POST for the single and the double
        quotation mark.  extract_delimiter must not report a comment, and
        the returned matcher applied to the returned tail must yield the
        string body, the quotation mark twice and the trailing text as
        groups 1 to 4.
        '''
        parser = tested.CommentExtractor([])
        prefix, body, suffix = 'PRE', 'STRING', 'POST'

        for quote in ("'", '"'):
            source = ''.join((prefix, quote, body, quote, suffix))
            matcher, tail, is_comment = parser.extract_delimiter(source)
            self.assertIsNotNone(matcher)
            self.assertFalse(is_comment)

            found = re.match(matcher, tail)
            self.assertIsNotNone(found)
            wanted_groups = (body, quote, quote, suffix)
            for number, wanted in enumerate(wanted_groups, start=1):
                self.assertEqual(found.group(number), wanted)
    def test_extract_comments_mixed(self):
        '''Find comments and string literals

        Each row pairs a line that mixes comments with string literals
        and the comments expected from the whole line.
        '''
        table = (
            ('"""word""" "string"', ['word']),
            ("code 'string' # word", ['word']),
            ("code 'string' '''a''' # word", ['a', 'word']),
            ('"""a"""' + "'string'" + '"string"' + "'''word'''",
             ['a', 'word']),
        )
        parser = tested.CommentExtractor([])

        for source, wanted in table:
            remaining, matcher, in_comment = source, None, False
            found = []
            # Parse the line and then each returned tail, 10 calls at most
            for _ in range(10):
                remaining, part, _, matcher, in_comment = \
                    parser.extract_comments_in_line(
                        remaining, None, matcher, in_comment)
                found.extend(part)
                if not remaining:
                    break

            self.assertEqual(found, wanted)
    def test_is_ascll_only(self):
        '''Check whether lines contain US-ASCII only

        Each row holds a line, the exit status passed in, the message
        expected on the error stream and the exit status expected back.
        The file name 'F' and the line number 23 are the same for all
        rows.
        '''
        success = tested.EXIT_STATUS_SUCCESS
        error = tested.EXIT_STATUS_ERROR
        warning = 'Non US-ASCII character found at line 23 in F\n'
        table = (
            ('', success, '', success),
            ('', error, '', error),
            ('word 1', success, '', success),
            ('word 2', error, '', error),
            ('単語', success, warning, error),
            ('単語', error, warning, error),
        )
        checker = tested.CommentExtractor([])

        for text, status_before, wanted_message, wanted_status in table:
            # Fresh stream for every row so that messages do not add up
            errstream = MockOutputStream()
            status_after = checker.is_ascll_only(
                errstream, "F", 23, text, status_before)
            self.assertEqual(errstream.message, wanted_message)
            self.assertEqual(status_after, wanted_status)
 def test_warn_io_error(self):
     '''Warn if I/O errors occur

     warn_io_error must write the line number and the file name it is
     given to the error stream.
     '''
     errstream = MockOutputStream()
     reporter = tested.CommentExtractor([])
     reporter.warn_io_error(errstream, 34, 'F')
     wanted = 'Cannot read at line 34 in F\n'
     self.assertEqual(errstream.message, wanted)