Ejemplo n.º 1
0
    def test_numbered_text_lines_from_list_or_location_yield_same_results(self):
        """
        Check numbered_text_lines() fed with a file location against the same
        call fed with the list of lines read from that file: the numbered
        sequences differ, but the concatenated text is the same.
        """
        location = self.get_test_loc('analysis/bsd-new')
        with io.open(location, encoding='utf-8') as handle:
            # keep the line endings so that joining gives back the full text
            lines = handle.read().splitlines(True)

        by_location = list(numbered_text_lines(location=location))
        by_list = list(numbered_text_lines(location=lines))

        # the two sequences are not split the same way...
        assert by_location != by_list
        assert len(by_location) > len(by_list)

        # ... yet they carry the same text overall
        text_by_location = ''.join(text for _, text in by_location)
        text_by_list = ''.join(text for _, text in by_list)
        assert text_by_location == text_by_list
Ejemplo n.º 2
0
def query_lines(location=None, query_string=None, strip=True, start_line=1):
    """
    Yield (line number, text line) tuples for the file at `location` or,
    failing that, for the `query_string` text. Empty lines are included.
    Numbering begins at ``start_line`` (1 by default).

    When `strip` is true each line is stripped of leading and trailing
    whitespace. Otherwise each line is yielded with exactly one trailing
    newline character.

    Nothing is yielded when neither `location` nor `query_string` is provided.
    """
    # TODO: OPTIMIZE: tokenizing line by line may be rather slow
    # we could instead get lines and tokens at once in a batch?
    if location:
        source = numbered_text_lines(
            location,
            demarkup=False,
            start_line=start_line,
        )
    elif query_string:
        # line endings are only kept when the lines are not going to be stripped
        source = enumerate(query_string.splitlines(not strip), start_line)
    else:
        source = ()

    if strip:
        for number, text in source:
            yield number, text.strip()
    else:
        for number, text in source:
            # normalize to a single trailing newline
            yield number, text.rstrip('\n') + '\n'
Ejemplo n.º 3
0
    def file_content(self):
        """
        Return the text content of the file at this Resource location, built
        by joining the lines that the TextCode numbered_text_lines() utility
        yields for it.
        """
        from textcode.analysis import numbered_text_lines

        # only the text is kept; the line numbers are dropped
        return "".join(text for _number, text in numbered_text_lines(self.location))
Ejemplo n.º 4
0
 def test_numbered_text_lines_return_correct_number_of_lines(self):
     """
     Check that numbered_text_lines() returns exactly two entries for the
     'analysis/correct_lines' test file, both carrying line number 1.
     """
     first_text = (
         'Permission is hereby granted, free of charge, to any person obtaining '
         'a copy of this software and associated documentation files (the "Software"), '
         'to deal in the Software without restriction, including without limitation '
         'the rights to use, copy, modify, merge, , , sublicense, and/or  Software, ,'
     )
     expected = [(1, first_text), (1, u' subject')]

     location = self.get_test_loc('analysis/correct_lines')
     result = list(numbered_text_lines(location))

     assert expected == result
     assert 2 == len(result)
Ejemplo n.º 5
0
    def file_content(self):
        """
        Return the text content of the file at this Resource location, built
        by joining the lines that the TextCode numbered_text_lines() utility
        yields for it. Every line of the returned text ends with a newline.
        """
        from textcode.analysis import numbered_text_lines

        def newline_terminated(text):
            # ScanCode-toolkit is not providing the "\n" suffix when reading
            # binary files. This is a workaround until the issue is fixed in
            # the toolkit.
            if text.endswith("\n"):
                return text
            return text + "\n"

        return "".join(
            newline_terminated(text)
            for _number, text in numbered_text_lines(self.location)
        )
Ejemplo n.º 6
0
    def text(self):
        """
        Return the rule text.

        The text is loaded from `self.text_file` when this is set and exists
        on disk. Otherwise `self.stored_text` is returned when it is not empty.

        Raise an Exception when neither source provides a text.
        """
        if self.text_file and exists(self.text_file):
            # IMPORTANT: use the same process as query text loading for symmetry
            numbered_lines = numbered_text_lines(self.text_file, demarkup=False, plain_text=True)
            return ''.join(l for _, l in numbered_lines)

        # used for non-file backed rules
        if self.stored_text:
            return self.stored_text

        # Format rather than concatenate: this point is reached when text_file
        # is unset or missing on disk, and concatenating a None to a str would
        # raise a TypeError that hides this message.
        raise Exception(
            'Inconsistent rule text for: {}\nfile://{}'.format(
                self.identifier, self.text_file)
        )
Ejemplo n.º 7
0
def find(location, patterns):
    """
    Yield a tuple of (key, found text, text line, line number) for each match
    of `patterns` found in the lines of the file at `location`. `patterns` is
    a list of tuples (key, compiled regex).

    Note: the location can be a list of lines for testing convenience.
    """
    if TRACE:
        from pprint import pformat
        logger_debug(
            'find(location=%(loc)r,\n  patterns=%(patterns)r)'
            % {'loc': pformat(location), 'patterns': patterns}
        )

    numbered_lines = analysis.numbered_text_lines(location, demarkup=False)
    for line_number, line in numbered_lines:
        # every pattern is tried against every line
        for key, pattern in patterns:
            for match in pattern.findall(line):
                if TRACE:
                    logger_debug(
                        'find: yielding match: key=%(key)r, '
                        'match=%(match)r,\n    line=%(line)r'
                        % {'key': key, 'match': match, 'line': line}
                    )
                yield key, toascii(match), line, line_number
Ejemplo n.º 8
0
def get_scancode_compatible_content(location):
    """
    Return the text content of the file at `location` as a single string,
    built by joining the lines yielded by the ScanCode numbered_text_lines()
    function so that the content is consistent with ScanCode outputs.
    """
    # only the text is kept; the line numbers are dropped
    texts = (text for _number, text in numbered_text_lines(location))
    return "".join(texts)
Ejemplo n.º 9
0
 def test_numbered_text_lines_does_not_fail_on_autocad_test_pdf(self):
     """
     Check that numbered_text_lines() runs on the AutoCad diagram test PDF
     and returns no lines for it.
     """
     location = self.get_test_loc('pdf/AutoCad_Diagram.pdf')
     lines = list(numbered_text_lines(location))
     assert lines == []
Ejemplo n.º 10
0
 def test_numbered_text_lines_handles_jsmap3(self):
     """
     Check the text lines obtained from the 'ar-ER.js.map' test file against
     the side-by-side '.expected' file.
     """
     location = self.get_test_loc('analysis/jsmap/ar-ER.js.map')
     texts = [text for _number, text in numbered_text_lines(location)]
     check_text_lines(texts, location + '.expected')
Ejemplo n.º 11
0
 def test_numbered_text_lines_handles_sfdb(self):
     """
     Check the text lines obtained from the 'Ambrosia.sfd' test file against
     the lines of the side-by-side '.expected' file.
     """
     test_file = self.get_test_loc('analysis/splinefonts/Ambrosia.sfd')
     result = [l for _, l in numbered_text_lines(test_file)]
     expected_file = test_file + '.expected'
     # read through a context manager so that the file handle is always
     # closed, rather than left to the garbage collector
     with open(expected_file, 'r') as expected_handle:
         expected = expected_handle.read().splitlines(True)
     assert expected == result
Ejemplo n.º 12
0
 def test_image_media_do_not_yield_numbered_text_lines(self):
     """
     Check that numbered_text_lines() returns no lines for every file found
     in the 'media_without_text' test directory.
     """
     media_dir = self.get_test_loc('media_without_text')
     for media_file in resource_iter(media_dir, with_dirs=False):
         lines = list(numbered_text_lines(media_file))
         assert lines == [], 'Should not return text lines:' + media_file
Ejemplo n.º 13
0
 def test_mpg_media_do_not_yield_numbered_text_lines(self):
     """
     Check that numbered_text_lines() returns no lines for every file found
     in the 'media_with_text' test directory.
     """
     media_dir = self.get_test_loc('media_with_text')
     for media_file in resource_iter(media_dir, with_dirs=False):
         lines = list(numbered_text_lines(media_file))
         assert not lines
Ejemplo n.º 14
0
 def test_archives_do_not_yield_numbered_text_lines(self):
     """
     Check that numbered_text_lines() returns no lines for the 'simple.jar'
     test archive.
     """
     location = self.get_test_loc('archive/simple.jar')
     lines = list(numbered_text_lines(location))
     assert lines == []
Ejemplo n.º 15
0
 def test_numbered_text_lines_return_unicode(self):
     """
     Check that every text line returned for the 'verify.go' test file is
     exactly of the str type.
     """
     location = self.get_test_loc('analysis/verify.go')
     for _number, text in numbered_text_lines(location):
         assert type(text) is str
Ejemplo n.º 16
0
 def test_numbered_text_lines_handles_broken_jsmap_as_plain_text(self):
     """
     Check the text lines obtained from the 'broken.js.map' test file against
     the side-by-side '.expected' file.
     """
     location = self.get_test_loc('analysis/jsmap/broken.js.map')
     texts = [text for _number, text in numbered_text_lines(location)]
     check_text_lines(texts, location + '.expected')