Example no. 1
 def test_query_from_binary_lkms_3(self):
     location = self.get_test_loc('query/wlan_xauth.ko')
     idx = cache.get_index()
     result = Query(location, idx=idx)
     assert len(result.query_runs) < 900
     qr = result.query_runs[0]
     assert 'license dual bsd gpl' in u' '.join(idx.tokens_by_tid[t] for t in qr.matchable_tokens())
    def test_match_license_performance_profiling_on_full_index_match_hash(self):
        # pre-index: we are profiling only the detection, not the indexing
        idx = cache.get_index()

        stats_file = 'license_match_chunk_full_index_profile_log.txt'
        locations = [self.get_test_loc('perf/cc-by-nc-sa-3.0.SPDX')]
        self.profile_match(idx, locations, stats_file)
Example no. 3
 def test_score_is_not_100_for_exact_match_with_extra_words(self):
     idx = cache.get_index()
     test_loc = self.get_test_loc('detect/score/test.txt')
     matches = idx.match(location=test_loc)
     assert 1 == len(matches)
     match = matches[0]
     assert 99 < match.score() < 100
Example no. 4
def get_licenses(location, min_score=0, include_text=False, diag=False, license_url_template=DEJACODE_LICENSE_URL):
    """
    Yield mappings of license data detected in the file at `location`.

    `min_score` is a minimum score threshold from 0 to 100. The
    default of 0 means that all license matches are returned. With
    any other value, matches that have a score below `min_score` are
    not returned.

    If `include_text` is True, the matched text is included in the
    returned data.

    If `diag` is True, additional match details are returned with the
    matched_rule key of the returned mapping.
    """
    from licensedcode.cache import get_index
    from licensedcode.cache import get_licenses_db

    idx = get_index()
    licenses = get_licenses_db()

    for match in idx.match(location=location, min_score=min_score):
        if include_text:
            matched_text = match.matched_text(whole_lines=False)
        for license_key in match.rule.licenses:
            lic = licenses.get(license_key)
            result = OrderedDict()
            result['key'] = lic.key
            result['score'] = match.score()
            result['short_name'] = lic.short_name
            result['category'] = lic.category
            result['owner'] = lic.owner
            result['homepage_url'] = lic.homepage_url
            result['text_url'] = lic.text_urls[0] if lic.text_urls else ''
            result['reference_url'] = license_url_template.format(lic.key)
            spdx_key = lic.spdx_license_key
            result['spdx_license_key'] = spdx_key
            if spdx_key:
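                # strip a trailing "+" (as in "GPL-2.0+") so the URL
                # points to the base SPDX license page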
                spdx_key = lic.spdx_license_key.rstrip('+')
                spdx_url = SPDX_LICENSE_URL.format(spdx_key)
            else:
                spdx_url = ''
            result['spdx_url'] = spdx_url
            result['start_line'] = match.start_line
            result['end_line'] = match.end_line
            matched_rule = result['matched_rule'] = OrderedDict()
            matched_rule['identifier'] = match.rule.identifier
            matched_rule['license_choice'] = match.rule.license_choice
            matched_rule['licenses'] = match.rule.licenses
            # FIXME: for sanity these should always be included???
            if diag:
                matched_rule['matcher'] = match.matcher
                matched_rule['rule_length'] = match.rule.length
                matched_rule['matched_length'] = match.ilen()
                matched_rule['match_coverage'] = match.coverage()
                matched_rule['rule_relevance'] = match.rule.relevance
            # FIXME: for sanity this should always be included?????
            if include_text:
                result['matched_text'] = matched_text
            yield result
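
A minimal usage sketch for the generator above; the input path and score threshold are illustrative placeholders, and DEJACODE_LICENSE_URL is assumed to be defined by the enclosing module:

# hypothetical caller: print each detected license mapping for one file
for lic in get_licenses('some/file.c', min_score=50, include_text=True):
    print(lic['key'], lic['score'], lic['spdx_license_key'])
    print('  rule:', lic['matched_rule']['identifier'])
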
Example no. 5
 def test_match_does_not_detect_spurrious_short_apache_rule(self):
     idx = cache.get_index()
     querys = u'''
     <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
     <title>Apache log4j 1.2 - Continuous Integration</title>
     '''
     matches = idx.match(query_string=querys)
     assert [] == matches
Example no. 6
 def test_query_from_binary_lkms_2(self):
     location = self.get_test_loc('query/eeepc_acpi.ko')
     idx = cache.get_index()
     result = Query(location, idx=idx)
     assert len(result.query_runs) < 500
     qrs = result.query_runs[5:10]
     assert any('license gpl' in u' '.join(idx.tokens_by_tid[t] for t in qr.matchable_tokens())
                for qr in qrs)
    def closure_test_function(*args, **kwargs):
        idx = cache.get_index()
        matches = idx.match(
            location=test_file,
            min_score=min_score,
            # if negative, do not detect negative rules when testing negative rules
            detect_negative=detect_negative)

        if not matches:
            matches = []

        # TODO: we should expect matches properly, not with a grab bag of flat license keys
        # flattened list of all detected license keys across all matches.
        detected_licenses = functional.flatten(
            map(unicode, match.rule.licenses) for match in matches)
        try:
            if not detect_negative:
                # we skipped negative detection for a negative rule
                # we just want to ensure that the rule was matched properly
                assert matches and not expected_licenses and not detected_licenses
            else:
                assert expected_licenses == detected_licenses
        except:
            # On failure, we compare against more result data to get additional
            # failure details, including the test_file and full match details
            match_failure_trace = []

            if trace_text:
                for match in matches:
                    qtext, itext = get_texts(match,
                                             location=test_file,
                                             idx=idx)
                    rule_text_file = match.rule.text_file
                    rule_data_file = match.rule.data_file
                    match_failure_trace.extend([
                        '', '', '======= MATCH ====', match,
                        '======= Matched Query Text for:',
                        'file://{test_file}'.format(**locals())
                    ])
                    if test_data_file:
                        match_failure_trace.append(
                            'file://{test_data_file}'.format(**locals()))
                    match_failure_trace.append(qtext.splitlines())
                    match_failure_trace.extend([
                        '',
                        '======= Matched Rule Text for:',
                        'file://{rule_text_file}'.format(**locals()),
                        'file://{rule_data_file}'.format(**locals()),
                        itext.splitlines(),
                    ])
            # this assert will always fail and provide a detailed failure trace
            assert expected_licenses == detected_licenses + [
                test_name, 'test file: file://' + test_file
            ] + match_failure_trace
Example no. 8
def find_osi_map(license: "OSI License Object"):
    """
    Return the Scanengine license expression mapped to the OSI License
    `license`, or None if there is no match.
    """
    idx = get_index()
    osi_key = license['id']
    # search for Scanengine License matching OSI Key
    matches = list(idx.match(query_string=osi_key))
    if not matches:
        return None
    return matches[0].rule.license_expression
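
A hedged usage sketch for find_osi_map; only the 'id' field of the OSI license object is assumed, since that is all the function reads:

# hypothetical OSI license record
osi_license = {'id': 'MIT'}
expression = find_osi_map(osi_license)
if expression:
    print('mapped to license expression:', expression)
else:
    print('no Scanengine match for', osi_license['id'])
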
Example no. 9
    def test_match_in_binary_lkms_1(self):
        idx = cache.get_index()
        qloc = self.get_test_loc('positions/ath_pci.ko')
        matches = idx.match(location=qloc)
        assert 1 == len(matches)
        match = matches[0]
        assert ['bsd-new', 'gpl-2.0'] == match.rule.licenses

        qtext, itext = get_texts(match, location=qloc, idx=idx)
        assert 'license Dual BSD GPL' == qtext
        assert 'license Dual BSD GPL' == itext
Example no. 11
 def test_all_spdx_tokens_exists_in_dictionary(self):
     idx = cache.get_index()
     dic = idx.dictionary
     licenses = cache.get_licenses_db()
     tokens = set(models.get_all_spdx_key_tokens(licenses))
     keys = set(idx.dictionary)
     try:
         assert tokens.issubset(keys)
     except:
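         # looking up every token raises a KeyError naming the first token missing from the dictionary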
         for token in tokens:
             dic[token]
Example no. 12
 def test_Query_tokens_by_line_behaves_the_same_on_python_2_and_python_3(
         self):
     location = self.get_test_loc('query/query_lines/yahoo-eula.txt')
     idx = cache.get_index()
     query = Query(location, idx=idx)
     tbl = list(query.tokens_by_line())
     # inject the actual token string for sanity
     tbt = idx.tokens_by_tid
     results = [[[i, i and tbt[i] or i] for i in line] for line in tbl]
     expected = self.get_test_loc('query/query_lines/yahoo-eula.txt.json')
     check_result_equals_expected_json(results, expected, regen=False)
Example no. 13
def rule_exists(text):
    """
    Return the matched rule identifier if `text` is an existing rule text
    matched exactly, False otherwise.
    """
    idx = cache.get_index()

    matches = idx.match(query_string=text)
    if len(matches) != 1:
        return False
    match = matches[0]
    if match.matcher == match_hash.MATCH_HASH:
        return match.rule.identifier
    return False
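
A small sketch of how rule_exists above might be used when vetting a candidate rule text; the text is illustrative:

candidate = 'Licensed under the Apache License, Version 2.0'
existing = rule_exists(candidate)
if existing:
    print('text already covered by rule:', existing)  # a rule identifier
else:
    print('no exact hash match; candidate may be a new rule')
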
Example no. 14
    def test_match_in_binary_lkms_2(self):
        idx = cache.get_index()
        qloc = self.get_test_loc('positions/eeepc_acpi.ko')
        matches = idx.match(location=qloc)
        assert 1 == len(matches)
        match = matches[0]
        assert ['gpl-1.0-plus'] == match.rule.licenses
        assert match.ispan == Span(0, 1)

        qtext, itext = get_texts(match, location=qloc, idx=idx)
        assert 'license GPL' == qtext
        assert 'License GPL' == itext
Example no. 15
    def test_query_from_binary_lkms_2(self):
        location = self.get_test_loc('query/eeepc_acpi.ko')
        idx = cache.get_index()
        result = Query(location, idx=idx)
        assert len(result.query_runs) < 500

        qrs = result.query_runs[:10]
        # for i, qr in enumerate(qrs):
        #     print('qr:', i,
        #           'qr_text:', ' '.join(idx.tokens_by_tid[t] for t in qr.matchable_tokens()))
        assert any('license gpl' in ' '.join(idx.tokens_by_tid[t] for t in qr.matchable_tokens())
                   for qr in qrs)
    def test_match_in_binary_lkms_2(self):
        idx = cache.get_index()
        qloc = self.get_test_loc('positions/eeepc_acpi.ko')
        matches = idx.match(location=qloc)
        assert 1 == len(matches)
        match = matches[0]
        assert ['gpl'] == match.rule.licenses
        assert match.ispan == Span(0, 1)

        qtext, itext = get_texts(match, location=qloc, idx=idx)
        assert 'license GPL' == qtext
        assert 'License GPL' == itext
Example no. 17
    def test_query_run_for_text_with_long_lines(self):
        location1 = self.get_test_loc('query/long_lines.txt')
        location2 = self.get_test_loc('query/not_long_lines.txt')
        from typecode.contenttype import get_type
        ft1 = get_type(location1)
        assert ft1.is_text_with_long_lines
        ft2 = get_type(location2)
        assert not ft2.is_text_with_long_lines

        idx = cache.get_index()
        assert len(Query(location1, idx=idx).query_runs) == 3
        assert len(Query(location2, idx=idx).query_runs) == 14
def check_special_rule_cannot_be_detected(rule):
    idx = cache.get_index()
    results = idx.match(location=rule.text_file)
    if results:
        data_file = rule.data_file
        if not data_file:
            data_file = rule.text_file.replace('.LICENSE', '.yml')
        # On failure, we compare again to get additional failure details such as
        # a clickable text_file path
        results = (results, f'file://{data_file}', f'file://{rule.text_file}')
        # this assert will always fail and provide a more detailed failure trace
        assert results == []
Example no. 19
def get_licenses(location, min_score=0,
                 include_text=False, license_text_diagnostics=False,
                 license_url_template=DEJACODE_LICENSE_URL,
                 deadline=sys.maxsize, **kwargs):
    """
    Return a mapping of license detection results for licenses detected in
    the file at `location`.

    This mapping contains two keys:
     - 'licenses' with a value that is list of mappings of license information.
     - 'license_expressions' with a value that is list of license expression
       strings.

    `min_score` is a minimum score threshold from 0 to 100. The default of 0
    means that all license matches are returned. Otherwise, matches with a
    score below `min_score` are not returned.

    If `include_text` is True, matched text is included in the returned
    `licenses` data.
    """
    from licensedcode import cache
    idx = cache.get_index()

    detected_licenses = []
    detected_expressions = []

    matches = idx.match(
        location=location, min_score=min_score, deadline=deadline, **kwargs)

    for match in matches:
        matched_text = None
        # TODO: handle whole lines with the case of very long lines
        if include_text:
            if license_text_diagnostics:
                matched_text = match.matched_text(whole_lines=False)
            else:
                highlight_not_matched = highlight_matched = u'%s'
                matched_text = match.matched_text(
                    highlight_matched=highlight_matched,
                    highlight_not_matched=highlight_not_matched,
                    whole_lines=True)

        detected_expressions.append(match.rule.license_expression)

        detected_licenses.extend(
            _licenses_data_from_match(match, matched_text, license_url_template)
        )

    return OrderedDict([
        ('licenses', detected_licenses),
        ('license_expressions', detected_expressions),
    ])
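
A hedged usage sketch for this get_licenses variant; the input path is a placeholder, and the license mappings are assumed to carry the same keys as in Example no. 4:

# hypothetical call on a placeholder path
results = get_licenses('some/file.c', min_score=20, include_text=True)
for lic in results['licenses']:
    print(lic['key'], lic['score'])
for expression in results['license_expressions']:
    print(expression)
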
Example no. 20
def get_license_matches(location=None, query_string=None, min_score=0):
    """
    Yield detected license matches in the file at `location` or the
    `query_string` string.

    `min_score` is a minimum score threshold for a license match from 0 to 100
    percent. 100 is a high confidence match and 0 a low confidence match. A
    `min_score` of 0 means all matches are returned.

    The minimum length for an approximate match is four tokens.
    Spurious matches are always filtered.
    """
    return get_index().match(location=location, query_string=query_string, min_score=min_score)
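
A short sketch using get_license_matches on a raw string; the notice text is illustrative:

matches = get_license_matches(query_string='Released under the MIT license')
for match in matches:
    # each match is a LicenseMatch with a rule and a score
    print(match.rule.license_expression, match.score())
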
Example no. 21
    def test_match_has_correct_positions_basic(self):
        idx = cache.get_index()
        querys = u'''Licensed under the GNU General Public License (GPL).
                     Licensed under the GNU General Public License (GPL).
                     Licensed under the GNU General Public License (GPL).'''

        matches = idx.match(query_string=querys)

        rule = [r for r in idx.rules_by_rid if r.identifier == 'gpl_69.RULE'][0]
        m1 = LicenseMatch(rule=rule, qspan=Span(0, 7), ispan=Span(0, 7), start_line=1, end_line=1)
        m2 = LicenseMatch(rule=rule, qspan=Span(8, 15), ispan=Span(0, 7), start_line=2, end_line=2)
        m3 = LicenseMatch(rule=rule, qspan=Span(16, 23), ispan=Span(0, 7), start_line=3, end_line=3)
        assert [m1, m2, m3] == matches
Example no. 23
 def test_match_in_binary_lkms_3(self):
     idx = cache.get_index()
     qloc = self.get_test_loc('positions/wlan_xauth.ko')
     matches = idx.match(location=qloc)
     assert 1 == len(matches)
     match = matches[0]
     assert ['bsd-new', 'gpl-2.0'] == match.rule.licenses
     assert 100 == match.coverage()
     assert 20 == match.score()
     qtext, itext = get_texts(match, location=qloc, idx=idx)
     assert 'license Dual BSD GPL' == qtext
     assert 'license Dual BSD GPL' == itext
     assert Span(0, 3) == match.ispan
    def test_method(self):
        idx = cache.get_index()
        qry = Query(location=test_loc, idx=idx)
        results = [list(l) for l in qry.spdx_lines]
        if regen:
            with open(expected_loc, 'wb') as ef:
                json.dump(results, ef, indent=2)
            expected = results
        else:
            with open(expected_loc, 'rb') as ef:
                expected = json.load(ef, object_pairs_hook=OrderedDict)

        assert expected == results
    def test_method(self):
        idx = cache.get_index()
        qry = Query(location=test_loc, idx=idx)
        results = [list(l) for l in qry.spdx_lines]
        if regen:
            with open(expected_loc, 'w') as ef:
                json.dump(results, ef, indent=2)
            expected = results
        else:
            with open(expected_loc) as ef:
                expected = json.load(ef)

        assert results == expected
def check_license(location=None, query_string=None, expected=(), test_data_dir=None):
    if query_string:
        idx = cache.get_index()
        matches = idx.match(location=location, query_string=query_string)
        results = functional.flatten(map(unicode, match.rule.licenses) for match in matches)
        assert expected == results
    else:
        test_name = python_safe_name('test_' + location.replace(test_data_dir, ''))
        tester = make_license_test_function(
            expected_licenses=expected, test_file=location,
            test_data_file=None, test_name=test_name,
            trace_text=True)
        tester()
Example no. 28
 def check_position(self, test_path, expected, with_span=True):
     """
     Check license detection in file or folder against expected result.
     Expected is a list of (license, lines span, qspan span) tuples.
     """
     test_location = self.get_test_loc(test_path)
     results = []
     # FULL INDEX!!
     idx = cache.get_index()
     matches = idx.match(test_location)
     for match in matches:
         for detected in match.rule.license_keys():
             results.append((detected, match.lines(), with_span and match.qspan or None))
     assert expected == results
Example no. 29
    def test_method(self):
        idx = cache.get_index()
        qry = Query(location=test_loc, idx=idx)
        results = [list(l) for l in qry.spdx_lines]
        if regen:
            wmode = 'w'
            with open(expected_loc, wmode) as ef:
                json.dump(results, ef, indent=2)
            expected = results
        else:
            with open(expected_loc, 'rb') as ef:
                expected = json.load(ef, encoding='utf-8')

        assert expected == results
    def test_match_does_not_change_query_unknown_positions(self):
        from licensedcode.match import LicenseMatch
        from licensedcode.spans import Span

        location = self.get_test_loc('query/unknown_positions/lz4.license.txt')
        idx = cache.get_index()
        # build a query first
        qry1 = Query(location, idx=idx)
        # this has the side effect to populate the unknown
        txt = ' '.join(f'{i}-{idx.tokens_by_tid[t]}'
                       for i, t in enumerate(qry1.tokens))
        assert txt == (
            '0-this 1-repository 2-uses 3-2 4-different 5-licenses '
            '6-all 7-files 8-in 9-the 10-lib 11-directory 12-use 13-bsd 14-2 15-clause 16-license '
            '17-all 18-other 19-files 20-use 21-gplv2 22-license 23-unless 24-explicitly 25-stated 26-otherwise '
            '27-relevant 28-license 29-is 30-reminded 31-at 32-the 33-top 34-of 35-each 36-source 37-file '
            '38-and 39-with 40-presence 41-of 42-copying 43-or 44-license 45-file 46-in 47-associated 48-directories '
            '49-this 50-model 51-is 52-selected 53-to 54-emphasize 55-that '
            '56-files 57-in 58-the 59-lib 60-directory 61-are 62-designed 63-to 64-be 65-included 66-into 67-3rd 68-party 69-applications '
            '70-while 71-all 72-other 73-files 74-in 75-programs 76-tests 77-or 78-examples '
            '79-receive 80-more 81-limited 82-attention 83-and 84-support 85-for 86-such 87-scenario'
        )
        list(qry1.tokens_by_line())
        assert qry1.unknowns_by_pos == {}

        # run matching
        matches = idx.match(location=location)
        match = matches[0]

        rule = [
            r for r in idx.rules_by_rid
            if r.identifier == 'bsd-simplified_and_gpl-2.0_1.RULE'
        ][0]

        expected = LicenseMatch(
            matcher='2-aho',
            rule=rule,
            qspan=Span(0, 48),
            ispan=Span(0, 48),
        )

        assert match == expected

        # check that query unknown by pos is the same and empty
        qry2 = match.query

        # this was incorrectly returned as {15: 0, 20: 0, 21: 0, 41: 0, 43: 0}
        # after querying done during matching
        assert qry2.unknowns_by_pos == {}
def check_rule_or_license_can_be_self_detected_exactly(rule):
    idx = cache.get_index()
    matches = idx.match(
        location=rule.text_file,
        _skip_hash_match=True,
        deadline=10,
    )
    expected = [rule.identifier, '100']
    results = flatten(
        (m.rule.identifier, str(int(m.coverage()))) for m in matches)

    try:
        assert results == expected
    except:

        from licensedcode.tracing import get_texts
        data_file = rule.data_file
        if not data_file:
            data_file = rule.text_file.replace('.LICENSE', '.yml')
        text_file = rule.text_file
        # On failure, we compare again to get additional failure details such as
        # a clickable text_file path
        failure_trace = ['======= TEST ====']
        failure_trace.extend(results)
        failure_trace.extend([
            '',
            f'file://{data_file}',
            f'file://{text_file}',
            '======================',
        ])

        for i, match in enumerate(matches):
            qtext, itext = get_texts(match)
            m_text_file = match.rule.text_file

            if match.rule.is_from_license:
                m_data_file = m_text_file.replace('LICENSE', '.yml')
            else:
                m_data_file = match.rule.data_file

            failure_trace.extend([
                '', f'======= MATCH {i} ====',
                repr(match), f'file://{m_data_file}', f'file://{m_text_file}',
                '======= Matched Query Text:', '', qtext, '',
                '======= Matched Rule Text:', '', itext
            ])

        # this assert will always fail and provide a detailed failure trace
        assert '\n'.join(failure_trace) == '\n'.join(expected)
    def process_codebase(self, codebase, **kwargs):
        """
        Update detected clues to remove redundant clues already found in another
        detected clue for all the resources of codebase.
        """
        if TRACE: logger_debug('RedundantFilter:process_codebase')

        from licensedcode.cache import get_index

        rules_by_id = {r.identifier: r for r in get_index().rules_by_rid}

        for resource in codebase.walk():
            filtered = filter_ignorable_resource_clues(resource, rules_by_id)
            if filtered:
                filtered.save(codebase)
Example no. 33
 def test_match_texts_with_short_lgpl_and_gpl_notices(self):
     idx = cache.get_index()
     test_loc = self.get_test_loc('detect/short_l_and_gpls')
     matches = idx.match(location=test_loc)
     assert 6 == len(matches)
     results = [m.matched_text(whole_lines=False) for m in matches]
     expected = [
         'This software is distributed under the following licenses:\n[Driver]:      GNU General Public License (GPL)',
         'GNU Lesser General Public License (LGPL)',
         'This software is distributed under the following licenses:\n[Driver]:           GNU General Public License (GPL)',
         'GNU Lesser General Public (LGPL)',
         'GNU Lesser General Public (LGPL)',
         'GNU Lesser General Public (LGPL)'
         ]
     assert expected == results
Example no. 34
 def test_match_texts_with_short_lgpl_and_gpl_notices(self):
     idx = cache.get_index()
     test_loc = self.get_test_loc('detect/short_l_and_gpls')
     matches = idx.match(location=test_loc)
     assert 6 == len(matches)
     results = [m.matched_text(whole_lines=False) for m in matches]
     expected = [
         'GNU General Public License (GPL',
         'GNU Lesser General Public License (LGPL',
         'GNU General Public License (GPL',
         'GNU Lesser General Public (LGPL',
         'GNU Lesser General Public (LGPL',
         'GNU Lesser General Public (LGPL'
         ]
     assert expected == results
Example no. 35
def get_match(text):
    """
    Return a tuple of (license key, exactness, score) where:
    - license key is the top matched license key, or None;
    - exactness is True if this is an exact match, False if the match is
      ok, and None if the match is weak;
    - score is the match score.
    """
    idx = get_index()
    matches = list(idx.match(query_string=text, min_score=80))

    if not matches:
        return None, None, 0

    match = matches[0]
    query = match.query
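    # token count of the entire query, compared below against the match
    # length to require that a hash match covers the whole text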
    query_len = len(query.whole_query_run().tokens)
    rule = match.rule
    key = rule.licenses[0]

    is_exact = (
        len(matches) == 1
        and rule.is_license and len(rule.licenses) == 1
        and match.matcher == '1-hash'
        and match.score() == 100
        and match.qlen == query_len
        )
    if is_exact:
        return key, True, 100

    is_ok = (
        len(rule.licenses) == 1
        and match.coverage() > 95
        and match.score() > 95)
    if is_ok:
        return key, False, match.score()

    is_weak = (
        len(rule.licenses) == 1
        and match.coverage() > 90
        and match.score() > 90)
    if is_weak:
        return key, None, match.score()

    if match.score() > 85:
        # junk match
        return key, -1, match.score()
    else:
        return None, None, None
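
A hedged sketch of calling get_match above; the notice text is illustrative and the three returned values follow the docstring:

key, exactness, score = get_match('GNU General Public License version 2')
if exactness is True:
    print('exact match:', key)
elif exactness is False:
    print('good match:', key, score)
elif key:
    print('weak or junk match:', key, score)
else:
    print('no usable match')
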
    def closure_test_function(*args, **kwargs):
        idx = cache.get_index()
        matches = idx.match(location=test_file, min_score=0)
        if not matches:
            matches = []

        detected_expressions = [
            match.rule.license_expression for match in matches
        ]

        # use detection as expected and dump test back
        if regen:
            if not expected_failure:
                license_test.license_expressions = detected_expressions
            license_test.dump()
            return

        try:
            assert expected_expressions == detected_expressions
        except:
            # On failure, we compare against more result data to get additional
            # failure details, including the test_file and full match details
            failure_trace = detected_expressions[:]
            failure_trace.extend([test_name, 'test file: file://' + test_file])

            for match in matches:
                qtext, itext = get_texts(match, location=test_file, idx=idx)
                rule_text_file = match.rule.text_file
                rule_data_file = match.rule.data_file
                failure_trace.extend([
                    '', '', '======= MATCH ====', match,
                    '======= Matched Query Text for:',
                    'file://{test_file}'.format(**locals())
                ])
                if test_data_file:
                    failure_trace.append(
                        'file://{test_data_file}'.format(**locals()))

                failure_trace.append(qtext.splitlines())
                failure_trace.extend([
                    '',
                    '======= Matched Rule Text for:',
                    'file://{rule_text_file}'.format(**locals()),
                    'file://{rule_data_file}'.format(**locals()),
                    itext.splitlines(),
                ])
            # this assert will always fail and provide a detailed failure trace
            assert expected_expressions == failure_trace
Example no. 37
def _print_rule_stats():
    """
    Print rules statistics.
    """
    from licensedcode.cache import get_index
    idx = get_index()
    rules = idx.rules_by_rid
    sizes = Counter(r.length for r in rules)
    print('Top 15 lengths: ', sizes.most_common(15))
    print('15 smallest lengths: ',
          sorted(sizes.iteritems(), key=itemgetter(0))[:15])

    high_sizes = Counter(r.high_length for r in rules)
    print('Top 15 high lengths: ', high_sizes.most_common(15))
    print('15 smallest high lengths: ',
          sorted(high_sizes.iteritems(), key=itemgetter(0))[:15])
Example no. 38
def _print_rule_stats():
    """
    Print rules statistics.
    """
    from licensedcode.cache import get_index
    idx = get_index()
    rules = idx.rules_by_rid
    sizes = Counter(r.length for r in rules)
    print('Top 15 lengths: ', sizes.most_common(15))
    print('15 smallest lengths: ', sorted(sizes.iteritems(),
                                          key=itemgetter(0))[:15])

    high_sizes = Counter(r.high_length for r in rules)
    print('Top 15 high lengths: ', high_sizes.most_common(15))
    print('15 smallest high lengths: ', sorted(high_sizes.iteritems(),
                                               key=itemgetter(0))[:15])
Example no. 39
def get_match(text):
    """
    Return a tuple of (license key, True if exact match, match score, match text)
    e.g.:
        - top matched license key or None,
        - True if this is an exact match, False if the match is ok, None if the match is weak,
        - match score or 0 or None
        - matched text or None
    """

    from licensedcode.cache import get_index

    idx = get_index()
    matches = list(idx.match(query_string=text, min_score=80))
    if not matches:
        return None, None, 0, None

    match = matches[0]
    matched_text = match.matched_text(whole_lines=False)
    query = match.query
    query_len = len(query.whole_query_run().tokens)
    rule = match.rule
    rule_licenses = rule.license_keys()
    key = rule_licenses[0]

    is_exact = (len(matches) == 1 and rule.is_license
                and len(rule_licenses) == 1 and match.matcher == '1-hash'
                and match.score() == 100 and match.qlen == query_len)

    if is_exact:
        return key, True, 100, matched_text

    is_ok = (len(rule_licenses) == 1 and match.coverage() > 95
             and match.score() > 95)
    if is_ok:
        return key, False, match.score(), matched_text

    is_weak = (len(rule_licenses) == 1 and match.coverage() > 90
               and match.score() > 90)
    if is_weak:
        return key, None, match.score(), matched_text

    if match.score() > 85:
        # junk match
        return key, -1, match.score(), matched_text
    else:
        return None, None, None, None
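
A hedged sketch for this four-value variant of get_match; the notice text is illustrative:

key, exactness, score, text = get_match('Licensed under the BSD 2-Clause License')
print(key, exactness, score)
if text:
    print('matched text:', text)
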
    def test_match_works_for_apache_rule(self):
        idx = cache.get_index()
        querys = u'''I am not a license.

            The Apache Software License, Version 2.0
            http://www.apache.org/licenses/LICENSE-2.0.txt
            '''
        matches = idx.match(query_string=querys)

        assert 1 == len(matches)
        match = matches[0]
        assert 'apache-2.0_8.RULE' == match.rule.identifier
        assert match_aho.MATCH_AHO_EXACT == match.matcher

        qtext, _itext = get_texts(match, query_string=querys, idx=idx)
        assert u'The Apache Software License Version 2 0 http www apache org licenses LICENSE 2 0 txt' == qtext
        assert (3, 4) == match.lines()
Example no. 41
    def test_match_works_for_apache_rule(self):
        idx = cache.get_index()
        querys = u'''I am not a license.

            The Apache Software License, Version 2.0
            http://www.apache.org/licenses/LICENSE-2.0.txt
            '''
        matches = idx.match(query_string=querys)

        assert 1 == len(matches)
        match = matches[0]
        assert 'apache-2.0_8.RULE' == match.rule.identifier
        assert match_aho.MATCH_AHO_EXACT == match.matcher

        qtext, _itext = get_texts(match, query_string=querys, idx=idx)
        assert u'The Apache Software License Version 2 0 http www apache org licenses LICENSE 2 0 txt' == qtext
        assert (3, 4) == match.lines()
    def closure_test_function(*args, **kwargs):
        idx = cache.get_index()
        matches = idx.match(location=test_file, min_score=min_score,
                            # if negative, do not detect negative rules when testing negative rules
                            detect_negative=detect_negative)

        if not matches:
            matches = []

        # TODO: we should expect matches properly, not with a grab bag of flat license keys
        # flattened list of all detected license keys across all matches.
        detected_licenses = functional.flatten(map(unicode, match.rule.licenses) for match in matches)
        try:
            if not detect_negative:
                # we skipped negative detection for a negative rule
                # we just want to ensure that the rule was matched properly
                assert matches and not expected_licenses and not detected_licenses
            else:
                assert expected_licenses == detected_licenses
        except:
            # On failure, we compare against more result data to get additional
            # failure details, including the test_file and full match details
            match_failure_trace = []

            if trace_text:
                for match in matches:
                    qtext, itext = get_texts(match, location=test_file, idx=idx)
                    rule_text_file = match.rule.text_file
                    rule_data_file = match.rule.data_file
                    match_failure_trace.extend(['', '',
                        '======= MATCH ====', match,
                        '======= Matched Query Text for:',
                        'file://{test_file}'.format(**locals())
                    ])
                    if test_data_file:
                        match_failure_trace.append('file://{test_data_file}'.format(**locals()))
                    match_failure_trace.append(qtext.splitlines())
                    match_failure_trace.extend(['',
                        '======= Matched Rule Text for:',
                        'file://{rule_text_file}'.format(**locals()),
                        'file://{rule_data_file}'.format(**locals()),
                        itext.splitlines(),
                    ])
            # this assert will always fail and provide a detailed failure trace
            assert expected_licenses == detected_licenses + [test_name, 'test file: file://' + test_file] + match_failure_trace
def get_license_matches_from_query_string(query_string, start_line=1):
    """
    Return a sequence of LicenseMatch objects from license detection of the
    `query_string` starting at ``start_line`` number. This is useful when
    matching a standalone text fragment that is part of a larger text.
    """
    if not query_string:
        return []
    from licensedcode import cache

    idx = cache.get_index()
    qry = query.build_query(
        query_string=query_string,
        idx=idx,
        start_line=start_line,
    )

    return idx.match_query(qry=qry)
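
A short sketch for get_license_matches_from_query_string; the fragment and start line are illustrative:

fragment = 'SPDX-License-Identifier: MIT'
for match in get_license_matches_from_query_string(fragment, start_line=10):
    # start_line shifts the reported line numbers into the larger text
    print(match.rule.license_expression, match.lines())
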
Example no. 44
 def check_position(self, test_path, expected, with_span=True, print_results=False):
     """
     Check license detection in file or folder against expected result.
     Expected is a list of (license, lines span, qspan span) tuples.
     """
     test_location = self.get_test_loc(test_path)
     results = []
     # FULL INDEX!!
     idx = cache.get_index()
     matches = idx.match(test_location)
     for match in matches:
         for detected in match.rule.licenses:
             if print_results:
                 print()
                 print(match)
                 print_matched_texts(match, location=test_location, idx=idx)
             results.append((detected, match.lines(), with_span and match.qspan or None))
     assert expected == results
Example no. 45
    def test_Query_with_spdx_basic(self):
        idx = cache.get_index()
        querys = '''
 * SPDX-License-Identifier: (BSD-3-Clause OR EPL-1.0 OR Apache-2.0 OR MIT)
 * SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0
            Always

From uboot: the first two lines are patch-like:
 * SPDX-License-Identifier:     GPL-2.0+ BSD-2-Clause
            '''

        qry = Query(query_string=querys, idx=idx)
        expected = [
            ('SPDX-License-Identifier:  (BSD-3-Clause OR EPL-1.0 OR Apache-2.0 OR MIT)',  0,  15),
            ('SPDX-License-Identifier:  EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0',16,  34),
            ('SPDX-License-Identifier:      GPL-2.0+ BSD-2-Clause', 45, 53)]

        assert qry.spdx_lines == expected
Example no. 46
    def test_query_run_tokens_matchable(self):
        idx = cache.get_index()
        # NOTE: this is not a token present in any rules or licenses
        unknown_token = u'baridationally'
        assert unknown_token not in idx.dictionary

        query_s = u' '.join(u'''

        3 unable to create proc entry license gpl description driver author eric
        depends 2 6 24 19 generic smp mod module acpi baridationally register driver
        proc acpi disabled acpi install notify acpi baridationally get status cache
        caches create proc entry baridationally generate proc event acpi evaluate
        object acpi remove notify remove proc entry acpi baridationally driver acpi
        acpi gcc gnu 4 2 3 ubuntu 4 2 3 gcc gnu 4 2 3 ubuntu 4 2 3 current stack
        pointer current stack pointer this module end usr src modules acpi include
        linux include asm include asm generic include acpi acpi c posix types 32 h
        types h types h h h h h
        '''.split())
        result = Query(query_string=query_s, idx=idx)
        assert 1 == len(result.query_runs)
        qr = result.query_runs[0]
        expected_qr0 = u' '.join(u'''
        3 unable to create proc entry license gpl description driver author eric
        depends 2 6 24 19 generic smp mod module acpi             register driver
        proc acpi disabled acpi install notify acpi               get status cache
        caches create proc entry                generate proc event acpi evaluate
        object acpi remove notify remove proc entry acpi             driver acpi
        acpi gcc gnu 4 2 3 ubuntu 4 2 3 gcc gnu 4 2 3 ubuntu 4 2 3 current stack
        pointer current stack pointer this module end usr src modules acpi include
        linux include asm include asm generic include acpi acpi c posix types 32 h
        types h types h h h h h
        '''.split())
        assert expected_qr0 == u' '.join(idx.tokens_by_tid[t]
                                         for t in qr.tokens)

        assert expected_qr0 == u' '.join(idx.tokens_by_tid[t]
                                         for p, t in enumerate(qr.tokens)
                                         if p in qr.matchables)

        # only license, gpl and gnu are in the high matchables
        expected = u'license gpl gnu gnu'
        assert expected == u' '.join(idx.tokens_by_tid[t]
                                     for p, t in enumerate(qr.tokens)
                                     if p in qr.high_matchables)
Example no. 47
 def test_query_run_tokens(self):
     query_s = u' '.join(u''' 3 unable to create proc entry license gpl
     description driver author eric depends 2 6 24 19 generic smp mod module acpi
     baridationally register driver proc acpi disabled acpi install notify acpi baridationally get
     status cache caches create proc entry baridationally generate proc event acpi evaluate
     object acpi remove notify remove proc entry acpi baridationally driver acpi acpi gcc gnu
     4 2 3 ubuntu 4 2 3 gcc gnu 4 2 3 ubuntu 4 2 3 current stack pointer current
     stack pointer this module end usr src modules acpi include linux include asm
     include asm generic include acpi acpi c posix types 32 h types h types h h h
     h h
     '''.split())
     idx = cache.get_index()
     result = Query(query_string=query_s, idx=idx)
     assert 1 == len(result.query_runs)
     qr = result.query_runs[0]
     # NOTE: this is not a token present in any rules or licenses
     unknown_tokens = ('baridationally',)
      assert all(t not in idx.dictionary for t in unknown_tokens)
     assert u' '.join([t for t in query_s.split() if t not in unknown_tokens]) == u' '.join(idx.tokens_by_tid[t] for t in qr.tokens)
Example no. 48
    def test_match_handles_negative_rules_and_does_not_match_negative_regions_properly(self):
        # note: this test relies on the negative rule: not-a-license_busybox_2.RULE
        # with this text:
        # "libbusybox is GPL, not LGPL, and exports no stable API that might act as a copyright barrier."
        # and relies on the short rules that detect GPL and LGPL
        idx = cache.get_index()
        # lines 3 and 4 should NOT be part of any matches
        # they should match the negative "not-a-license_busybox_2.RULE"
        negative_lines_not_to_match = 3, 4
        querys = u'''
            licensed under the LGPL license
            libbusybox is GPL, not LGPL, and exports no stable API
            that might act as a copyright barrier.
            for the license
            license: dual BSD/GPL
            '''
        matches = idx.match(query_string=querys)

        for match in matches:
            for line in negative_lines_not_to_match:
                assert line not in match.lines()
Example no. 49
    def test_query_run_tokens_matchable(self):
        idx = cache.get_index()
        # NOTE: this is not a token present in any rules or licenses
        unknown_token = u'baridationally'
        assert unknown_token not in idx.dictionary

        query_s = u' '.join(u'''

        3 unable to create proc entry license gpl description driver author eric
        depends 2 6 24 19 generic smp mod module acpi baridationally register driver
        proc acpi disabled acpi install notify acpi baridationally get status cache
        caches create proc entry baridationally generate proc event acpi evaluate
        object acpi remove notify remove proc entry acpi baridationally driver acpi
        acpi gcc gnu 4 2 3 ubuntu 4 2 3 gcc gnu 4 2 3 ubuntu 4 2 3 current stack
        pointer current stack pointer this module end usr src modules acpi include
        linux include asm include asm generic include acpi acpi c posix types 32 h
        types h types h h h h h
        '''.split())
        result = Query(query_string=query_s, idx=idx)
        assert 1 == len(result.query_runs)
        qr = result.query_runs[0]
        expected_qr0 = u' '.join(u'''
        3 unable to create proc entry license gpl description driver author eric
        depends 2 6 24 19 generic smp mod module acpi             register driver
        proc acpi disabled acpi install notify acpi               get status cache
        caches create proc entry                generate proc event acpi evaluate
        object acpi remove notify remove proc entry acpi             driver acpi
        acpi gcc gnu 4 2 3 ubuntu 4 2 3 gcc gnu 4 2 3 ubuntu 4 2 3 current stack
        pointer current stack pointer this module end usr src modules acpi include
        linux include asm include asm generic include acpi acpi c posix types 32 h
        types h types h h h h h
        '''.split())
        assert expected_qr0 == u' '.join(idx.tokens_by_tid[t] for t in qr.tokens)

        assert expected_qr0 == u' '.join(idx.tokens_by_tid[t] for p, t in enumerate(qr.tokens) if p in qr.matchables)

        # only gpl is in high matchables
        expected = u'gpl'
        assert expected == u' '.join(idx.tokens_by_tid[t] for p, t in enumerate(qr.tokens) if p in qr.high_matchables)
Example no. 50
    def test_match_has_correct_line_positions_for_query_with_repeats(self):
        expected = [
            # licenses, match.lines(), qtext,
            ([u'apache-2.0'], (1, 2), u'The Apache Software License Version 2 0 http www apache org licenses LICENSE 2 0 txt'),
            ([u'apache-2.0'], (3, 4), u'The Apache Software License Version 2 0 http www apache org licenses LICENSE 2 0 txt'),
            ([u'apache-2.0'], (5, 6), u'The Apache Software License Version 2 0 http www apache org licenses LICENSE 2 0 txt'),
            ([u'apache-2.0'], (7, 8), u'The Apache Software License Version 2 0 http www apache org licenses LICENSE 2 0 txt'),
            ([u'apache-2.0'], (9, 10), u'The Apache Software License Version 2 0 http www apache org licenses LICENSE 2 0 txt'),
        ]
        test_path = 'positions/license1.txt'

        test_location = self.get_test_loc(test_path)
        idx = cache.get_index()
        matches = idx.match(test_location)
        for i, match in enumerate(matches):
            ex_lics, ex_lines, ex_qtext = expected[i]
            qtext, _itext = get_texts(match, location=test_location, idx=idx)

            try:
                assert ex_lics == match.rule.licenses
                assert ex_lines == match.lines()
                assert ex_qtext == qtext
            except AssertionError:
                assert expected[i] == (match.rule.licenses, match.lines(), qtext)
 def test_match_license_performance_profiling_on_full_index_mixed_matching_long(self):
     # pre-index: we are profiling only the detection, not the indexing
     idx = cache.get_index()
     stats_file = 'license_match_mixed_matching_full_index_profile_log2.txt'
     locations = [self.get_test_loc(f) for f in ['perf/test1.txt', 'perf/whatever.py', 'perf/udll.cxx']]
     self.profile_match(idx, locations, stats_file)
Example no. 52
def scan(input_path,
         scanners,
         verbose=False, quiet=False,
         processes=1, timeout=DEFAULT_TIMEOUT,
         diag=False,
         scans_cache_class=None,
         strip_root=False,
         full_root=False,
         pre_scan_plugins=()):
    """
    Return a tuple of (files_count, scan_results, success) where
    scan_results is an iterable and success is a boolean.

    Run each requested scan proper: each individual file scan is cached
    on disk to free memory. Then the whole set of scans is loaded from
    the cache and streamed at the end.
    """
    assert scans_cache_class
    scan_summary = OrderedDict()
    scan_summary['scanned_path'] = input_path
    scan_summary['processes'] = processes

    # Display scan start details
    ############################
    # FIXME: it does not make sense to use tuple and positional values
    scans = [k for k, v in scanners.items() if v[0]]
    _scans = ', '.join(scans)
    if not quiet:
        echo_stderr('Scanning files for: %(_scans)s with %(processes)d process(es)...' % locals())

    scan_summary['scans'] = scans[:]
    scan_start = time()
    indexing_time = 0
    # FIXME: It does not make sense to use tuple and positional values
    with_licenses, _ = scanners.get('licenses', (False, ''))
    if with_licenses:
        # build index outside of the main loop for speed
        # this also ensures that forked processes will get the index on POSIX naturally
        if not quiet:
            echo_stderr('Building license detection index...', fg='green', nl=False)
        from licensedcode.cache import get_index
        get_index(False)
        indexing_time = time() - scan_start
        if not quiet:
            echo_stderr('Done.', fg='green', nl=True)

    scan_summary['indexing_time'] = indexing_time

    pool = None

    resources = resource_paths(input_path, diag, scans_cache_class, pre_scan_plugins=pre_scan_plugins)
    paths_with_error = []
    files_count = 0

    logfile_path = scans_cache_class().cache_files_log
    if on_linux:
        file_logger = partial(open, logfile_path, 'wb')
    else:
        file_logger = partial(codecs.open, logfile_path, 'w', encoding='utf-8')

    with file_logger() as logfile_fd:

        logged_resources = _resource_logger(logfile_fd, resources)

        scanit = partial(_scanit, scanners=scanners, scans_cache_class=scans_cache_class,
                         diag=diag, timeout=timeout, processes=processes)

        max_file_name_len = compute_fn_max_len()
        # do not display a file name in the progress bar if fewer than 10 chars are available.
        display_fn = bool(max_file_name_len > 10)
        try:
            if processes:
                # maxtasksperchild helps with recycling processes in case of leaks
                pool = get_pool(processes=processes, maxtasksperchild=1000)
                # Using chunksize is documented as much more efficient in the Python doc.
                # Yet "1" still provides a better and more progressive feedback.
                # With imap_unordered, results are returned as soon as ready and out of order.
                scanned_files = pool.imap_unordered(scanit, logged_resources, chunksize=1)
                pool.close()
            else:
                # no multiprocessing with processes=0
                scanned_files = imap(scanit, logged_resources)
                if not quiet:
                    echo_stderr('Disabling multi-processing and multi-threading...', fg='yellow')

            if not quiet:
                echo_stderr('Scanning files...', fg='green')

            def scan_event(item):
                """Progress event displayed each time a file is scanned"""
                if quiet or not item or not display_fn:
                    return ''
                _scan_success, _scanned_path = item
                _scanned_path = unicode(toascii(_scanned_path))
                if verbose:
                    _progress_line = _scanned_path
                else:
                    _progress_line = fixed_width_file_name(_scanned_path, max_file_name_len)
                return style('Scanned: ') + style(_progress_line, fg=_scan_success and 'green' or 'red')

            scanning_errors = []
            files_count = 0
            with progressmanager(
                scanned_files, item_show_func=scan_event, show_pos=True,
                verbose=verbose, quiet=quiet, file=sys.stderr) as scanned:
                while True:
                    try:
                        result = scanned.next()
                        scan_success, scanned_rel_path = result
                        if not scan_success:
                            paths_with_error.append(scanned_rel_path)
                        files_count += 1
                    except StopIteration:
                        break
                    except KeyboardInterrupt:
                        print('\nAborted with Ctrl+C!')
                        if pool:
                            pool.terminate()
                        break
        finally:
            if pool:
                # ensure the pool is really dead to work around a Python 2.7.3 bug:
                # http://bugs.python.org/issue15101
                pool.terminate()

    # TODO: add stats to results somehow

    # Compute stats
    ##########################
    scan_summary['files_count'] = files_count
    scan_summary['files_with_errors'] = paths_with_error
    total_time = time() - scan_start
    scanning_time = total_time - indexing_time
    scan_summary['total_time'] = total_time
    scan_summary['scanning_time'] = scanning_time

    files_scanned_per_second = round(float(files_count) / scanning_time, 2)
    scan_summary['files_scanned_per_second'] = files_scanned_per_second

    if not quiet:
        # Display stats
        ##########################
        echo_stderr('Scanning done.', fg=paths_with_error and 'red' or 'green')
        if paths_with_error:
            if diag:
                echo_stderr('Some files failed to scan properly:', fg='red')
                # iterate cached results to collect all scan errors
                cached_scan = scans_cache_class()
                root_dir = _get_root_dir(input_path, strip_root, full_root)
                scan_results = cached_scan.iterate(scans, root_dir, paths_subset=paths_with_error)
                for scan_result in scan_results:
                    errored_path = scan_result.get('path', '')
                    echo_stderr('Path: ' + errored_path, fg='red')
                    for error in scan_result.get('scan_errors', []):
                        for emsg in error.splitlines(False):
                            echo_stderr('  ' + emsg)
                    echo_stderr('')
            else:
                echo_stderr('Some files failed to scan properly. Use the --diag option for additional details:', fg='red')
                for errored_path in paths_with_error:
                    echo_stderr(' ' + errored_path, fg='red')

        echo_stderr('Scan statistics: %(files_count)d files scanned in %(total_time)ds.' % locals())
        echo_stderr('Scan options:    %(_scans)s with %(processes)d process(es).' % locals())
        echo_stderr('Scanning speed:  %(files_scanned_per_second)s files per sec.' % locals())
        echo_stderr('Scanning time:   %(scanning_time)ds.' % locals())
        echo_stderr('Indexing time:   %(indexing_time)ds.' % locals(), reset=True)

    success = not paths_with_error
    # finally return an iterator on cached results
    cached_scan = scans_cache_class()
    root_dir = _get_root_dir(input_path, strip_root, full_root)
    return files_count, cached_scan.iterate(scans, root_dir), success
Example no. 53
 def test_query_from_binary_lkms_1(self):
     location = self.get_test_loc('query/ath_pci.ko')
     idx = cache.get_index()
     result = Query(location, idx=idx)
     assert len(result.query_runs) < 15
 def test_match_license_performance_profiling_on_full_index_with_spurious_filtered_seq_matches(self):
     # pre-index: we are profiling only the detection, not the indexing
     idx = cache.get_index()
     stats_file = 'license_match_mixed_matching_full_index_profile_filtered_seq_matches_log.txt'
     locations = [self.get_test_loc(f) for f in ['perf/bsd-new_37.txt']]
     self.profile_match(idx, locations, stats_file)
 def test_match_license_performance_profiling_on_full_index_small_binary_lkm2(self):
     # pre-index: we are profiling only the detection, not the indexing
     idx = cache.get_index()
     stats_file = 'license_match_full_index_profile_log.txt'
     locations = [self.get_test_loc('perf/ath_pci.ko')]
     self.profile_match(idx, locations, stats_file)