def test_get_urls(self):
     """
     Check that api.get_urls returns the same three URLs with their line
     numbers when called without a threshold and with threshold=0.
     """
     location = self.get_test_loc('api/url/IMarkerActionFilter.java')

     # (url, line) pairs: every expected URL starts and ends on one line
     urls_and_lines = [
         (u'http://www.eclipse.org/legal/epl-v10.html', 2),
         (u'https://github.com/rpm-software-management', 4),
         (u'https://gitlab.com/Conan_Kudo', 6),
     ]
     expected = {
         'urls': [
             OrderedDict([
                 (u'url', url),
                 (u'start_line', line),
                 (u'end_line', line),
             ])
             for url, line in urls_and_lines
         ],
     }

     assert expected == api.get_urls(location)
     assert expected == api.get_urls(location, threshold=0)
 def test_get_urls_with_threshold(self):
     """
     Check that api.get_urls called with threshold=1 returns a single URL
     detected on line 2 of the test file.
     """
     location = self.get_test_loc('api/url/IMarkerActionFilter.java')
     detected = api.get_urls(location, threshold=1)

     only_url = OrderedDict([
         (u'url', u'http://www.eclipse.org/legal/epl-v10.html'),
         (u'start_line', 2),
         (u'end_line', 2),
     ])
     assert {'urls': [only_url]} == detected
Example #3
0
def check_ignorable_clues(rule):
    """
    Validate that all ignorable clues defined in a `rule` Rule object are
    properly detected in that rule text file.

    The rule text file is scanned with `api.get_copyrights`, `api.get_urls`
    and `api.get_emails` (URLs and emails with threshold=0). For each key of
    the scan data, line numbers are dropped and the remaining detected values
    are collected as a sorted list of unique values under the key prefixed
    with 'ignorable_'. These are compared to the `ignorable_*` attributes of
    `rule`; empty entries are ignored on both sides.

    Raise an AssertionError if the detected and declared ignorables differ.
    """
    from itertools import chain
    from scancode import api

    text_file = rule.text_file

    # scan clues
    scan_data = {}
    scan_data.update(api.get_copyrights(text_file))
    scan_data.update(api.get_urls(text_file, threshold=0))
    scan_data.update(api.get_emails(text_file, threshold=0))

    results = OrderedDict()
    for what, detections in scan_data.items():
        # remove lines
        for detected in detections:
            detected.pop('start_line', None)
            detected.pop('end_line', None)

        # remove keys and keep only values e.g. a list of detected copyrights,
        # emails, etc
        values = chain.from_iterable(
            detected.values() for detected in detections)
        results['ignorable_' + what] = sorted(set(values))

    # collect ignorables
    expected = OrderedDict([
        ('ignorable_copyrights', rule.ignorable_copyrights or []),
        ('ignorable_holders', rule.ignorable_holders or []),
        ('ignorable_authors', rule.ignorable_authors or []),
        ('ignorable_urls', rule.ignorable_urls or []),
        ('ignorable_emails', rule.ignorable_emails or []),
    ])

    # keep only non-empty entries, in sorted key order, on both sides
    results = OrderedDict([(k, v) for k, v in sorted(results.items()) if v])
    expected = OrderedDict([(k, v) for k, v in sorted(expected.items()) if v])

    try:
        assert expected == results
    except AssertionError:
        # Only a failed comparison is handled here: any other exception
        # (including KeyboardInterrupt) propagates unchanged.
        # On failure, we compare again to get additional failure details such
        # as a clickable text_file path

        data_file = rule.data_file
        if not data_file:
            data_file = text_file.replace('.LICENSE', '.yml')
        results['files'] = [
            'file://{data_file}'.format(data_file=data_file),
            'file://{text_file}'.format(text_file=text_file),
        ]
        # this assert is expected to always fail (results now has an extra
        # 'files' entry) and provides a more detailed failure trace
        assert saneyaml.dump(expected) == saneyaml.dump(results)