def test_get_emails(self):
    """
    Check the emails reported by `api.get_emails` for the
    'api/email/3w-xxxx.c' test file, first without a threshold argument
    and then with an explicit `threshold=0`; both calls must yield the
    same three entries.
    """
    location = self.get_test_loc('api/email/3w-xxxx.c')
    detected = api.get_emails(location)

    # One email entry is expected on each of these lines; every entry
    # starts and ends on the same line.
    expected_entries = [
        OrderedDict([
            (u'email', u'*****@*****.**'),
            (u'start_line', line_number),
            (u'end_line', line_number),
        ])
        for line_number in (1, 3, 5)
    ]
    expected = dict(emails=expected_entries)
    assert expected == detected

    # An explicit zero threshold must report the same entries.
    detected = api.get_emails(location, threshold=0)
    assert expected == detected
def test_get_emails_with_threshold(self):
    """
    Check that `api.get_emails` called with `threshold=1` on the
    'api/email/3w-xxxx.c' test file reports a single email entry, located
    on line 1.
    """
    location = self.get_test_loc('api/email/3w-xxxx.c')
    detected = api.get_emails(location, threshold=1)

    only_entry = OrderedDict([
        (u'email', u'*****@*****.**'),
        (u'start_line', 1),
        (u'end_line', 1),
    ])
    expected = dict(emails=[only_entry])
    assert expected == detected
def check_ignorable_clues(rule):
    """
    Validate that all ignorable clues defined in a `rule` Rule object are
    properly detected in that rule text file.

    The rule text file is scanned for copyrights, URLs and emails. The
    detected values are grouped under 'ignorable_<kind>' keys and compared
    with the matching `rule.ignorable_*` attributes; empty groups are
    dropped on both sides before comparing.

    Raise an AssertionError when the expected and detected ignorables
    differ. The failure message is a comparison of YAML dumps that also
    lists file:// paths to the rule data and text files.
    """
    from itertools import chain

    from scancode import api

    text_file = rule.text_file

    # scan clues
    scan_data = {}
    scan_data.update(api.get_copyrights(text_file))
    scan_data.update(api.get_urls(text_file, threshold=0))
    scan_data.update(api.get_emails(text_file, threshold=0))

    results = OrderedDict()
    for what, detections in scan_data.items():
        # remove lines
        for detected in detections:
            detected.pop('start_line', None)
            detected.pop('end_line', None)

        # remove keys and keep only values e.g. a list of detected copyrights,
        # emails, etc
        values = chain.from_iterable(
            detected.values() for detected in detections)
        results['ignorable_' + what] = sorted(set(values))

    # collect ignorables
    expected = OrderedDict([
        ('ignorable_copyrights', rule.ignorable_copyrights or []),
        ('ignorable_holders', rule.ignorable_holders or []),
        ('ignorable_authors', rule.ignorable_authors or []),
        ('ignorable_urls', rule.ignorable_urls or []),
        ('ignorable_emails', rule.ignorable_emails or []),
    ])

    # keep only the non-empty groups, in sorted key order, on both sides
    results = OrderedDict([(k, v) for k, v in sorted(results.items()) if v])
    expected = OrderedDict([(k, v) for k, v in sorted(expected.items()) if v])

    try:
        assert expected == results
    except AssertionError:
        # Only a failed comparison is handled here: a bare except would also
        # swallow KeyboardInterrupt, SystemExit and unrelated errors.
        # On failure, we compare again to get additional failure details such
        # as a clickable text_file path
        data_file = rule.data_file
        if not data_file:
            data_file = text_file.replace('.LICENSE', '.yml')

        results['files'] = [
            'file://{data_file}'.format(data_file=data_file),
            'file://{text_file}'.format(text_file=text_file),
        ]
        # this assert will always fail and provide a more detailed failure
        # trace
        assert saneyaml.dump(expected) == saneyaml.dump(results)