def get_copyrights(location, deadline=sys.maxsize, **kwargs):
    """
    Return a mapping with 'copyrights', 'holders' and 'authors' keys. Each value
    is a list of mappings with 'value', 'start_line' and 'end_line' keys, one
    for each detection of that type reported for the file at `location`.

    `deadline` is passed through to the detector. Extra keyword arguments are
    accepted and ignored.
    """
    from cluecode.copyrights import detect_copyrights

    # One list per reported detection type, in output order.
    results = OrderedDict([
        ('copyrights', []),
        ('holders', []),
        ('authors', []),
    ])

    for dtype, value, start, end in detect_copyrights(location, deadline=deadline):
        detected = results.get(dtype)
        if detected is None:
            # Not one of the three detection types we report: skip it.
            continue
        detected.append(OrderedDict([
            ('value', value),
            ('start_line', start),
            ('end_line', end),
        ]))

    return results
def get_copyrights(
    location,
    deadline=sys.maxsize,
    **kwargs,
):
    """
    Return a mapping with 'copyrights', 'holders' and 'authors' keys built from
    the detections found in the file at `location`.

    `deadline` is passed through to the detector. Extra keyword arguments are
    accepted and ignored.
    """
    from cluecode.copyrights import Detection
    from cluecode.copyrights import detect_copyrights

    # Ask for everything except the "all rights reserved" part of statements.
    detection_options = {
        'include_copyrights': True,
        'include_holders': True,
        'include_authors': True,
        'include_copyright_years': True,
        'include_copyright_allrights': False,
        'deadline': deadline,
    }
    detections = detect_copyrights(location, **detection_options)

    copyrights, holders, authors = Detection.split(detections, to_dict=True)

    # TODO: do something if we missed the deadline
    return {
        'copyrights': copyrights,
        'holders': holders,
        'authors': authors,
    }
def test_detect_with_lines_only_holders(self):
    """
    Check that only holder detections, with their lines, are reported when
    copyrights and authors are turned off.
    """
    test_file = self.get_test_loc('copyrights_basic/essential_smoke-ibm_c.c')
    detections = copyrights_module.detect_copyrights(
        test_file,
        copyrights=False,
        authors=False,
    )
    results = list(detections)
    assert results == [
        ('holders', u'IBM and others', 6, 6),
        ('holders', u'Eclipse, IBM and others', 8, 8),
    ]
def test_detect_with_lines(self):
    """
    Check the detection types, values and start/end lines reported with the
    default detection options.
    """
    test_file = self.get_test_loc('copyrights_basic/essential_smoke-ibm_c.c')
    results = list(copyrights_module.detect_copyrights(test_file))
    expected = [
        ('copyrights', u'Copyright IBM and others (c) 2008', 6, 6),
        ('holders', u'IBM and others', 6, 6),
        ('copyrights', u'Copyright Eclipse, IBM and others', 8, 8),
        ('holders', u'Eclipse, IBM and others', 8, 8),
        ('copyrights', u'(c) 2008', 8, 8),
    ]
    assert expected == results
def test_detect_with_lines(self):
    """
    Check the copyright and holder detection objects, with their lines,
    reported with the default detection options.
    """
    test_file = self.get_test_loc('copyrights_basic/essential_smoke-ibm_c.c')
    results = list(copyrights.detect_copyrights(test_file))

    copyright_detection = copyrights.CopyrightDetection
    holder_detection = copyrights.HolderDetection
    assert results == [
        copyright_detection('Copyright IBM and others (c) 2008', 6, 6),
        holder_detection('IBM and others', 6, 6),
        copyright_detection('Copyright Eclipse, IBM and others', 8, 8),
        holder_detection('Eclipse, IBM and others', 8, 8),
        copyright_detection('(c) 2008', 8, 8),
    ]
def get_copyrights(location=None):
    """
    Yield one dictionary per detection that has copyright statements in the
    file at `location`. Each dictionary has the "statements", "start_line" and
    "end_line" keys. Detections without statements are skipped.
    """
    from cluecode.copyrights import detect_copyrights

    for detection in detect_copyrights(location):
        # Authors, years and holders are reported too but not used here.
        statements, _authors, _years, _holders, start_line, end_line = detection
        if statements:
            yield {
                "statements": statements,
                "start_line": start_line,
                "end_line": end_line,
            }
def check_full_detections(expected, test_file):
    """
    Detect copyrights only (no authors, no holders) in `test_file` and assert
    that the list of detections equals the `expected` list.
    """
    detections = copyrights.detect_copyrights(
        test_file,
        include_copyrights=True,
        include_authors=False,
        include_holders=False,
    )
    results = list(detections)
    assert results == expected
def test_detect(self):
    """
    Check that only copyright detections are reported when holders and
    authors are turned off.
    """
    test_file = self.get_test_loc('copyrights_basic/essential_smoke-ibm_c.c')
    detections = copyrights.detect_copyrights(
        test_file,
        include_holders=False,
        include_authors=False,
    )
    results = list(detections)

    copyright_detection = copyrights.CopyrightDetection
    assert results == [
        copyright_detection('Copyright IBM and others (c) 2008', 6, 6),
        copyright_detection('Copyright Eclipse, IBM and others', 8, 8),
        copyright_detection('(c) 2008', 8, 8),
    ]
def check_detection_with_lines(expected, test_file):
    """
    Detect copyrights only (no authors, no holders) in `test_file` and assert
    that the (statement, start line, end line) tuples equal the `expected`
    list.
    """
    results = [
        (statement, start_line, end_line)
        # The detection type is always the first item and is not compared.
        for _dtype, statement, start_line, end_line
        in copyrights_module.detect_copyrights(
            test_file,
            copyrights=True,
            authors=False,
            holders=False,
        )
    ]
    assert expected == results
def test_detect_with_lines_only_holders(self):
    """
    Check that only holder detection objects, with their lines, are reported
    when copyrights and authors are turned off.
    """
    test_file = self.get_test_loc('copyrights_basic/essential_smoke-ibm_c.c')
    detections = copyrights.detect_copyrights(
        test_file,
        include_copyrights=False,
        include_holders=True,
        include_authors=False,
    )
    results = list(detections)

    holder_detection = copyrights.HolderDetection
    assert results == [
        holder_detection('IBM and others', 6, 6),
        holder_detection('Eclipse, IBM and others', 8, 8),
    ]
def copyright_detector(location):
    """
    Return a list of the copyright statement values detected in the file at
    `location`. Return an empty list if `location` is empty or None.

    Only detections of type 'copyrights' are kept; any other detection type
    reported by the detector is skipped.
    """
    if not location:
        # Nothing to scan: still hand back a list so that callers can always
        # iterate over the result.
        return []

    from cluecode.copyrights import detect_copyrights

    return [
        value
        for dtype, value, _start, _end in detect_copyrights(location)
        if dtype == 'copyrights'
    ]
def cli(copyrights_file):
    """
    Create copyright and holder tests rules from a text file that has one line per test.
    The expected holder and copyright are from detection.

    For instance:
        Copyright (c) All the Raige Dog Salon
    """
    from cluecode.copyrights import detect_copyrights
    from cluecode_test_utils import CopyrightTest  # NOQA

    generated_dir = path.join(cluecode_test_utils.test_env.test_data_dir, 'generated')
    existing = build_dupe_index()
    print()

    for text in load_data(copyrights_file):
        if text in existing:
            print('Copyright Test skipped, existing:', text)
            print()
            continue

        # Save the text as a new test file.
        test_file_loc = find_test_file_loc(generated_dir)
        with io.open(test_file_loc, 'w') as test_file:
            test_file.write(text)

        # Collect expected values, grouped by detection type.
        detected = {'copyrights': [], 'holders': [], 'authors': []}
        for dtype, value, _start, _end in detect_copyrights([text]):
            values = detected.get(dtype)
            if values is not None:
                values.append(value)

        test = CopyrightTest(
            what=['holders', 'copyrights', 'authors'],
            copyrights=detected['copyrights'],
            holders=detected['holders'],
            authors=detected['authors'],
        )
        test.test_file = test_file_loc
        test.data_file = test_file_loc + '.yml'
        test.dump()

        # Remember this text so that a repeated line is skipped.
        existing.add(text)
        print('Copyright Test added:', text)
        print()
def get_copyrights(location):
    """
    Yield one ordered mapping per detection found in the file at `location`,
    with the 'statements', 'holders', 'authors', 'start_line' and 'end_line'
    keys, in this order.
    """
    from cluecode.copyrights import detect_copyrights

    for detection in detect_copyrights(location):
        # The detected years are reported too but are not part of the results.
        statements, authors, _years, holders, start_line, end_line = detection
        # FIXME: we should call this copyright instead, and yield one item per statement
        yield OrderedDict([
            ('statements', statements),
            ('holders', holders),
            ('authors', authors),
            ('start_line', start_line),
            ('end_line', end_line),
        ])
def get_copyrights(location):
    """
    Yield an ordered mapping for each detection found in the file at
    `location`. The keys are 'statements', 'holders', 'authors', 'start_line'
    and 'end_line', in this order.
    """
    from cluecode.copyrights import detect_copyrights

    result_keys = ('statements', 'holders', 'authors', 'start_line', 'end_line')

    for detection in detect_copyrights(location):
        # The detected years are reported too but are not part of the results.
        statements, authors, _years, holders, start_line, end_line = detection
        result_values = (statements, holders, authors, start_line, end_line)
        # FIXME: we should call this copyright instead, and yield one item per statement
        yield OrderedDict(zip(result_keys, result_values))
def get_copyrights(location=None):
    """
    Yield a dictionary with the 'statements', 'start_line' and 'end_line' keys
    for each detection that has copyright statements in the file at
    `location`. Detections without statements are skipped.
    """
    from cluecode.copyrights import detect_copyrights

    for detection in detect_copyrights(location):
        # Only the statements and the lines are used; the other items are ignored.
        statements, _authors, _years, _holders, start_line, end_line = detection
        if statements:
            yield dict(
                statements=statements,
                start_line=start_line,
                end_line=end_line,
            )
def cli(copyrights_file):
    """
    Create copyright and holder tests rules from a text file that has one line per test.
    The expected holder and copyright are from detection.

    For instance:
        Copyright (c) All the Raige Dog Salon
    """
    from cluecode.copyrights import Detection
    from cluecode.copyrights import detect_copyrights
    from cluecode_test_utils import CopyrightTest  # NOQA

    generated_dir = path.join(cluecode_test_utils.test_env.test_data_dir, "generated")
    existing = build_dupe_index()
    print()

    for text in load_data(copyrights_file):
        if text in existing:
            print("Copyright Test skipped, existing:", text)
            print()
            continue

        # Save the text as a new test file.
        test_file_loc = find_test_file_loc(generated_dir)
        with io.open(test_file_loc, "w") as test_file:
            test_file.write(text)

        # Collect expected values by running detection on the saved file.
        copyrights, holders, authors = Detection.split_values(
            detect_copyrights(test_file_loc)
        )

        test = CopyrightTest(
            what=["holders", "copyrights", "authors"],
            copyrights=copyrights,
            holders=holders,
            authors=authors,
        )
        test.test_file = test_file_loc
        test.data_file = test_file_loc + ".yml"
        test.dump()

        # Remember this text so that a repeated line is skipped.
        existing.add(text)
        print("Copyright Test added:", text)
        print()
def get_copyrights(location):
    """
    Yield one ordered mapping per detection that has copyright statements in
    the file at `location`, with the 'statements', 'holders', 'authors',
    'start_line' and 'end_line' keys, in this order. Detections without
    statements are skipped.
    """
    from cluecode.copyrights import detect_copyrights

    for detection in detect_copyrights(location):
        # The detected years are reported too but are not part of the results.
        statements, authors, _years, holders, start_line, end_line = detection
        if statements:
            # FIXME: we should call this copyright instead, and yield one item per statement
            yield OrderedDict([
                ('statements', statements),
                ('holders', holders),
                ('authors', authors),
                ('start_line', start_line),
                ('end_line', end_line),
            ])
def update_ignorables(licensish, verbose=False, dump=True):
    """
    Detect the copyrights, holders, authors, URLs and emails in the text file
    of a `licensish` Rule or License object and store them, sorted and without
    duplicates, in its ignorable_* attributes.

    Return `licensish`. It is returned unchanged if its text file does not
    exist. Print the processed location if `verbose` is True. Call
    `licensish.dump()` after the update if `dump` is True.
    """
    location = licensish.text_file

    if verbose:
        print('Processing:', 'file://' + location)

    if not exists(location):
        return licensish

    # Collect and set ignorable copyrights, holders and authors.
    from cluecode.copyrights import detect_copyrights

    detected = {'copyrights': set(), 'holders': set(), 'authors': set()}
    for dtype, value, _start, _end in detect_copyrights(location):
        values = detected.get(dtype)
        if values is not None:
            values.add(value)

    licensish.ignorable_copyrights = sorted(detected['copyrights'])
    licensish.ignorable_holders = sorted(detected['holders'])
    licensish.ignorable_authors = sorted(detected['authors'])

    # Collect and set ignorable URLs and emails, skipping empty values.
    from cluecode.finder import find_urls
    from cluecode.finder import find_emails

    licensish.ignorable_urls = sorted(
        {url for url, _line in find_urls(location) if url}
    )
    licensish.ignorable_emails = sorted(
        {email for email, _line in find_emails(location) if email}
    )

    if dump:
        licensish.dump()

    return licensish
def get_copyrights(location):
    """
    Yield an ordered mapping for each detection that has copyright statements
    in the file at `location`. The keys are 'statements', 'holders',
    'authors', 'start_line' and 'end_line', in this order. Detections without
    statements are skipped.
    """
    from cluecode.copyrights import detect_copyrights

    result_keys = ('statements', 'holders', 'authors', 'start_line', 'end_line')

    for detection in detect_copyrights(location):
        # The detected years are reported too but are not part of the results.
        statements, authors, _years, holders, start_line, end_line = detection
        if not statements:
            continue
        result_values = (statements, holders, authors, start_line, end_line)
        # FIXME: we should call this copyright instead, and yield one item per statement
        yield OrderedDict(zip(result_keys, result_values))
def closure_test_function(*args, **kwargs):
    """
    Run detection on the enclosing `test_file` and check that the `test`
    object, once updated with the detected values, dumps to the same text as
    before the update. Save the updated test if `regen` is set.

    Positional and keyword arguments are accepted and ignored.
    """
    copyrights, holders, authors = Detection.split_values(
        detect_copyrights(test_file)
    )

    # The summaries are computed only when the test asks for them.
    holders_summary = (
        as_sorted_mapping(tally_persons(holders))
        if 'holders_summary' in test.what else []
    )
    copyrights_summary = (
        as_sorted_mapping(tally_copyrights(copyrights))
        if 'copyrights_summary' in test.what else []
    )
    authors_summary = (
        as_sorted_mapping(tally_persons(authors))
        if 'authors_summary' in test.what else []
    )

    results = {
        'copyrights': copyrights,
        'authors': authors,
        'holders': holders,
        'holders_summary': holders_summary,
        'copyrights_summary': copyrights_summary,
        'authors_summary': authors_summary,
    }

    # Dump the expectations BEFORE overwriting them with the actual results.
    expected_yaml = test.dumps()

    for wht in test.what:
        setattr(test, wht, results.get(wht))
    results_yaml = test.dumps()

    if regen:
        test.dump()

    if expected_yaml != results_yaml:
        # Prefix the expected text with the file locations so that they show
        # up in the assertion failure.
        locations = (
            'data file: file://' + data_file +
            '\ntest file: file://' + test_file + '\n'
        )
        expected_yaml = locations + expected_yaml

    assert results_yaml == expected_yaml
def get_detections(test_file):
    """
    Run copyright detection on `test_file` and return the result of
    Detection.split_values() for these detections.
    """
    return Detection.split_values(detect_copyrights(test_file))