Python detect_copyrightsの例、cluecode.copyrights.detect_copyrights Pythonの例

コード例 #1

0

ファイルを表示

ファイル: api.py プロジェクト: SmartsYoung/FenixscanX

def get_copyrights(location, deadline=sys.maxsize, **kwargs):
    """
    Return an ordered mapping with the 'copyrights', 'holders' and 'authors'
    keys, in that order. Each value is a list of ordered mappings with the
    'value', 'start_line' and 'end_line' keys, one per detection yielded by
    detect_copyrights() for the file at `location`.

    `deadline` is passed through unchanged to detect_copyrights(). Any extra
    keyword arguments are accepted and ignored.
    """
    from cluecode.copyrights import detect_copyrights

    # One bucket per reported detection type; the insertion order here is the
    # key order of the returned mapping.
    results = OrderedDict(
        (kind, []) for kind in ('copyrights', 'holders', 'authors'))

    detections = detect_copyrights(location, deadline=deadline)
    for dtype, value, start, end in detections:
        bucket = results.get(dtype)
        if bucket is None:
            # Any other detection type is dropped.
            continue
        bucket.append(OrderedDict([
            ('value', value),
            ('start_line', start),
            ('end_line', end),
        ]))

    return results

コード例 #2

0

ファイルを表示

def get_copyrights(
    location,
    deadline=sys.maxsize,
    **kwargs,
):
    """
    Return a mapping with the 'copyrights', 'holders' and 'authors' keys for
    the file at `location`.

    The values are the three items returned by
    Detection.split(detections, to_dict=True), in that order. `deadline` is
    passed through to detect_copyrights(); extra keyword arguments are
    accepted and ignored.
    """
    from cluecode.copyrights import detect_copyrights
    from cluecode.copyrights import Detection

    # Detection switches, spelled out in one place.
    detection_options = dict(
        include_copyrights=True,
        include_holders=True,
        include_authors=True,
        include_copyright_years=True,
        include_copyright_allrights=False,
        deadline=deadline,
    )
    detections = detect_copyrights(location, **detection_options)

    copyrights, holders, authors = Detection.split(detections, to_dict=True)

    # TODO: do something if we missed the deadline
    return {
        'copyrights': copyrights,
        'holders': holders,
        'authors': authors,
    }

コード例 #3

0

ファイルを表示

ファイル: test_copyrights_basic.py プロジェクト: akugarg/scancode-toolkit

 def test_detect_with_lines_only_holders(self):
     """
     Check that detection with copyrights and authors disabled yields only
     the expected 'holders' tuples, with their start and end lines.
     """
     test_file = self.get_test_loc('copyrights_basic/essential_smoke-ibm_c.c')
     detections = copyrights_module.detect_copyrights(
         test_file,
         copyrights=False,
         authors=False,
     )
     results = list(detections)

     # (type, value, start line, end line)
     expected = [
         ('holders', u'IBM and others', 6, 6),
         ('holders', u'Eclipse, IBM and others', 8, 8),
     ]
     assert results == expected

コード例 #4

0

ファイルを表示

 def test_detect_with_lines(self):
     """
     Check that default detection yields the expected 'copyrights' and
     'holders' tuples, in order, with their start and end lines.
     """
     test_file = self.get_test_loc('copyrights_basic/essential_smoke-ibm_c.c')
     results = list(copyrights_module.detect_copyrights(test_file))

     # (type, value, start line, end line)
     expected = [
         ('copyrights', u'Copyright IBM and others (c) 2008', 6, 6),
         ('holders', u'IBM and others', 6, 6),
         ('copyrights', u'Copyright Eclipse, IBM and others', 8, 8),
         ('holders', u'Eclipse, IBM and others', 8, 8),
         ('copyrights', u'(c) 2008', 8, 8),
     ]
     assert expected == results

コード例 #5

0

ファイルを表示

ファイル: test_copyrights_basic.py プロジェクト: sthagen/scancode-toolkit

 def test_detect_with_lines(self):
     """
     Check that default detection yields the expected CopyrightDetection and
     HolderDetection objects, in order.
     """
     test_file = self.get_test_loc('copyrights_basic/essential_smoke-ibm_c.c')
     detections = copyrights.detect_copyrights(test_file)
     results = list(detections)

     # Each detection is built from (value, start line, end line).
     expected = [
         copyrights.CopyrightDetection(
             'Copyright IBM and others (c) 2008', 6, 6),
         copyrights.HolderDetection(
             'IBM and others', 6, 6),
         copyrights.CopyrightDetection(
             'Copyright Eclipse, IBM and others', 8, 8),
         copyrights.HolderDetection(
             'Eclipse, IBM and others', 8, 8),
         copyrights.CopyrightDetection(
             '(c) 2008', 8, 8),
     ]
     assert results == expected

コード例 #6

0

ファイルを表示

ファイル: api.py プロジェクト: pombredanne/scancode-toolkit

def get_copyrights(location=None):
    """
    Yield one dictionary per detection found in the file at `location`.

    Each dictionary has the 'statements', 'start_line' and 'end_line' keys.
    Detections whose copyright statements are empty are skipped.
    """
    from cluecode.copyrights import detect_copyrights

    for detection in detect_copyrights(location):
        # Only the first and the last two of the six items are used here.
        statements, _, _, _, first_line, last_line = detection
        if statements:
            yield {
                "statements": statements,
                "start_line": first_line,
                "end_line": last_line,
            }

コード例 #7

0

ファイルを表示

ファイル: test_copyrights_basic.py プロジェクト: sthagen/scancode-toolkit

def check_full_detections(expected, test_file):
    """
    Assert that copyright-only detection on `test_file` (authors and holders
    disabled) produces exactly the `expected` list.
    """
    options = dict(
        include_copyrights=True,
        include_authors=False,
        include_holders=False,
    )
    detections = copyrights.detect_copyrights(test_file, **options)
    results = list(detections)
    assert results == expected

コード例 #8

0

ファイルを表示

ファイル: test_copyrights_basic.py プロジェクト: sthagen/scancode-toolkit

 def test_detect(self):
     """
     Check that detection with holders and authors disabled yields only the
     expected CopyrightDetection objects, in order.
     """
     test_file = self.get_test_loc('copyrights_basic/essential_smoke-ibm_c.c')
     detections = copyrights.detect_copyrights(
         test_file,
         include_holders=False,
         include_authors=False,
     )
     results = list(detections)

     # Each detection is built from (value, start line, end line).
     expected = [
         copyrights.CopyrightDetection(
             'Copyright IBM and others (c) 2008', 6, 6),
         copyrights.CopyrightDetection(
             'Copyright Eclipse, IBM and others', 8, 8),
         copyrights.CopyrightDetection(
             '(c) 2008', 8, 8),
     ]
     assert results == expected

コード例 #9

0

ファイルを表示

def check_detection_with_lines(expected, test_file):
    """
    Assert that copyright-only detection on `test_file` (authors and holders
    disabled) produces the `expected` list of (statement, start, end) tuples.
    """
    detections = copyrights_module.detect_copyrights(
        test_file,
        copyrights=True,
        authors=False,
        holders=False,
    )

    # Drop the leading detection type from each item before comparing.
    results = []
    for _dtype, statement, start, end in detections:
        results.append((statement, start, end))

    assert expected == results

コード例 #10

0

ファイルを表示

ファイル: test_copyrights_basic.py プロジェクト: sthagen/scancode-toolkit

 def test_detect_with_lines_only_holders(self):
     """
     Check that detection with only holders enabled yields exactly the
     expected HolderDetection objects, in order.
     """
     test_file = self.get_test_loc('copyrights_basic/essential_smoke-ibm_c.c')
     options = dict(
         include_copyrights=False,
         include_holders=True,
         include_authors=False,
     )
     results = list(copyrights.detect_copyrights(test_file, **options))

     # Each detection is built from (value, start line, end line).
     expected = [
         copyrights.HolderDetection('IBM and others', 6, 6),
         copyrights.HolderDetection('Eclipse, IBM and others', 8, 8),
     ]
     assert results == expected

コード例 #11

0

ファイルを表示

ファイル: debian_copyright.py プロジェクト: yangjie11/scancode-toolkit

def copyright_detector(location):
    """
    Return a list of the copyright values detected in the file at `location`.

    Only detections of the 'copyrights' type are kept. Return None when
    `location` is empty or otherwise falsy.
    """
    if not location:
        return None

    from cluecode.copyrights import detect_copyrights

    return [
        value
        for dtype, value, _start, _end in detect_copyrights(location)
        if dtype == 'copyrights'
    ]

コード例 #12

0

ファイルを表示

def cli(copyrights_file):
    """
    Create copyright and holder tests rules from a text file that has one line per test.
    The expected holder and copyright are from detection.
    For instance:
        Copyright (c) All the Raige Dog Salon
    """
    from cluecode.copyrights import detect_copyrights
    from cluecode_test_utils import CopyrightTest  # NOQA

    # New test files are created under the "generated" test data directory.
    generated_dir = path.join(
        cluecode_test_utils.test_env.test_data_dir, 'generated')

    # Texts that already have a test; extended as new tests are added below.
    known_texts = build_dupe_index()

    print()

    for text in load_data(copyrights_file):
        if text in known_texts:
            print('Copyright Test skipped, existing:', text)
            print()
            continue

        test_file_loc = find_test_file_loc(generated_dir)
        with io.open(test_file_loc, 'w') as test_file:
            test_file.write(text)

        # Run detection on the text itself and group the detected values by
        # type: these become the expected values of the new test.
        detected = {'copyrights': [], 'holders': [], 'authors': []}
        for dtype, value, _start, _end in detect_copyrights([text]):
            if dtype in detected:
                detected[dtype].append(value)

        test = CopyrightTest(
            what=['holders', 'copyrights', 'authors'],
            copyrights=detected['copyrights'],
            holders=detected['holders'],
            authors=detected['authors'],
        )
        test.test_file = test_file_loc
        test.data_file = test_file_loc + '.yml'
        test.dump()

        known_texts.add(text)
        print('Copyright Test added:', text)
        print()

コード例 #13

0

ファイルを表示

ファイル: api.py プロジェクト: ocabrisses/scancode-toolkit

def get_copyrights(location):
    """
    Yield one ordered mapping per detection found in the file at `location`.

    Each mapping has the 'statements', 'holders', 'authors', 'start_line' and
    'end_line' keys, in that order.
    """
    from cluecode.copyrights import detect_copyrights

    for detection in detect_copyrights(location):
        copyrights, authors, _years, holders, start_line, end_line = detection
        # FIXME: we should call this copyright instead, and yield one item per statement
        yield OrderedDict([
            ('statements', copyrights),
            ('holders', holders),
            ('authors', authors),
            ('start_line', start_line),
            ('end_line', end_line),
        ])

コード例 #14

0

ファイルを表示

ファイル: api.py プロジェクト: yudhik11/scancode-toolkit

def get_copyrights(location):
    """
    Yield one ordered mapping per detection found in the file at `location`.

    The mapping keys are, in order: 'statements', 'holders', 'authors',
    'start_line' and 'end_line'. The detected years are not reported.
    """
    from cluecode.copyrights import detect_copyrights

    # Output keys, in the order they are reported.
    keys = ('statements', 'holders', 'authors', 'start_line', 'end_line')

    for detection in detect_copyrights(location):
        statements, authors, _years, holders, first_line, last_line = detection
        values = (statements, holders, authors, first_line, last_line)
        # FIXME: we should call this copyright instead, and yield one item per statement
        yield OrderedDict(zip(keys, values))

コード例 #15

0

ファイルを表示

ファイル: api.py プロジェクト: praveen-pk/scancode-toolkit

def get_copyrights(location=None):
    """
    Yield one dictionary per detection found in the file at `location`.

    Each dictionary has the 'statements', 'start_line' and 'end_line' keys.
    Detections without any copyright statement are skipped.
    """
    from cluecode.copyrights import detect_copyrights

    for detection in detect_copyrights(location):
        # The second, third and fourth items of a detection are unused here.
        statements, _, _, _, first_line, last_line = detection
        if statements:
            yield dict(
                statements=statements,
                start_line=first_line,
                end_line=last_line,
            )

コード例 #16

0

ファイルを表示

ファイル: buildcopytests.py プロジェクト: sthagen/scancode-toolkit

def cli(copyrights_file):
    """
    Create copyright and holder tests rules from a text file that has one line per test.
    The expected holder and copyright are from detection.
    For instance:
        Copyright (c) All the Raige Dog Salon
    """
    from cluecode.copyrights import detect_copyrights
    from cluecode.copyrights import Detection

    from cluecode_test_utils import CopyrightTest  # NOQA

    # New test files are created under the "generated" test data directory.
    generated_dir = path.join(
        cluecode_test_utils.test_env.test_data_dir, "generated")

    # Texts that already have a test; extended as new tests are added below.
    known_texts = build_dupe_index()

    print()

    for text in load_data(copyrights_file):
        if text in known_texts:
            print("Copyright Test skipped, existing:", text)
            print()
            continue

        test_file_loc = find_test_file_loc(generated_dir)
        with io.open(test_file_loc, "w") as test_file:
            test_file.write(text)

        # Run detection on the file just written: the detected values become
        # the expected values of the new test.
        copyrights, holders, authors = Detection.split_values(
            detect_copyrights(test_file_loc))

        test = CopyrightTest(
            what=["holders", "copyrights", "authors"],
            copyrights=copyrights,
            holders=holders,
            authors=authors,
        )
        test.test_file = test_file_loc
        test.data_file = test_file_loc + ".yml"
        test.dump()

        known_texts.add(text)
        print("Copyright Test added:", text)
        print()

コード例 #17

0

ファイルを表示

ファイル: api.py プロジェクト: balusarakesh/dje_license_search

def get_copyrights(location):
    """
    Yield one ordered mapping per detection found in the file at `location`.

    Each mapping has the 'statements', 'holders', 'authors', 'start_line' and
    'end_line' keys, in that order. Detections without any copyright
    statement are skipped.
    """
    from cluecode.copyrights import detect_copyrights

    for detection in detect_copyrights(location):
        copyrights, authors, _years, holders, start_line, end_line = detection
        if copyrights:
            # FIXME: we should call this copyright instead, and yield one item per statement
            yield OrderedDict([
                ('statements', copyrights),
                ('holders', holders),
                ('authors', authors),
                ('start_line', start_line),
                ('end_line', end_line),
            ])

コード例 #18

0

ファイルを表示

ファイル: models.py プロジェクト: victorcruceru/scancode-toolkit

def update_ignorables(licensish, verbose=False, dump=True):
    """
    Recompute the ignorable_* attributes of a `licensish` Rule or License
    object from its text file and return that same object.

    The copyrights, holders, authors, URLs and emails found in
    `licensish.text_file` are stored as sorted, de-duplicated lists. Nothing
    is changed when the text file does not exist. Print the processed
    location when `verbose` is true, and call `licensish.dump()` at the end
    when `dump` is true.
    """
    location = licensish.text_file

    if verbose:
        print('Processing:', 'file://' + location)

    if not exists(location):
        return licensish

    # Ignorable copyrights, holders and authors: unique values per type.
    from cluecode.copyrights import detect_copyrights
    detected = {'copyrights': set(), 'holders': set(), 'authors': set()}

    for dtype, value, _start, _end in detect_copyrights(location):
        if dtype in detected:
            detected[dtype].add(value)

    licensish.ignorable_copyrights = sorted(detected['copyrights'])
    licensish.ignorable_holders = sorted(detected['holders'])
    licensish.ignorable_authors = sorted(detected['authors'])

    # Ignorable URLs and emails: unique, non-empty values.
    from cluecode.finder import find_urls
    from cluecode.finder import find_emails

    unique_urls = {url for (url, _ln) in find_urls(location) if url}
    licensish.ignorable_urls = sorted(unique_urls)

    unique_emails = {email for (email, _ln) in find_emails(location) if email}
    licensish.ignorable_emails = sorted(unique_emails)

    if dump:
        licensish.dump()
    return licensish

コード例 #19

0

ファイルを表示

ファイル: api.py プロジェクト: jarnugirdhar/scancode-toolkit

def get_copyrights(location):
    """
    Yield one ordered mapping per detection found in the file at `location`,
    skipping detections that have no copyright statement.

    The mapping keys are, in order: 'statements', 'holders', 'authors',
    'start_line' and 'end_line'.
    """
    from cluecode.copyrights import detect_copyrights

    # Output keys, in the order they are reported.
    keys = ('statements', 'holders', 'authors', 'start_line', 'end_line')

    for detection in detect_copyrights(location):
        statements, authors, _years, holders, first_line, last_line = detection
        if not statements:
            continue
        values = (statements, holders, authors, first_line, last_line)
        # FIXME: we should call this copyright instead, and yield one item per statement
        yield OrderedDict(zip(keys, values))

コード例 #20

0

ファイルを表示

    def closure_test_function(*args, **kwargs):
        """
        Run copyright detection on the enclosing `test_file` and check that
        the YAML dump of `test` is the same before and after its `test.what`
        attributes are replaced by the detected values.

        Any positional or keyword arguments are accepted and ignored. When
        the enclosing `regen` flag is true, the updated `test` is also saved
        with `test.dump()`.
        """
        copyrights, holders, authors = Detection.split_values(
            detect_copyrights(test_file))

        # Each summary is computed only when listed in test.what; otherwise
        # it stays an empty list.
        holders_summary = (
            as_sorted_mapping(tally_persons(holders))
            if 'holders_summary' in test.what else []
        )
        copyrights_summary = (
            as_sorted_mapping(tally_copyrights(copyrights))
            if 'copyrights_summary' in test.what else []
        )
        authors_summary = (
            as_sorted_mapping(tally_persons(authors))
            if 'authors_summary' in test.what else []
        )

        results = {
            'copyrights': copyrights,
            'authors': authors,
            'holders': holders,
            'holders_summary': holders_summary,
            'copyrights_summary': copyrights_summary,
            'authors_summary': authors_summary,
        }

        # Take the expected dump BEFORE the attributes are overwritten below.
        expected_yaml = test.dumps()

        for attribute in test.what:
            setattr(test, attribute, results.get(attribute))
        results_yaml = test.dumps()

        if regen:
            test.dump()

        if expected_yaml != results_yaml:
            # Prefix the expected text with the data and test file locations;
            # the assertion below compares against this prefixed text.
            locations = (
                'data file: file://' + data_file +
                '\ntest file: file://' + test_file + '\n'
            )
            expected_yaml = locations + expected_yaml

            assert results_yaml == expected_yaml

コード例 #21

0

ファイルを表示

def get_detections(test_file):
    """
    Run copyright detection on `test_file` and return the result of
    Detection.split_values() applied to the detections.
    """
    return Detection.split_values(detect_copyrights(test_file))