Exemple #1
0
def get_scancode_file(file_dict):
    '''Given a file dictionary from the scancode results, return a FileData
    object with the results.

    The following keys are read from file_dict: 'path', 'name', 'date',
    'file_type', 'sha1', 'md5', 'licenses', 'license_expressions',
    'copyrights', 'urls', 'packages', 'authors' and 'scan_errors'.
    Every entry in 'scan_errors' is recorded as an error Notice on the
    returned object under the origin 'File: <path>'.'''
    # scancode records paths from the target directory onwards
    # which in tern's case is tern.utils.constants.untar_dir
    # removing that portion of the file path. Only the first occurrence is
    # removed so that a nested directory which happens to have the same
    # name as the untar directory is left intact
    fspath = file_dict['path'].replace(
        constants.untar_dir + os.path.sep, '', 1)
    fd = FileData(
        file_dict['name'], fspath, file_dict['date'], file_dict['file_type'])
    fd.short_file_type = get_file_type(file_dict)
    fd.add_checksums({'sha1': file_dict['sha1'], 'md5': file_dict['md5']})
    # licenses, copyrights and urls are only set when the scan found some
    if file_dict['licenses']:
        fd.licenses = [li['short_name'] for li in file_dict['licenses']]
    fd.license_expressions = file_dict['license_expressions']
    if file_dict['copyrights']:
        fd.copyrights = [c['value'] for c in file_dict['copyrights']]
    if file_dict['urls']:
        fd.urls = [u['url'] for u in file_dict['urls']]
    fd.packages = file_dict['packages']
    fd.authors = [a['value'] for a in file_dict['authors']]
    if file_dict['scan_errors']:
        # for each scan error make a notice
        for err in file_dict['scan_errors']:
            fd.origins.add_notice_to_origins(
                'File: ' + fd.path, Notice(err, 'error'))
    return fd
Exemple #2
0
    def testInstance(self):
        '''Check the state of a freshly created FileData and that its
        optional properties can be assigned and read back'''
        bare = FileData('file1', 'path/to/file1')
        self.assertEqual(bare.name, 'file1')
        self.assertEqual(bare.path, 'path/to/file1')
        # only name and path were given, so every other property should
        # evaluate to False
        unset_props = (
            'checksum_type', 'checksum', 'date', 'version_control',
            'version', 'file_type', 'licenses', 'license_expressions',
            'copyrights', 'authors', 'packages', 'urls')
        for prop in unset_props:
            self.assertFalse(getattr(bare, prop), prop)

        # '12355' is expected to be rejected as a date
        with self.assertRaises(ValueError):
            FileData('file2', 'path/to/file2', '12355')
        full = FileData('file2', 'path/to/file2', '2020-01-01', 'binary')
        self.assertEqual(full.date, '2020-01-01')
        self.assertEqual(full.file_type, 'binary')

        # assign the list properties and make sure they read back unchanged
        full.licenses = ['MIT', 'GPL']
        full.license_expressions = ['GPLv2 or MIT', 'MIT and GPLv2']
        full.copyrights = ['copyrights']
        full.authors = ['author1', 'author2']
        full.packages = ['package1', 'package2']
        expected = {
            'licenses': ['MIT', 'GPL'],
            'license_expressions': ['GPLv2 or MIT', 'MIT and GPLv2'],
            'copyrights': ['copyrights'],
            'authors': ['author1', 'author2'],
            'packages': ['package1', 'package2'],
        }
        for prop, value in expected.items():
            self.assertEqual(getattr(full, prop), value, prop)
Exemple #3
0
 def testMerge(self):
     '''Check what merge() returns for mismatching, non-FileData and
     matching arguments, and what the receiving object holds afterwards'''
     # the object everything gets merged into
     target = FileData('switch_root', 'sbin/switch_root')
     target.set_checksum('sha256', '123abc456def')
     target.extattrs = '-rwxr-xr-x|1000|1000|14408|1'

     # same file as target, with every other property filled in
     detailed = FileData('switch_root', 'sbin/switch_root')
     detailed.add_checksums({
         'SHA1': '12345abcde',
         'MD5': '1ff38cc592c4c5d0c8e3ca38be8f1eb1'
     })
     detailed.set_checksum('sha256', '123abc456def')
     detailed.extattrs = '-rwxr-xr-x|1000|1000|14408|1'
     detailed.date = '2012-02-02'
     detailed.file_type = 'binary'
     detailed.short_file_type = 'BINARY'
     detailed.licenses = ['MIT', 'GPL']
     detailed.license_expressions = ['MIT or GPL']
     detailed.copyrights = ['copyrights']
     detailed.authors = ['author1', 'author2']
     detailed.packages = ['package1', 'package2']
     detailed.urls = ['url1', 'url2']
     detailed.origins.add_notice_to_origins(
         'scanning', Notice('something happened', 'error'))

     # same name and path as target but a different checksum
     other_checksum = FileData('switch_root', 'sbin/switch_root')
     other_checksum.set_checksum('sha1', '456def123abc')
     # a different file altogether
     other_file = FileData('e2image', 'sbin/e2image')

     self.assertFalse(target.merge(other_file))
     self.assertTrue(target.merge(other_checksum))
     # the checksum and extattrs of target are expected to stay as they were
     self.assertEqual(target.checksum, '123abc456def')
     self.assertEqual(target.extattrs, '-rwxr-xr-x|1000|1000|14408|1')
     # anything that is not a FileData should be refused
     self.assertFalse(target.merge('astring'))
     self.assertTrue(target.merge(detailed))

     # the checksums are expected back with lower case keys
     self.assertEqual(target.checksums, {
         'sha1': '12345abcde',
         'md5': '1ff38cc592c4c5d0c8e3ca38be8f1eb1'
     })
     expected = {
         'date': '2012-02-02',
         'file_type': 'binary',
         'short_file_type': 'BINARY',
         'licenses': ['MIT', 'GPL'],
         'license_expressions': ['MIT or GPL'],
         'copyrights': ['copyrights'],
         'authors': ['author1', 'author2'],
         'packages': ['package1', 'package2'],
         'urls': ['url1', 'url2'],
     }
     for prop, value in expected.items():
         self.assertEqual(getattr(target, prop), value, prop)

     # the notice recorded on the merged object should have come along
     merged_origins = target.origins.origins
     self.assertEqual(len(merged_origins), 1)
     scanning = merged_origins[0]
     self.assertEqual(scanning.origin_str, 'scanning')
     self.assertEqual(len(scanning.notices), 1)
     self.assertEqual(scanning.notices[0].message, 'something happened')
Exemple #4
0
def collect_layer_data(layer_obj):
    '''Use scancode to collect data from a layer filesystem. This function will
    create a FileData object for every file found. After scanning, it will
    return a list of FileData objects.

    If the scancode command gives back no result, the error is logged and
    recorded as an error Notice on layer_obj under the origin
    'Layer: <first 10 characters of fs_hash>' and an empty list is returned.
    '''
    # run scancode against a directory
    command = 'scancode -ilpcu --quiet --json -'
    full_cmd = get_filesystem_command(layer_obj, command)
    origin_layer = 'Layer: ' + layer_obj.fs_hash[:10]
    result, error = rootfs.shell_command(True, full_cmd)
    if not result:
        logger.error("No scancode results for this layer: %s", str(error))
        layer_obj.origins.add_notice_to_origins(origin_layer,
                                                Notice(str(error), 'error'))
        return []

    # scancode records paths from the target directory onwards
    # which in tern's case is tern.utils.constants.untar_dir
    untar_prefix = constants.untar_dir + os.path.sep
    files = []
    # make FileData objects for each result
    data = json.loads(result)
    for f in data['files']:
        # entries which are not of type 'file' are skipped
        if f['type'] != 'file':
            continue
        # removing the untar directory portion of the file path. Only the
        # first occurrence is removed so that a nested directory which
        # happens to have the same name is left intact
        fspath = f['path'].replace(untar_prefix, '', 1)
        fd = FileData(f['name'], fspath, f['date'], f['file_type'])
        # licenses, copyrights and urls are only set when the scan found some
        if f['licenses']:
            fd.licenses = [li['short_name'] for li in f['licenses']]
        fd.license_expressions = f['license_expressions']
        if f['copyrights']:
            fd.copyrights = [c['value'] for c in f['copyrights']]
        if f['urls']:
            fd.urls = [u['url'] for u in f['urls']]
        fd.packages = f['packages']
        fd.authors = [a['value'] for a in f['authors']]
        if f['scan_errors']:
            # for each scan error make a notice
            for err in f['scan_errors']:
                fd.origins.add_notice_to_origins(
                    'File: ' + fd.path, Notice(err, 'error'))
        files.append(fd)
    return files