Example #1
0
def get_pkg_dict_for_index(attr_list, index):
    """Pick one package out of a dictionary of parallel attribute lists.

    attr_list has the form {'name': [...], 'version': [...],...}. The
    returned dictionary has the form {'name': x1, 'version': x2,...} where
    each value is the element found at position index of the matching list.
    The 'files' entry is treated differently: each file path listed for the
    package is turned into a FileData dictionary."""
    package = {}
    for attr, column in attr_list.items():
        entry = column[index]
        if attr == 'files':
            # the file name is the last component of the file path
            entry = [FileData(os.path.split(path)[1], path).to_dict()
                     for path in entry]
        package[attr] = entry
    return package
Example #2
0
def collect_layer_data(layer_obj):
    '''Collect scancode data for a layer filesystem and return it as a list
    of FileData objects, one for every result entry of type 'file'.

    If the scancode command gives back no result, the error is logged and
    recorded as an 'error' notice on the layer's origins, and the returned
    list is empty.
    '''
    scancode_cmd = 'scancode -ilpcu --quiet --json -'
    full_cmd = get_filesystem_command(layer_obj, scancode_cmd)
    origin_layer = 'Layer: ' + layer_obj.fs_hash[:10]
    result, error = rootfs.shell_command(True, full_cmd)
    if not result:
        logger.error("No scancode results for this layer: %s", str(error))
        layer_obj.origins.add_notice_to_origins(
            origin_layer, Notice(str(error), 'error'))
        return []
    scanned = []
    # scancode records paths from the target directory onwards
    # which in tern's case is tern.utils.constants.untar_dir
    # so that portion is stripped from every file path
    untar_prefix = constants.untar_dir + os.path.sep
    for entry in json.loads(result)['files']:
        if entry['type'] != 'file':
            continue
        fspath = entry['path'].replace(untar_prefix, '')
        file_data = FileData(
            entry['name'], fspath, entry['date'], entry['file_type'])
        if entry['licenses']:
            file_data.licenses = [
                lic['short_name'] for lic in entry['licenses']]
        file_data.license_expressions = entry['license_expressions']
        if entry['copyrights']:
            file_data.copyrights = [
                cpr['value'] for cpr in entry['copyrights']]
        if entry['urls']:
            file_data.urls = [link['url'] for link in entry['urls']]
        file_data.packages = entry['packages']
        file_data.authors = [who['value'] for who in entry['authors']]
        # every scan error becomes an 'error' notice on the file itself
        for err in entry['scan_errors'] or ():
            file_data.origins.add_notice_to_origins(
                'File: ' + file_data.path, Notice(err, 'error'))
        scanned.append(file_data)
    return scanned
Example #3
0
def convert_to_pkg_dicts(pkg_dict):
    '''Turn collected metadata lists into a list of package dictionaries.

    pkg_dict is what comes back after collecting each piece of metadata as
    a list, for example:
        {'names': [....], 'versions': [...], 'licenses': [...], ....}
    Each position in those lists describes one package. The result is a
    list with one dictionary per package, keyed by the property names
    expected by the Package Object. A list whose length differs from the
    length of 'names' is left out and a warning is logged.'''
    mapping = {
        'name': 'names',
        'version': 'versions',
        'pkg_license': 'licenses',
        'copyright': 'copyrights',
        'proj_url': 'proj_urls',
        'pkg_licenses': 'pkg_licenses',
        'files': 'files'
    }
    expected_len = len(pkg_dict['names'])
    # keep only the lists that are present and line up with the names
    columns = {}
    for prop, collected_key in mapping.items():
        if collected_key not in pkg_dict:
            continue
        if len(pkg_dict[collected_key]) != expected_len:
            logger.warning("Inconsistent lengths for key: %s", collected_key)
            continue
        columns[prop] = pkg_dict[collected_key]
    # build one package dictionary per position in the lists
    packages = []
    for position in range(len(columns['name'])):
        package = {}
        for prop, column in columns.items():
            if prop == 'files':
                # file paths become FileData objects in dictionary format
                package['files'] = [
                    FileData(os.path.split(path)[1], path).to_dict()
                    for path in column[position]]
            else:
                package[prop] = column[position]
        packages.append(package)
    return packages
Example #4
0
 def _parse_hash_content(self, content):
     """This is an internal function to parse the content of the hash
     and return a list of FileData objects
     The content consists of lines of the form:
     permissions|uid|gid|size|hard links|  sha256sum  filepath xattrs
     where xattrs is the list of extended attributes for the file
     The extended attributes start with a '# file' indicator, followed
     by a list of key-value pairs separated by newlines. For now, we will
     conserve the key-value pair list as they appear and separate
     each one by a comma"""
     file_list = []
     # keep track of where we are on the list of files
     index = 0
     # loop through the content
     while content:
         line = content.pop(0)
         if "# file" in line:
             # collect the extended attributes
             xattrlist = []
             xattrline = content.pop(0)
             while xattrline != '\n':
                 if 'selinux' not in xattrline:
                     xattrlist.append(xattrline.strip())
                 xattrline = content.pop(0)
             # when we break out of the extended attributes loop
             # we combine the results and update the FileData object
             # existing in the previous index
             file_list[index-1].extattrs = file_list[index-1].extattrs + \
                 "  " + ','.join(xattrlist)
         else:
             # collect the regular attributes
             file_info = line[:-1].split('  ')
             file_data = FileData(os.path.basename(file_info[2]),
                                  os.path.relpath(file_info[2], '.'))
             file_data.set_checksum('sha256', file_info[1])
             file_data.set_whiteout()
             file_list.append(file_data)
             index = index + 1
     return file_list
Example #5
0
def get_scancode_file(file_dict):
    '''Build a FileData object from one file dictionary of the scancode
    results and return it'''
    # scancode records paths from the target directory onwards
    # which in tern's case is tern.utils.constants.untar_dir
    # so that portion is stripped from the file path
    scanned_path = file_dict['path']
    relative_path = scanned_path.replace(
        constants.untar_dir + os.path.sep, '')
    scanned = FileData(file_dict['name'], relative_path,
                       file_dict['date'], file_dict['file_type'])
    scanned.short_file_type = get_file_type(file_dict)
    scanned.add_checksums(
        {'sha1': file_dict['sha1'], 'md5': file_dict['md5']})
    # licenses, copyrights and urls are only set when scancode found some
    if file_dict['licenses']:
        scanned.licenses = [
            found['short_name'] for found in file_dict['licenses']]
    scanned.license_expressions = file_dict['license_expressions']
    if file_dict['copyrights']:
        scanned.copyrights = [
            found['value'] for found in file_dict['copyrights']]
    if file_dict['urls']:
        scanned.urls = [found['url'] for found in file_dict['urls']]
    scanned.packages = file_dict['packages']
    scanned.authors = [found['value'] for found in file_dict['authors']]
    # every scan error becomes an 'error' notice on the file
    for err in file_dict['scan_errors'] or ():
        scanned.origins.add_notice_to_origins(
            'File: ' + scanned.path, Notice(err, 'error'))
    return scanned
Example #6
0
 def testRemoveFile(self):
     # remove_file is expected to match on the file path, not the name
     path = 'path/to/file1'
     self.layer.add_file(FileData('file1', path))
     remove = self.layer.remove_file
     self.assertFalse(remove('file1'))
     self.assertTrue(remove(path))
     # once removed, the same path cannot be removed a second time
     self.assertFalse(remove(path))
Example #7
0
 def testExtAttrs(self):
     # extattrs is expected to give back the string it was assigned
     attrs = '-rw-r--r--|1000|1000|19|1'
     text_file = FileData('test.txt', 'usr/abc/test.txt')
     text_file.extattrs = attrs
     self.assertEqual(text_file.extattrs, attrs)
Example #8
0
    def testInstance(self):
        # built from a name and a path only, nothing else should be set
        bare = FileData('file1', 'path/to/file1')
        self.assertEqual(bare.name, 'file1')
        self.assertEqual(bare.path, 'path/to/file1')
        for attr in ('checksum_type', 'checksum', 'date', 'version_control',
                     'version', 'file_type', 'short_file_type', 'licenses',
                     'license_expressions', 'copyrights', 'authors',
                     'packages', 'urls'):
            self.assertFalse(getattr(bare, attr), attr)

        # '12355' is not accepted as a date
        with self.assertRaises(ValueError):
            FileData('file2', 'path/to/file2', '12355')
        bare.file_type = 'ELF'
        self.assertEqual(bare.file_type, 'ELF')
        # 'SOMETHING' is not accepted as a short file type, 'BINARY' is
        with self.assertRaises(ValueError):
            bare.short_file_type = 'SOMETHING'
        bare.short_file_type = 'BINARY'
        self.assertEqual(bare.short_file_type, 'BINARY')
        dated = FileData('file2', 'path/to/file2', '2020-01-01', 'binary')
        self.assertEqual(dated.date, '2020-01-01')
        self.assertEqual(dated.file_type, 'binary')

        # list properties should give back what was assigned to them
        assigned = (
            ('licenses', ['MIT', 'GPL']),
            ('license_expressions', ['GPLv2 or MIT', 'MIT and GPLv2']),
            ('copyrights', ['copyrights']),
            ('authors', ['author1', 'author2']),
            ('packages', ['package1', 'package2']),
        )
        for attr, value in assigned:
            # assign a copy so the comparison is against a separate list
            setattr(dated, attr, list(value))
        for attr, value in assigned:
            self.assertEqual(getattr(dated, attr), value)
Example #9
0
 def testMerge(self):
     name, path = 'switch_root', 'sbin/switch_root'
     sha256 = '123abc456def'
     extattrs = '-rwxr-xr-x|1000|1000|14408|1'
     # the file everything else gets merged into
     target = FileData(name, path)
     target.set_checksum('sha256', sha256)
     target.extattrs = extattrs
     # the same file again, this time carrying a full set of metadata
     metadata = (
         ('date', '2012-02-02'),
         ('file_type', 'binary'),
         ('short_file_type', 'BINARY'),
         ('licenses', ['MIT', 'GPL']),
         ('license_expressions', ['MIT or GPL']),
         ('copyrights', ['copyrights']),
         ('authors', ['author1', 'author2']),
         ('packages', ['package1', 'package2']),
         ('urls', ['url1', 'url2']),
     )
     detailed = FileData(name, path)
     detailed.add_checksums({
         'SHA1': '12345abcde',
         'MD5': '1ff38cc592c4c5d0c8e3ca38be8f1eb1'
     })
     detailed.set_checksum('sha256', sha256)
     detailed.extattrs = extattrs
     for attr, value in metadata:
         # slicing hands lists over as copies and leaves strings as is
         setattr(detailed, attr, value[:])
     detailed.origins.add_notice_to_origins(
         'scanning', Notice('something happened', 'error'))
     # same name and path, but a different checksum
     other_checksum = FileData(name, path)
     other_checksum.set_checksum('sha1', '456def123abc')
     # a different file altogether
     unrelated = FileData('e2image', 'sbin/e2image')

     self.assertFalse(target.merge(unrelated))
     self.assertTrue(target.merge(other_checksum))
     # the target's own checksum and extattrs are expected to survive
     self.assertEqual(target.checksum, sha256)
     self.assertEqual(target.extattrs, extattrs)
     # something that is not a FileData object cannot be merged
     self.assertFalse(target.merge('astring'))
     self.assertTrue(target.merge(detailed))
     self.assertEqual(target.checksums, {
         'sha1': '12345abcde',
         'md5': '1ff38cc592c4c5d0c8e3ca38be8f1eb1'
     })
     for attr, value in metadata:
         self.assertEqual(getattr(target, attr), value)
     # the notice recorded on the merged file should have come along
     origins = target.origins.origins
     self.assertEqual(len(origins), 1)
     self.assertEqual(origins[0].origin_str, 'scanning')
     self.assertEqual(len(origins[0].notices), 1)
     self.assertEqual(origins[0].notices[0].message,
                      'something happened')
Example #10
0
 def setUp(self):
     # each test starts with a FileData that already has two licenses
     fixture = FileData('afile', 'path/to/afile')
     fixture.licenses = ['MIT', 'GPL']
     self.afile = fixture
Example #11
0
class TestClassFileData(unittest.TestCase):
    # Unit tests for FileData: construction, checksums, extended attributes,
    # version, dictionary conversion, fill and merge.

    def setUp(self):
        # each test starts with a FileData that already has two licenses
        fixture = FileData('afile', 'path/to/afile')
        fixture.licenses = ['MIT', 'GPL']
        self.afile = fixture

    def tearDown(self):
        # drop the fixture so nothing carries over to the next test
        del self.afile

    def testInstance(self):
        # built from a name and a path only, nothing else should be set
        bare = FileData('file1', 'path/to/file1')
        self.assertEqual(bare.name, 'file1')
        self.assertEqual(bare.path, 'path/to/file1')
        for attr in ('checksum_type', 'checksum', 'date', 'version_control',
                     'version', 'file_type', 'short_file_type', 'licenses',
                     'license_expressions', 'copyrights', 'authors',
                     'packages', 'urls'):
            self.assertFalse(getattr(bare, attr), attr)

        # '12355' is not accepted as a date
        with self.assertRaises(ValueError):
            FileData('file2', 'path/to/file2', '12355')
        bare.file_type = 'ELF'
        self.assertEqual(bare.file_type, 'ELF')
        # 'SOMETHING' is not accepted as a short file type, 'BINARY' is
        with self.assertRaises(ValueError):
            bare.short_file_type = 'SOMETHING'
        bare.short_file_type = 'BINARY'
        self.assertEqual(bare.short_file_type, 'BINARY')
        dated = FileData('file2', 'path/to/file2', '2020-01-01', 'binary')
        self.assertEqual(dated.date, '2020-01-01')
        self.assertEqual(dated.file_type, 'binary')

        # list properties should give back what was assigned to them
        assigned = (
            ('licenses', ['MIT', 'GPL']),
            ('license_expressions', ['GPLv2 or MIT', 'MIT and GPLv2']),
            ('copyrights', ['copyrights']),
            ('authors', ['author1', 'author2']),
            ('packages', ['package1', 'package2']),
        )
        for attr, value in assigned:
            # assign a copy so the comparison is against a separate list
            setattr(dated, attr, list(value))
        for attr, value in assigned:
            self.assertEqual(getattr(dated, attr), value)

    def testSetChecksum(self):
        kind, digest = 'sha256', '12345abcde'
        self.afile.set_checksum(kind, digest)
        self.assertEqual(self.afile.checksum_type, kind)
        self.assertEqual(self.afile.checksum, digest)

    def testAddChecksums(self):
        # types given in upper case are expected back in lower case
        plain = FileData('file1', 'path/to/file1')
        plain.add_checksums({
            'SHA1': '12345abcde',
            'MD5': '1ff38cc592c4c5d0c8e3ca38be8f1eb1'
        })
        expected = {
            'sha1': '12345abcde',
            'md5': '1ff38cc592c4c5d0c8e3ca38be8f1eb1'
        }
        self.assertEqual(plain.checksums, expected)

    def testGetChecksum(self):
        sha1 = '12345abcde'
        md5 = '1ff38cc592c4c5d0c8e3ca38be8f1eb1'
        self.afile.add_checksums({'SHA1': sha1, 'MD5': md5})
        # the lookup is tried with both a lower and an upper case type
        self.assertEqual(self.afile.get_checksum('sha1'), sha1)
        self.assertEqual(self.afile.get_checksum('MD5'), md5)
        # a type that was never added gives None
        self.assertIsNone(self.afile.get_checksum('sha256'))

    def testExtAttrs(self):
        # extattrs is expected to give back the string it was assigned
        attrs = '-rw-r--r--|1000|1000|19|1'
        text_file = FileData('test.txt', 'usr/abc/test.txt')
        text_file.extattrs = attrs
        self.assertEqual(text_file.extattrs, attrs)

    def testSetVersion(self):
        vcs, revision = 'git', '12345abcde'
        self.afile.set_version(vcs, revision)
        self.assertEqual(self.afile.version_control, vcs)
        self.assertEqual(self.afile.version, revision)

    def testToDict(self):
        # without a template the property names are the dictionary keys
        as_dict = self.afile.to_dict()
        expected = (
            ('name', 'afile'),
            ('path', 'path/to/afile'),
            ('licenses', ['MIT', 'GPL']),
        )
        for key, value in expected:
            self.assertEqual(as_dict[key], value)

    def testToDictTemplate(self):
        first_template, second_template = TestTemplate1(), TestTemplate2()
        mapped = self.afile.to_dict(first_template)
        noted = self.afile.to_dict(second_template)
        # the first template should yield exactly three 'file.' keys
        self.assertEqual(len(mapped.keys()), 3)
        expected = (
            ('file.name', 'afile'),
            ('file.path', 'path/to/afile'),
            ('file.licenses', ['MIT', 'GPL']),
        )
        for key, value in expected:
            self.assertEqual(mapped[key], value)
        self.assertFalse(noted['notes'])

    def testFill(self):
        name, path = 'zconf.h', '/usr/include/zconf.h'
        sha256 = ('77304005ceb5f0d03ad4c37eb8386a10'
                  '866e4ceeb204f7c3b6599834c7319541')
        extattrs = ('-rw-r--r-- 1 1000 1000 16262 Nov 13 17:57 '
                    '/usr/include/zconf.h')
        checksums = {
            'sha1': '12345abcde',
            'md5': '1ff38cc592c4c5d0c8e3ca38be8f1eb1'
        }
        filled = FileData(name, path)
        filled.fill({
            'name': name,
            'path': path,
            'checksum_type': 'sha256',
            'checksum': sha256,
            'extattrs': extattrs,
            # a copy, so the comparison below is against a separate dict
            'checksums': dict(checksums),
        })
        self.assertEqual(filled.name, name)
        self.assertEqual(filled.path, path)
        self.assertEqual(filled.checksum_type, 'sha256')
        self.assertEqual(filled.checksum, sha256)
        self.assertEqual(filled.extattrs, extattrs)
        self.assertEqual(filled.checksums, checksums)
        # keys missing from the dictionary are expected to leave notices
        notices = filled.origins.origins[0].notices
        self.assertEqual(notices[0].message, 'No metadata for key: date')
        self.assertEqual(notices[1].message,
                         'No metadata for key: file_type')
        self.assertEqual(notices[1].level, 'warning')
        self.assertEqual(notices[2].message,
                         'No metadata for key: short_file_type')

    def testMerge(self):
        name, path = 'switch_root', 'sbin/switch_root'
        sha256 = '123abc456def'
        extattrs = '-rwxr-xr-x|1000|1000|14408|1'
        # the file everything else gets merged into
        target = FileData(name, path)
        target.set_checksum('sha256', sha256)
        target.extattrs = extattrs
        # the same file again, this time carrying a full set of metadata
        metadata = (
            ('date', '2012-02-02'),
            ('file_type', 'binary'),
            ('short_file_type', 'BINARY'),
            ('licenses', ['MIT', 'GPL']),
            ('license_expressions', ['MIT or GPL']),
            ('copyrights', ['copyrights']),
            ('authors', ['author1', 'author2']),
            ('packages', ['package1', 'package2']),
            ('urls', ['url1', 'url2']),
        )
        detailed = FileData(name, path)
        detailed.add_checksums({
            'SHA1': '12345abcde',
            'MD5': '1ff38cc592c4c5d0c8e3ca38be8f1eb1'
        })
        detailed.set_checksum('sha256', sha256)
        detailed.extattrs = extattrs
        for attr, value in metadata:
            # slicing hands lists over as copies and leaves strings as is
            setattr(detailed, attr, value[:])
        detailed.origins.add_notice_to_origins(
            'scanning', Notice('something happened', 'error'))
        # same name and path, but a different checksum
        other_checksum = FileData(name, path)
        other_checksum.set_checksum('sha1', '456def123abc')
        # a different file altogether
        unrelated = FileData('e2image', 'sbin/e2image')

        self.assertFalse(target.merge(unrelated))
        self.assertTrue(target.merge(other_checksum))
        # the target's own checksum and extattrs are expected to survive
        self.assertEqual(target.checksum, sha256)
        self.assertEqual(target.extattrs, extattrs)
        # something that is not a FileData object cannot be merged
        self.assertFalse(target.merge('astring'))
        self.assertTrue(target.merge(detailed))
        self.assertEqual(target.checksums, {
            'sha1': '12345abcde',
            'md5': '1ff38cc592c4c5d0c8e3ca38be8f1eb1'
        })
        for attr, value in metadata:
            self.assertEqual(getattr(target, attr), value)
        # the notice recorded on the merged file should have come along
        origins = target.origins.origins
        self.assertEqual(len(origins), 1)
        self.assertEqual(origins[0].origin_str, 'scanning')
        self.assertEqual(len(origins[0].notices), 1)
        self.assertEqual(origins[0].notices[0].message,
                         'something happened')
Example #12
0
 def testSetWhiteout(self):
     # presumably flagged because of the '.wh.' prefix in the file name
     marker = FileData('.wh.os-release', 'usr/lib/.wh.os-release')
     marker.set_whiteout()
     self.assertTrue(marker.is_whiteout)
     # the regular fixture file is expected to stay unflagged
     regular = self.afile
     regular.set_whiteout()
     self.assertFalse(regular.is_whiteout)
Example #13
0
 def setUp(self):
     # fixtures shared by the tests: an install command and a remove
     # command for the same package, an image and a file
     install_cmd = Command("yum install nfs-utils")
     remove_cmd = Command("yum remove nfs-utils")
     self.command1 = install_cmd
     self.command2 = remove_cmd
     self.image = TestImage('5678efgh')
     self.file = FileData('README.txt', '/home/test')
     # every test starts from an empty cache
     cache.cache = {}
Example #14
0
class TestClassFileData(unittest.TestCase):
    # Unit tests for FileData: construction, checksum, version and
    # dictionary conversion.

    def setUp(self):
        # each test starts with a FileData that already has two licenses
        fixture = FileData('afile', 'path/to/afile')
        fixture.licenses = ['MIT', 'GPL']
        self.afile = fixture

    def tearDown(self):
        # drop the fixture so nothing carries over to the next test
        del self.afile

    def testInstance(self):
        # built from a name and a path only, nothing else should be set
        bare = FileData('file1', 'path/to/file1')
        self.assertEqual(bare.name, 'file1')
        self.assertEqual(bare.path, 'path/to/file1')
        for attr in ('checksum_type', 'checksum', 'date', 'version_control',
                     'version', 'file_type', 'licenses',
                     'license_expressions', 'copyrights', 'authors',
                     'packages', 'urls'):
            self.assertFalse(getattr(bare, attr), attr)

        # '12355' is not accepted as a date
        with self.assertRaises(ValueError):
            FileData('file2', 'path/to/file2', '12355')
        dated = FileData('file2', 'path/to/file2', '2020-01-01', 'binary')
        self.assertEqual(dated.date, '2020-01-01')
        self.assertEqual(dated.file_type, 'binary')

        # list properties should give back what was assigned to them
        assigned = (
            ('licenses', ['MIT', 'GPL']),
            ('license_expressions', ['GPLv2 or MIT', 'MIT and GPLv2']),
            ('copyrights', ['copyrights']),
            ('authors', ['author1', 'author2']),
            ('packages', ['package1', 'package2']),
        )
        for attr, value in assigned:
            # assign a copy so the comparison is against a separate list
            setattr(dated, attr, list(value))
        for attr, value in assigned:
            self.assertEqual(getattr(dated, attr), value)

    def testSetChecksum(self):
        kind, digest = 'sha256', '12345abcde'
        self.afile.set_checksum(kind, digest)
        self.assertEqual(self.afile.checksum_type, kind)
        self.assertEqual(self.afile.checksum, digest)

    def testSetVersion(self):
        vcs, revision = 'git', '12345abcde'
        self.afile.set_version(vcs, revision)
        self.assertEqual(self.afile.version_control, vcs)
        self.assertEqual(self.afile.version, revision)

    def testToDict(self):
        # without a template the property names are the dictionary keys
        as_dict = self.afile.to_dict()
        expected = (
            ('name', 'afile'),
            ('path', 'path/to/afile'),
            ('licenses', ['MIT', 'GPL']),
        )
        for key, value in expected:
            self.assertEqual(as_dict[key], value)

    def testToDictTemplate(self):
        first_template, second_template = TestTemplate1(), TestTemplate2()
        mapped = self.afile.to_dict(first_template)
        noted = self.afile.to_dict(second_template)
        # the first template should yield exactly three 'file.' keys
        self.assertEqual(len(mapped.keys()), 3)
        expected = (
            ('file.name', 'afile'),
            ('file.path', 'path/to/afile'),
            ('file.licenses', ['MIT', 'GPL']),
        )
        for key, value in expected:
            self.assertEqual(mapped[key], value)
        self.assertFalse(noted['notes'])