def get_pkg_dict_for_index(attr_list, index):
    """Given the package dictionary with attribute lists of the form
    {'name': [...], 'version': [...], ...} and an index, return a package
    dictionary of the form {'name': x1, 'version': x2, ...} for that index"""
    pkg_dict = {}
    for attr, values in attr_list.items():
        if attr == 'files':
            # file paths are converted into FileData dictionaries
            pkg_dict['files'] = [
                FileData(os.path.split(fpath)[1], fpath).to_dict()
                for fpath in values[index]]
        else:
            pkg_dict[attr] = values[index]
    return pkg_dict
def collect_layer_data(layer_obj):
    '''Use scancode to collect data from a layer filesystem. This function
    will create a FileData object for every file found. After scanning,
    it will return a list of FileData objects.
    '''
    files = []
    # run scancode against a directory
    command = 'scancode -ilpcu --quiet --json -'
    full_cmd = get_filesystem_command(layer_obj, command)
    origin_layer = 'Layer: ' + layer_obj.fs_hash[:10]
    result, error = rootfs.shell_command(True, full_cmd)
    if not result:
        # record the failure both in the log and on the layer's origins
        logger.error("No scancode results for this layer: %s", str(error))
        layer_obj.origins.add_notice_to_origins(
            origin_layer, Notice(str(error), 'error'))
        return files
    # make FileData objects for each result
    data = json.loads(result)
    for entry in data['files']:
        if entry['type'] != 'file':
            continue
        # scancode records paths from the target directory onwards
        # which in tern's case is tern.utils.constants.untar_dir
        # removing that portion of the file path
        fspath = entry['path'].replace(constants.untar_dir + os.path.sep, '')
        fd = FileData(
            entry['name'], fspath, entry['date'], entry['file_type'])
        if entry['licenses']:
            fd.licenses = [lic['short_name'] for lic in entry['licenses']]
            fd.license_expressions = entry['license_expressions']
        if entry['copyrights']:
            fd.copyrights = [cp['value'] for cp in entry['copyrights']]
        if entry['urls']:
            fd.urls = [u['url'] for u in entry['urls']]
        fd.packages = entry['packages']
        fd.authors = [auth['value'] for auth in entry['authors']]
        if entry['scan_errors']:
            # for each scan error make a notice
            for err in entry['scan_errors']:
                fd.origins.add_notice_to_origins(
                    'File: ' + fd.path, Notice(err, 'error'))
        files.append(fd)
    return files
def convert_to_pkg_dicts(pkg_dict):
    '''The pkg_dict is what gets returned after collecting individual
    metadata as a list. It looks like this if property collected:
    {'names': [....], 'versions': [...], 'licenses': [...], ....}
    Convert these into a package dictionary expected by the Package
    Object'''
    # package property name -> key used in the collected metadata dict
    mapping = {
        'name': 'names',
        'version': 'versions',
        'pkg_license': 'licenses',
        'copyright': 'copyrights',
        'proj_url': 'proj_urls',
        'pkg_licenses': 'pkg_licenses',
        'files': 'files'
    }
    pkg_list = []
    len_names = len(pkg_dict['names'])
    # keep only the properties whose value lists line up with the names list
    new_dict = {}
    for prop, attr in mapping.items():
        if attr not in pkg_dict:
            continue
        if len(pkg_dict[attr]) != len_names:
            logger.warning("Inconsistent lengths for key: %s", attr)
            continue
        new_dict[prop] = pkg_dict[attr]
    # slice out one package dictionary per name
    for index in range(len(new_dict['name'])):
        a_pkg = {}
        for prop, values in new_dict.items():
            if prop == 'files':
                # represent each file as a FileData dictionary
                a_pkg['files'] = [
                    FileData(os.path.split(fpath)[1], fpath).to_dict()
                    for fpath in values[index]]
            else:
                a_pkg[prop] = values[index]
        pkg_list.append(a_pkg)
    return pkg_list
def _parse_hash_content(self, content):
    """This is an internal function to parse the content of the hash
    and return a list of FileData objects
    The content consists of lines of the form:
        permissions|uid|gid|size|hard links| sha256sum filepath xattrs
    where xattrs is the list of extended attributes for the file
    The extended attributes start with a '# file' indicator, followed by
    a list of key-value pairs separated by newlines. For now, we will
    conserve the key-value pair list as they appear and separate each
    one by a comma

    NOTE: `content` is consumed destructively via pop(0) — the caller's
    list is empty when this returns."""
    file_list = []
    # keep track of where we are on the list of files
    index = 0
    # loop through the content
    while content:
        line = content.pop(0)
        if "# file" in line:
            # collect the extended attributes
            xattrlist = []
            xattrline = content.pop(0)
            # a bare newline terminates the xattr section
            while xattrline != '\n':
                # selinux attributes are deliberately dropped
                if 'selinux' not in xattrline:
                    xattrlist.append(xattrline.strip())
                xattrline = content.pop(0)
            # when we break out of the extended attributes loop
            # we combine the results and update the FileData object
            # existing in the previous index
            file_list[index-1].extattrs = file_list[index-1].extattrs + \
                " " + ','.join(xattrlist)
        else:
            # collect the regular attributes
            # line[:-1] drops the trailing newline before splitting;
            # assumes field [1] is the sha256sum and [2] the filepath —
            # TODO confirm against the hash file producer's format
            file_info = line[:-1].split(' ')
            file_data = FileData(os.path.basename(file_info[2]),
                                 os.path.relpath(file_info[2], '.'))
            file_data.set_checksum('sha256', file_info[1])
            # flags the file if its name marks it as a whiteout file
            file_data.set_whiteout()
            file_list.append(file_data)
            index = index + 1
    return file_list
def get_scancode_file(file_dict):
    '''Given a file dictionary from the scancode results, return a FileData
    object with the results'''
    # scancode records paths from the target directory onwards
    # which in tern's case is tern.utils.constants.untar_dir
    # removing that portion of the file path
    fspath = file_dict['path'].replace(
        constants.untar_dir + os.path.sep, '')
    fd = FileData(
        file_dict['name'], fspath, file_dict['date'],
        file_dict['file_type'])
    fd.short_file_type = get_file_type(file_dict)
    fd.add_checksums({'sha1': file_dict['sha1'], 'md5': file_dict['md5']})
    if file_dict['licenses']:
        fd.licenses = [entry['short_name'] for entry in file_dict['licenses']]
        fd.license_expressions = file_dict['license_expressions']
    if file_dict['copyrights']:
        fd.copyrights = [entry['value'] for entry in file_dict['copyrights']]
    if file_dict['urls']:
        fd.urls = [entry['url'] for entry in file_dict['urls']]
    fd.packages = file_dict['packages']
    fd.authors = [entry['value'] for entry in file_dict['authors']]
    if file_dict['scan_errors']:
        # for each scan error make a notice
        for err in file_dict['scan_errors']:
            fd.origins.add_notice_to_origins(
                'File: ' + fd.path, Notice(err, 'error'))
    return fd
def testRemoveFile(self):
    """Removal is keyed on the full path, not the bare file name."""
    target = FileData('file1', 'path/to/file1')
    self.layer.add_file(target)
    # the bare name is not a valid removal key
    self.assertFalse(self.layer.remove_file('file1'))
    self.assertTrue(self.layer.remove_file('path/to/file1'))
    # removing an already-removed path fails
    self.assertFalse(self.layer.remove_file('path/to/file1'))
def testExtAttrs(self):
    """The extattrs string round-trips through the attribute unchanged."""
    attrs = '-rw-r--r--|1000|1000|19|1'
    fd = FileData('test.txt', 'usr/abc/test.txt')
    fd.extattrs = attrs
    self.assertEqual(fd.extattrs, attrs)
def testInstance(self):
    """Constructor defaults, date validation and property assignment."""
    file1 = FileData('file1', 'path/to/file1')
    self.assertEqual(file1.name, 'file1')
    self.assertEqual(file1.path, 'path/to/file1')
    # every optional property starts out empty/falsy
    for attr in ('checksum_type', 'checksum', 'date', 'version_control',
                 'version', 'file_type', 'short_file_type', 'licenses',
                 'license_expressions', 'copyrights', 'authors',
                 'packages', 'urls'):
        self.assertFalse(getattr(file1, attr))
    # '12355' is rejected as a date value
    with self.assertRaises(ValueError):
        file2 = FileData('file2', 'path/to/file2', '12355')
    file1.file_type = 'ELF'
    self.assertEqual(file1.file_type, 'ELF')
    # 'SOMETHING' is rejected as a short file type; 'BINARY' is accepted
    with self.assertRaises(ValueError):
        file1.short_file_type = 'SOMETHING'
    file1.short_file_type = 'BINARY'
    self.assertEqual(file1.short_file_type, 'BINARY')
    file2 = FileData('file2', 'path/to/file2', '2020-01-01', 'binary')
    self.assertEqual(file2.date, '2020-01-01')
    self.assertEqual(file2.file_type, 'binary')
    # list-valued properties should store exactly what was assigned
    expected = {
        'licenses': ['MIT', 'GPL'],
        'license_expressions': ['GPLv2 or MIT', 'MIT and GPLv2'],
        'copyrights': ['copyrights'],
        'authors': ['author1', 'author2'],
        'packages': ['package1', 'package2'],
    }
    for attr, value in expected.items():
        setattr(file2, attr, value)
    for attr, value in expected.items():
        self.assertEqual(getattr(file2, attr), value)
def testMerge(self):
    """Exercise FileData.merge: merging fails for a different path or a
    non-FileData argument, and succeeds for the same path, folding the
    other file's checksums, metadata and origin notices into self.

    NOTE: the assertions below depend on the exact order of the merge
    calls — file3 (sha1) is merged before file2 (full metadata)."""
    file1 = FileData('switch_root', 'sbin/switch_root')
    file1.set_checksum('sha256', '123abc456def')
    file1.extattrs = '-rwxr-xr-x|1000|1000|14408|1'
    # file2: same path as file1, fully populated metadata
    file2 = FileData('switch_root', 'sbin/switch_root')
    file2.add_checksums({
        'SHA1': '12345abcde',
        'MD5': '1ff38cc592c4c5d0c8e3ca38be8f1eb1'
    })
    file2.set_checksum('sha256', '123abc456def')
    file2.extattrs = '-rwxr-xr-x|1000|1000|14408|1'
    file2.date = '2012-02-02'
    file2.file_type = 'binary'
    file2.short_file_type = 'BINARY'
    file2.licenses = ['MIT', 'GPL']
    file2.license_expressions = ['MIT or GPL']
    file2.copyrights = ['copyrights']
    file2.authors = ['author1', 'author2']
    file2.packages = ['package1', 'package2']
    file2.urls = ['url1', 'url2']
    file2.origins.add_notice_to_origins(
        'scanning', Notice('something happened', 'error'))
    # file3: same path, only a sha1 checksum
    file3 = FileData('switch_root', 'sbin/switch_root')
    file3.set_checksum('sha1', '456def123abc')
    # file4: a different file entirely
    file4 = FileData('e2image', 'sbin/e2image')
    # merging a file with a different path fails
    self.assertFalse(file1.merge(file4))
    # merging the same path succeeds and keeps file1's own checksum
    self.assertTrue(file1.merge(file3))
    self.assertEqual(file1.checksum, '123abc456def')
    self.assertEqual(file1.extattrs, '-rwxr-xr-x|1000|1000|14408|1')
    # merging a non-FileData object fails
    self.assertFalse(file1.merge('astring'))
    self.assertTrue(file1.merge(file2))
    # file2's checksums and metadata are folded into file1
    self.assertEqual(file1.checksums, {
        'sha1': '12345abcde',
        'md5': '1ff38cc592c4c5d0c8e3ca38be8f1eb1'
    })
    self.assertEqual(file1.date, '2012-02-02')
    self.assertEqual(file1.file_type, 'binary')
    self.assertEqual(file1.short_file_type, 'BINARY')
    self.assertEqual(file1.licenses, ['MIT', 'GPL'])
    self.assertEqual(file1.license_expressions, ['MIT or GPL'])
    self.assertEqual(file1.copyrights, ['copyrights'])
    self.assertEqual(file1.authors, ['author1', 'author2'])
    self.assertEqual(file1.packages, ['package1', 'package2'])
    self.assertEqual(file1.urls, ['url1', 'url2'])
    # file2's origin notice is carried over as well
    self.assertEqual(len(file1.origins.origins), 1)
    self.assertEqual(file1.origins.origins[0].origin_str, 'scanning')
    self.assertEqual(len(file1.origins.origins[0].notices), 1)
    self.assertEqual(file1.origins.origins[0].notices[0].message,
                     'something happened')
def setUp(self):
    """Build the shared fixture: a file carrying two licenses."""
    fixture = FileData('afile', 'path/to/afile')
    fixture.licenses = ['MIT', 'GPL']
    self.afile = fixture
class TestClassFileData(unittest.TestCase):
    """Unit tests for the FileData class: construction defaults,
    checksum handling, version info, dictionary conversion (plain and
    template-driven), fill() from a metadata dict, and merge()."""

    def setUp(self):
        # shared fixture: a file carrying two licenses
        self.afile = FileData('afile', 'path/to/afile')
        self.afile.licenses = ['MIT', 'GPL']

    def tearDown(self):
        del self.afile

    def testInstance(self):
        """Constructor defaults, date validation, property assignment."""
        file1 = FileData('file1', 'path/to/file1')
        self.assertEqual(file1.name, 'file1')
        self.assertEqual(file1.path, 'path/to/file1')
        # every optional property starts out empty/falsy
        self.assertFalse(file1.checksum_type)
        self.assertFalse(file1.checksum)
        self.assertFalse(file1.date)
        self.assertFalse(file1.version_control)
        self.assertFalse(file1.version)
        self.assertFalse(file1.file_type)
        self.assertFalse(file1.short_file_type)
        self.assertFalse(file1.licenses)
        self.assertFalse(file1.license_expressions)
        self.assertFalse(file1.copyrights)
        self.assertFalse(file1.authors)
        self.assertFalse(file1.packages)
        self.assertFalse(file1.urls)
        # '12355' is rejected as a date value
        with self.assertRaises(ValueError):
            file2 = FileData('file2', 'path/to/file2', '12355')
        file1.file_type = 'ELF'
        self.assertEqual(file1.file_type, 'ELF')
        # 'SOMETHING' is rejected as a short file type; 'BINARY' works
        with self.assertRaises(ValueError):
            file1.short_file_type = 'SOMETHING'
        file1.short_file_type = 'BINARY'
        self.assertEqual(file1.short_file_type, 'BINARY')
        file2 = FileData('file2', 'path/to/file2', '2020-01-01', 'binary')
        self.assertEqual(file2.date, '2020-01-01')
        self.assertEqual(file2.file_type, 'binary')
        # list-valued properties store exactly what was assigned
        file2.licenses = ['MIT', 'GPL']
        file2.license_expressions = ['GPLv2 or MIT', 'MIT and GPLv2']
        file2.copyrights = ['copyrights']
        file2.authors = ['author1', 'author2']
        file2.packages = ['package1', 'package2']
        self.assertEqual(file2.licenses, ['MIT', 'GPL'])
        self.assertEqual(file2.license_expressions,
                         ['GPLv2 or MIT', 'MIT and GPLv2'])
        self.assertEqual(file2.copyrights, ['copyrights'])
        self.assertEqual(file2.authors, ['author1', 'author2'])
        self.assertEqual(file2.packages, ['package1', 'package2'])

    def testSetChecksum(self):
        """set_checksum records both the type and the value."""
        self.afile.set_checksum('sha256', '12345abcde')
        self.assertEqual(self.afile.checksum_type, 'sha256')
        self.assertEqual(self.afile.checksum, '12345abcde')

    def testAddChecksums(self):
        """add_checksums lower-cases the checksum type keys."""
        file1 = FileData('file1', 'path/to/file1')
        file1.add_checksums({
            'SHA1': '12345abcde',
            'MD5': '1ff38cc592c4c5d0c8e3ca38be8f1eb1'
        })
        self.assertEqual(file1.checksums, {
            'sha1': '12345abcde',
            'md5': '1ff38cc592c4c5d0c8e3ca38be8f1eb1'
        })

    def testGetChecksum(self):
        """get_checksum looks up case-insensitively; missing gives None."""
        self.afile.add_checksums({
            'SHA1': '12345abcde',
            'MD5': '1ff38cc592c4c5d0c8e3ca38be8f1eb1'
        })
        self.assertEqual(self.afile.get_checksum('sha1'), '12345abcde')
        self.assertEqual(self.afile.get_checksum('MD5'),
                         '1ff38cc592c4c5d0c8e3ca38be8f1eb1')
        self.assertIsNone(self.afile.get_checksum('sha256'))

    def testExtAttrs(self):
        """The extattrs string round-trips unchanged."""
        file = FileData('test.txt', 'usr/abc/test.txt')
        file.extattrs = '-rw-r--r--|1000|1000|19|1'
        self.assertEqual(file.extattrs, '-rw-r--r--|1000|1000|19|1')

    def testSetVersion(self):
        """set_version records the version-control system and version."""
        self.afile.set_version('git', '12345abcde')
        self.assertEqual(self.afile.version_control, 'git')
        self.assertEqual(self.afile.version, '12345abcde')

    def testToDict(self):
        """to_dict with no template exposes the plain property names."""
        file_dict = self.afile.to_dict()
        self.assertEqual(file_dict['name'], 'afile')
        self.assertEqual(file_dict['path'], 'path/to/afile')
        self.assertEqual(file_dict['licenses'], ['MIT', 'GPL'])

    def testToDictTemplate(self):
        """to_dict with a template remaps keys per the template."""
        template1 = TestTemplate1()
        template2 = TestTemplate2()
        dict1 = self.afile.to_dict(template1)
        dict2 = self.afile.to_dict(template2)
        # template1 maps exactly three properties
        self.assertEqual(len(dict1.keys()), 3)
        self.assertEqual(dict1['file.name'], 'afile')
        self.assertEqual(dict1['file.path'], 'path/to/afile')
        self.assertEqual(dict1['file.licenses'], ['MIT', 'GPL'])
        # template2 adds a 'notes' key with no backing data
        self.assertFalse(dict2['notes'])

    def testFill(self):
        """fill() copies present keys and records a warning notice for
        each expected key missing from the metadata dict."""
        file_dict = {
            'name': 'zconf.h',
            'path': '/usr/include/zconf.h',
            'checksum_type': 'sha256',
            'checksum': '77304005ceb5f0d03ad4c37eb8386a10866e'
                        '4ceeb204f7c3b6599834c7319541',
            'extattrs': '-rw-r--r-- 1 1000 1000 16262 Nov 13 17:57'
                        ' /usr/include/zconf.h',
            'checksums': {
                'sha1': '12345abcde',
                'md5': '1ff38cc592c4c5d0c8e3ca38be8f1eb1'
            }
        }
        f = FileData('zconf.h', '/usr/include/zconf.h')
        f.fill(file_dict)
        self.assertEqual(f.name, 'zconf.h')
        self.assertEqual(f.path, '/usr/include/zconf.h')
        self.assertEqual(f.checksum_type, 'sha256')
        self.assertEqual(
            f.checksum, '77304005ceb5f0d03ad4c37eb838'
            '6a10866e4ceeb204f7c3b6599834c7319541')
        self.assertEqual(
            f.extattrs, '-rw-r--r-- 1 1000 1000 '
            '16262 Nov 13 17:57 /usr/include/zconf.h')
        self.assertEqual(f.checksums, {
            'sha1': '12345abcde',
            'md5': '1ff38cc592c4c5d0c8e3ca38be8f1eb1'
        })
        # one notice per key absent from file_dict, in a fixed order
        self.assertEqual(f.origins.origins[0].notices[0].message,
                         'No metadata for key: date')
        self.assertEqual(f.origins.origins[0].notices[1].message,
                         'No metadata for key: file_type')
        self.assertEqual(f.origins.origins[0].notices[1].level, 'warning')
        self.assertEqual(f.origins.origins[0].notices[2].message,
                         'No metadata for key: short_file_type')

    def testMerge(self):
        """merge() fails on a different path or a non-FileData argument,
        and folds checksums, metadata and origin notices on success.
        NOTE: assertions depend on the exact order of the merge calls."""
        file1 = FileData('switch_root', 'sbin/switch_root')
        file1.set_checksum('sha256', '123abc456def')
        file1.extattrs = '-rwxr-xr-x|1000|1000|14408|1'
        # file2: same path as file1, fully populated metadata
        file2 = FileData('switch_root', 'sbin/switch_root')
        file2.add_checksums({
            'SHA1': '12345abcde',
            'MD5': '1ff38cc592c4c5d0c8e3ca38be8f1eb1'
        })
        file2.set_checksum('sha256', '123abc456def')
        file2.extattrs = '-rwxr-xr-x|1000|1000|14408|1'
        file2.date = '2012-02-02'
        file2.file_type = 'binary'
        file2.short_file_type = 'BINARY'
        file2.licenses = ['MIT', 'GPL']
        file2.license_expressions = ['MIT or GPL']
        file2.copyrights = ['copyrights']
        file2.authors = ['author1', 'author2']
        file2.packages = ['package1', 'package2']
        file2.urls = ['url1', 'url2']
        file2.origins.add_notice_to_origins(
            'scanning', Notice('something happened', 'error'))
        # file3: same path, only a sha1 checksum
        file3 = FileData('switch_root', 'sbin/switch_root')
        file3.set_checksum('sha1', '456def123abc')
        # file4: a different file entirely
        file4 = FileData('e2image', 'sbin/e2image')
        self.assertFalse(file1.merge(file4))
        self.assertTrue(file1.merge(file3))
        self.assertEqual(file1.checksum, '123abc456def')
        self.assertEqual(file1.extattrs, '-rwxr-xr-x|1000|1000|14408|1')
        # merging a non-FileData object fails
        self.assertFalse(file1.merge('astring'))
        self.assertTrue(file1.merge(file2))
        self.assertEqual(file1.checksums, {
            'sha1': '12345abcde',
            'md5': '1ff38cc592c4c5d0c8e3ca38be8f1eb1'
        })
        self.assertEqual(file1.date, '2012-02-02')
        self.assertEqual(file1.file_type, 'binary')
        self.assertEqual(file1.short_file_type, 'BINARY')
        self.assertEqual(file1.licenses, ['MIT', 'GPL'])
        self.assertEqual(file1.license_expressions, ['MIT or GPL'])
        self.assertEqual(file1.copyrights, ['copyrights'])
        self.assertEqual(file1.authors, ['author1', 'author2'])
        self.assertEqual(file1.packages, ['package1', 'package2'])
        self.assertEqual(file1.urls, ['url1', 'url2'])
        # file2's origin notice is carried over as well
        self.assertEqual(len(file1.origins.origins), 1)
        self.assertEqual(file1.origins.origins[0].origin_str, 'scanning')
        self.assertEqual(len(file1.origins.origins[0].notices), 1)
        self.assertEqual(file1.origins.origins[0].notices[0].message,
                         'something happened')
def testSetWhiteout(self):
    """set_whiteout flags files named with the '.wh.' whiteout prefix."""
    wh_file = FileData('.wh.os-release', 'usr/lib/.wh.os-release')
    wh_file.set_whiteout()
    self.assertTrue(wh_file.is_whiteout)
    # a regular file name is not marked as a whiteout
    self.afile.set_whiteout()
    self.assertFalse(self.afile.is_whiteout)
def setUp(self):
    """Fresh fixtures per test: an image, a file, two yum commands,
    and an empty cache."""
    self.image = TestImage('5678efgh')
    self.file = FileData('README.txt', '/home/test')
    self.command1 = Command("yum install nfs-utils")
    self.command2 = Command("yum remove nfs-utils")
    # each test starts with nothing cached
    cache.cache = {}
class TestClassFileData(unittest.TestCase):
    """Unit tests for the FileData class: construction defaults,
    checksum and version setters, and dictionary conversion (plain and
    template-driven)."""

    def setUp(self):
        # shared fixture: a file carrying two licenses
        self.afile = FileData('afile', 'path/to/afile')
        self.afile.licenses = ['MIT', 'GPL']

    def tearDown(self):
        del self.afile

    def testInstance(self):
        """Constructor defaults, date validation, property assignment."""
        file1 = FileData('file1', 'path/to/file1')
        self.assertEqual(file1.name, 'file1')
        self.assertEqual(file1.path, 'path/to/file1')
        # every optional property starts out empty/falsy
        self.assertFalse(file1.checksum_type)
        self.assertFalse(file1.checksum)
        self.assertFalse(file1.date)
        self.assertFalse(file1.version_control)
        self.assertFalse(file1.version)
        self.assertFalse(file1.file_type)
        self.assertFalse(file1.licenses)
        self.assertFalse(file1.license_expressions)
        self.assertFalse(file1.copyrights)
        self.assertFalse(file1.authors)
        self.assertFalse(file1.packages)
        self.assertFalse(file1.urls)
        # '12355' is rejected as a date value
        with self.assertRaises(ValueError):
            file2 = FileData('file2', 'path/to/file2', '12355')
        file2 = FileData('file2', 'path/to/file2', '2020-01-01', 'binary')
        self.assertEqual(file2.date, '2020-01-01')
        self.assertEqual(file2.file_type, 'binary')
        # list-valued properties store exactly what was assigned
        file2.licenses = ['MIT', 'GPL']
        file2.license_expressions = ['GPLv2 or MIT', 'MIT and GPLv2']
        file2.copyrights = ['copyrights']
        file2.authors = ['author1', 'author2']
        file2.packages = ['package1', 'package2']
        self.assertEqual(file2.licenses, ['MIT', 'GPL'])
        self.assertEqual(file2.license_expressions,
                         ['GPLv2 or MIT', 'MIT and GPLv2'])
        self.assertEqual(file2.copyrights, ['copyrights'])
        self.assertEqual(file2.authors, ['author1', 'author2'])
        self.assertEqual(file2.packages, ['package1', 'package2'])

    def testSetChecksum(self):
        """set_checksum records both the type and the value."""
        self.afile.set_checksum('sha256', '12345abcde')
        self.assertEqual(self.afile.checksum_type, 'sha256')
        self.assertEqual(self.afile.checksum, '12345abcde')

    def testSetVersion(self):
        """set_version records the version-control system and version."""
        self.afile.set_version('git', '12345abcde')
        self.assertEqual(self.afile.version_control, 'git')
        self.assertEqual(self.afile.version, '12345abcde')

    def testToDict(self):
        """to_dict with no template exposes the plain property names."""
        file_dict = self.afile.to_dict()
        self.assertEqual(file_dict['name'], 'afile')
        self.assertEqual(file_dict['path'], 'path/to/afile')
        self.assertEqual(file_dict['licenses'], ['MIT', 'GPL'])

    def testToDictTemplate(self):
        """to_dict with a template remaps keys per the template."""
        template1 = TestTemplate1()
        template2 = TestTemplate2()
        dict1 = self.afile.to_dict(template1)
        dict2 = self.afile.to_dict(template2)
        # template1 maps exactly three properties
        self.assertEqual(len(dict1.keys()), 3)
        self.assertEqual(dict1['file.name'], 'afile')
        self.assertEqual(dict1['file.path'], 'path/to/afile')
        self.assertEqual(dict1['file.licenses'], ['MIT', 'GPL'])
        # template2 adds a 'notes' key with no backing data
        self.assertFalse(dict2['notes'])