def test_memory_time_setters(archfmt, timefmt):
    has_birthtime = archfmt != 'zip'

    # Create an archive of our libarchive/ directory
    buf = bytes(bytearray(1000000))
    with memory_writer(buf, archfmt) as archive1:
        archive1.add_files('libarchive/')

    atimestamp = (1482144741, 495628118)
    mtimestamp = (1482155417, 659017086)
    ctimestamp = (1482145211, 536858081)
    btimestamp = (1482144740, 495628118)
    buf2 = bytes(bytearray(1000000))
    with memory_reader(buf) as archive1:
        with memory_writer(buf2, archfmt) as archive2:
            for entry in archive1:
                entry.set_atime(*atimestamp)
                entry.set_mtime(*mtimestamp)
                entry.set_ctime(*ctimestamp)
                if has_birthtime:
                    entry.set_birthtime(*btimestamp)
                archive2.add_entries([entry])

    with memory_reader(buf2) as archive2:
        for entry in archive2:
            assert entry.atime == time_check(atimestamp, timefmt)
            assert entry.mtime == time_check(mtimestamp, timefmt)
            assert entry.ctime == time_check(ctimestamp, timefmt)
            if has_birthtime:
                assert entry.birthtime == time_check(btimestamp, timefmt)

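# The `time_check` helper above is assumed rather than shown. A plausible
# minimal sketch, assuming `timefmt` is a conversion callable such as `int`
# or `float` that models the timestamp precision the archive format keeps:
def time_check(timestamp, timefmt):
    # Hypothetical helper: collapse (seconds, nanoseconds) into one number
    # at the precision dictated by `timefmt`.
    seconds, nanos = timestamp
    return timefmt(float(seconds) + float(nanos) / 1_000_000_000)
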
def scan(self, data, file, options, expire_at):
    file_limit = options.get('limit', 1000)

    self.event['total'] = {'files': 0, 'extracted': 0}

    try:
        with libarchive.memory_reader(data) as archive:
            for entry in archive:
                self.event['total']['files'] += 1
                if entry.isfile:
                    if self.event['total']['extracted'] >= file_limit:
                        continue

                    extract_file = strelka.File(
                        name=entry.pathname,
                        source=self.name,
                    )
                    for block in entry.get_blocks():
                        self.upload_to_cache(
                            extract_file.pointer,
                            block,
                            expire_at,
                        )
                    self.files.append(extract_file)
                    self.event['total']['extracted'] += 1

    except libarchive.ArchiveError:
        self.flags.append('libarchive_archive_error')

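# A minimal, self-contained sketch of the libarchive-c pattern the scanner
# above relies on: iterate an in-memory archive and read each regular file
# block by block, so a large member never has to fit in one allocation.
# The function name here is illustrative, not part of the scanner.
import libarchive

def list_archive_members(data):
    # Sketch: return (pathname, size) for every regular file in `data`.
    members = []
    with libarchive.memory_reader(data) as archive:
        for entry in archive:
            if entry.isfile:
                members.append((entry.pathname, entry.size))
    return members
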
def test_entry_properties():
    buf = bytes(bytearray(1000000))
    with memory_writer(buf, 'gnutar') as archive:
        archive.add_files('README.rst')

    readme_stat = stat('README.rst')

    with memory_reader(buf) as archive:
        for entry in archive:
            assert entry.uid == readme_stat.st_uid
            assert entry.gid == readme_stat.st_gid
            assert entry.mode == readme_stat.st_mode
            assert not entry.isblk
            assert not entry.ischr
            assert not entry.isdir
            assert not entry.isfifo
            assert not entry.islnk
            assert not entry.issym
            assert not entry.linkpath
            assert entry.linkpath == entry.linkname
            assert entry.isreg
            assert entry.isfile
            assert not entry.issock
            assert not entry.isdev
            assert b'rw' in entry.strmode
            assert entry.pathname == entry.path
            assert entry.pathname == entry.name

def execute(self, ignore_errors=True, use_progressbar=False):
    """Collect StackOverflow keywords."""
    keywords_set = KeywordsSet()

    _logger.debug("Fetching StackOverflow")
    response = requests.get(self._STACKOVERFLOW_URL)
    if not response.ok:
        raise RuntimeError(
            "Failed to fetch '%s', request ended with status code %s"
            % (self._STACKOVERFLOW_URL, response.status_code))

    tags = None
    _logger.debug("Unpacking StackOverflow's tags archive")
    with libarchive.memory_reader(response.content) as archive:
        for entry in archive:
            if entry.name == 'Tags.xml':
                tags = xmltodict.parse(b"".join(entry.get_blocks()))
                break

    # Guard against the archive not containing Tags.xml at all; without
    # this, the loop below would raise a TypeError on None.
    if tags is None:
        raise RuntimeError("Tags.xml not found in the downloaded archive")

    for tag in tags['tags']['row']:
        try:
            keywords_set.add(tag['@TagName'], int(tag['@Count']))
        except ValueError:
            _logger.warning(
                "Failed to parse number of occurrences for tag %s",
                tag['@TagName'])
            continue
        except KeyError:
            _logger.exception("Missing tag name or tag count")
            continue

    return keywords_set

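# `KeywordsSet` is assumed above. A hypothetical minimal sketch consistent
# with how it is used (add(keyword, count)):
class KeywordsSet:
    # Hypothetical container: accumulate keyword -> occurrence count.
    def __init__(self):
        self.keywords = {}

    def add(self, keyword, count=1):
        self.keywords[keyword] = self.keywords.get(keyword, 0) + count
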
def scan(self, file_object, options):
    file_limit = options.get("limit", 1000)

    self.metadata["total"] = {"files": 0, "extracted": 0}

    try:
        with libarchive.memory_reader(file_object.data) as archive:
            for entry in archive:
                self.metadata["total"]["files"] += 1
                if entry.isfile:
                    if self.metadata["total"]["extracted"] >= file_limit:
                        continue

                    child_file = b"".join(entry.get_blocks())
                    if entry.pathname:
                        child_filename = f"{self.scanner_name}::{entry.pathname}"
                    else:
                        child_filename = f"{self.scanner_name}::size_{len(child_file)}"

                    child_fo = objects.StrelkaFile(
                        data=child_file,
                        filename=child_filename,
                        depth=file_object.depth + 1,
                        parent_uid=file_object.uid,
                        root_uid=file_object.root_uid,
                        parent_hash=file_object.hash,
                        root_hash=file_object.root_hash,
                        source=self.scanner_name)
                    self.children.append(child_fo)
                    self.metadata["total"]["extracted"] += 1

    except libarchive.ArchiveError:
        file_object.flags.append(
            f"{self.scanner_name}::libarchive_archive_error")

def test_entry_sparse_manual(tmpdir, sparse_map):
    """Can we archive a partial non-sparse file as sparse?"""
    fname = tmpdir.join('sparse1').strpath
    size = 8192
    with open(fname, 'w') as testf:
        testf.write(generate_contents(size))

    buf = bytes(bytearray(1000000))
    with memory_writer(buf, 'pax') as archive:
        with new_archive_entry_from_path(fname) as entry:
            assert len(entry.sparse_map) == 0
            entry.sparse_map.extend(sparse_map)

            # Not using archive.add_entries: that assumes the entry comes
            # from another archive and tries to use entry.get_blocks().
            write_p = archive._pointer

            ffi.write_header(write_p, entry.entry_p)

            with open(fname, 'rb') as testf:
                entry_data = testf.read()
                ffi.write_data(write_p, entry_data, len(entry_data))

            ffi.write_finish_entry(write_p)

    with memory_reader(buf) as archive:
        for entry in archive:
            assert entry.name == fname.lstrip('/')
            assert entry.mode == stat(fname)[0]
            assert entry.size == size
            assert len(entry.sparse_map) == len(sparse_map)
            assert entry.sparse_map == sparse_map

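# `generate_contents` is assumed above; a plausible sketch producing
# deterministic printable content of the requested size:
import string

def generate_contents(size):
    # Hypothetical helper: repeat the ASCII letters to fill `size` chars.
    alphabet = string.ascii_letters
    reps, rem = divmod(size, len(alphabet))
    return alphabet * reps + alphabet[:rem]
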
def test_entry_properties():
    buf = bytes(bytearray(1000000))
    with memory_writer(buf, 'gnutar') as archive:
        archive.add_files('README.rst')

    with memory_reader(buf) as archive:
        for entry in archive:
            assert entry.mode == stat('README.rst')[0]
            assert b'rw' in entry.strmode

def test_convert():
    # Collect information on what should be in the archive
    tree = treestat('libarchive')

    # Create an archive of our libarchive/ directory
    buf = bytes(bytearray(1000000))
    with memory_writer(buf, 'gnutar', 'xz') as archive1:
        archive1.add_files('libarchive/')

    # Convert the archive to another format
    buf2 = bytes(bytearray(1000000))
    with memory_reader(buf) as archive1:
        with memory_writer(buf2, 'zip') as archive2:
            archive2.add_entries(archive1)

    # Check the data
    with memory_reader(buf2) as archive2:
        check_archive(archive2, tree)

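# The tests here lean on two assumed helpers, `treestat` and `check_archive`.
# A plausible sketch of both, assuming treestat maps each file path under a
# directory to stat information (optionally transformed by a callable, as in
# treestat('libarchive', stat_dict)) and check_archive compares archive
# entries against that snapshot:
from os import stat, walk
from os.path import join

def treestat(d, stat_func=stat):
    # Plausible sketch: snapshot a directory tree as {path: stat-info}.
    r = {}
    for dirpath, dirnames, filenames in walk(d):
        for fname in filenames:
            fpath = join(dirpath, fname)
            r[fpath] = stat_func(fpath)
    return r

def check_archive(archive, tree):
    # Plausible sketch: every regular file in the archive must appear in
    # the snapshot with a matching size, and nothing may be missing.
    seen = 0
    for entry in archive:
        path = entry.pathname.rstrip('/')
        if entry.isfile and path in tree:
            assert entry.size == tree[path].st_size
            seen += 1
    assert seen == len(tree)
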
def test_memory_atime_ctime(archfmt, timefmt):
    # Collect information on what should be in the archive
    tree = treestat('libarchive', stat_dict)

    # Create an archive of our libarchive/ directory
    buf = bytes(bytearray(1000000))
    with memory_writer(buf, archfmt) as archive1:
        archive1.add_files('libarchive/')

    # Check the data
    with memory_reader(buf) as archive2:
        check_atime_ctime(archive2, tree, timefmt=timefmt)

def test_memory_atime_ctime():
    # Collect information on what should be in the archive
    tree = treestat('libarchive', stat_dict)

    # Create an archive of our libarchive/ directory
    buf = bytes(bytearray(1000000))
    with memory_writer(buf, 'zip') as archive1:
        archive1.add_files('libarchive/')

    # Check the data
    with memory_reader(buf) as archive2:
        check_atime_ctime(archive2, tree)

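# `stat_dict`, used by both atime/ctime tests above, is assumed; a plausible
# sketch that keeps just the stat fields the checks compare:
from os import stat

def stat_dict(path):
    # Hypothetical helper: reduce a stat result to the access and change
    # times that check_atime_ctime is presumed to look at.
    st = stat(path)
    return {'atime': st.st_atime, 'ctime': st.st_ctime}
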
def test_memory_time_setters():
    atimestamp = (1482144741, 495628118)
    mtimestamp = (1482155417, 659017086)
    ctimestamp = (1482145211, 536858081)

    # Create an archive of our libarchive/ directory
    buf = bytes(bytearray(1000000))
    with memory_writer(buf, "zip") as archive1:
        archive1.add_files('libarchive/')

    buf2 = bytes(bytearray(1000000))
    with memory_reader(buf) as archive1:
        with memory_writer(buf2, "zip") as archive2:
            for entry in archive1:
                entry.set_atime(*atimestamp)
                entry.set_mtime(*mtimestamp)
                entry.set_ctime(*ctimestamp)
                archive2.add_entries([entry])

    with memory_reader(buf2) as archive2:
        for entry in archive2:
            assert entry.atime == atimestamp[0]
            assert entry.mtime == mtimestamp[0]
            assert entry.ctime == ctimestamp[0]

def test_entry_sparse(tmpdir, size, write_map, sparse_map):
    """Test that we can write & read back a sparse file and its sparse map."""
    fname = tmpdir.join('sparse1').strpath
    create_sparse_file(fname, write_map, size)

    buf = bytes(bytearray(1000000))
    with memory_writer(buf, 'pax') as archive:
        archive.add_files(fname)

    with memory_reader(buf) as archive:
        for entry in archive:
            assert entry.name == fname.lstrip('/')
            assert entry.mode == stat(fname)[0]
            assert entry.size == size
            assert len(entry.sparse_map) == len(sparse_map)
            assert entry.sparse_map == sparse_map

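# `create_sparse_file` is assumed; a plausible sketch, taking `write_map`
# to be a list of (offset, length) extents that receive real data while
# everything else is left as filesystem holes. Seeking past EOF and writing
# creates the holes; the final truncate preserves any trailing hole.
def create_sparse_file(fname, write_map, size):
    # Hypothetical helper: only the mapped extents get bytes on disk.
    with open(fname, 'wb') as f:
        for offset, length in write_map:
            f.seek(offset)
            f.write(b'x' * length)
        f.truncate(size)
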
def decompress_7z(iri, r, red):
    """Download a 7z file, decompress it and store contents in redis."""
    data = load_data(iri, r)
    log = logging.getLogger(__name__)
    expiration = expire_table[KeyRoot.DATA]

    deco_size_total = 0
    with libarchive.memory_reader(data) as archive:
        for entry in archive:
            try:
                name = str(entry)
            except Exception:
                name = str(uuid.uuid4())
            if len(name) == 0:
                if iri.endswith('.zip'):
                    sub_iri = iri[:-4]
                else:
                    sub_iri = f'{iri}/{name}'
                log.error(f'Empty name, iri: {iri!s}')
            else:
                sub_iri = f'{iri}/{name}'
            sub_key = data_key(sub_iri)
            log.debug(f'Store {name} into {sub_key}')
            conlen = 0
            if not red.exists(sub_key):
                red.sadd('purgeable', sub_key)
                for block in entry.get_blocks():
                    if len(block) + conlen > MAX_CONTENT_LENGTH:
                        # Would fail due to the redis value size limitation
                        red.expire(sub_key, 0)
                        raise SizeException(name)
                    red.append(sub_key, block)
                    conlen = conlen + len(block)
                red.expire(sub_key, expiration)
                monitor.log_size(conlen)
                log.debug(f'Subfile has size {conlen}')
                deco_size_total = deco_size_total + conlen
            else:
                log.warning(f'Data already exists for {sub_iri}')
            if conlen > 0:
                yield sub_iri
    log.debug(f'Done decompression, total decompressed size {deco_size_total}')

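# `SizeException` above is assumed; a minimal hypothetical definition. The
# guard exists because a redis string value cannot grow past 512 MB, so
# MAX_CONTENT_LENGTH is presumably at or below that limit.
class SizeException(Exception):
    # Hypothetical: raised when a single decompressed member would
    # overflow the per-key size budget.
    pass
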
def test_adding_entry_from_memory():
    entry_path = 'this is path'
    entry_data = 'content'
    entry_size = len(entry_data)

    blocks = []

    def write_callback(data):
        blocks.append(data[:])
        return len(data)

    with libarchive.custom_writer(write_callback, 'zip') as archive:
        archive.add_file_from_memory(entry_path, entry_size, entry_data)

    buf = b''.join(blocks)
    with libarchive.memory_reader(buf) as memory_archive:
        for archive_entry in memory_archive:
            assert entry_data.encode() == b''.join(archive_entry.get_blocks())
            assert archive_entry.path == entry_path

def test_custom_writer():
    # Collect information on what should be in the archive
    tree = treestat('libarchive')

    # Create an archive of our libarchive/ directory
    blocks = []

    def write_cb(data):
        blocks.append(data[:])
        return len(data)

    with libarchive.custom_writer(write_cb, 'zip') as archive:
        archive.add_files('libarchive/')

    # Read the archive and check that the data is correct
    buf = b''.join(blocks)
    with libarchive.memory_reader(buf) as archive:
        check_archive(archive, tree)

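# The write callback contract shown above (receive a block of bytes, return
# the count consumed) also composes with io.BytesIO; a sketch of the same
# round-trip without the intermediate list of blocks. The function name is
# illustrative.
import io
import libarchive

def archive_to_bytes(paths, fmt='zip'):
    # Sketch: accumulate the whole archive into a single in-memory buffer.
    sink = io.BytesIO()

    def write_cb(data):
        sink.write(data)
        return len(data)

    with libarchive.custom_writer(write_cb, fmt) as archive:
        archive.add_files(*paths)
    return sink.getvalue()
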
def decompress(data):
    # Bytes comparisons: memory_reader consumes bytes, so `data` is bytes.
    if not data.startswith(b'MZ') or b'!Require Windows' not in data:
        log.debug('[-] not an sfx archive (1)')
        return  # bare return: raising StopIteration in a generator is a
                # RuntimeError under PEP 479 (Python 3.7+)

    idx = data.find(b'!@InstallEnd@!\r\n')
    if idx == -1:
        log.debug('[-] not an sfx archive (2)')
        return

    if data[idx + 16:idx + 18] != b'7z':
        log.debug('[-] not an sfx archive (3)')
        return

    data = data[idx + 16:]
    with libarchive.memory_reader(data) as archive:
        for ent in archive:
            e = sfx_ent(ent)
            yield e

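# `sfx_ent` is assumed in the sfx snippets in this section; a hypothetical
# sketch. It must copy the entry's data eagerly, because a libarchive entry
# becomes unreadable once iteration advances past it.
class sfx_ent:
    # Hypothetical wrapper: capture pathname and contents while the
    # archive iterator is still positioned on this entry.
    def __init__(self, entry):
        self.pathname = entry.pathname
        self.data = b''.join(entry.get_blocks())
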
def test_buffers(tmpdir):
    # Collect information on what should be in the archive
    tree = treestat('libarchive')

    # Create an archive of our libarchive/ directory
    buf = bytes(bytearray(1000000))
    with libarchive.memory_writer(buf, 'gnutar', 'xz') as archive:
        archive.add_files('libarchive/')

    # Read the archive and check that the data is correct
    with libarchive.memory_reader(buf) as archive:
        check_archive(archive, tree)

    # Extract the archive in tmpdir and check that the data is intact
    with in_dir(tmpdir.strpath):
        flags = EXTRACT_OWNER | EXTRACT_PERM | EXTRACT_TIME
        libarchive.extract_memory(buf, flags)
        tree2 = treestat('libarchive')
        assert tree2 == tree

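# `in_dir` is assumed above; a plausible sketch as a chdir context manager:
import os
from contextlib import contextmanager

@contextmanager
def in_dir(dirpath):
    # Hypothetical helper: run the block with `dirpath` as the working
    # directory, restoring the previous one afterwards.
    prev = os.getcwd()
    os.chdir(dirpath)
    try:
        yield
    finally:
        os.chdir(prev)
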
def test_adding_entry_from_memory(archfmt, data_bytes):
    entry_path = 'testfile.data'
    entry_data = data_bytes
    entry_size = len(data_bytes)
    blocks = []

    archfmt = 'zip'  # NOTE: overrides the parametrized format, so every
                     # run exercises zip; possibly a leftover
    has_birthtime = archfmt != 'zip'

    atime = (1482144741, 495628118)
    mtime = (1482155417, 659017086)
    ctime = (1482145211, 536858081)
    btime = (1482144740, 495628118) if has_birthtime else None

    def write_callback(data):
        blocks.append(data[:])
        return len(data)

    with libarchive.custom_writer(write_callback, archfmt) as archive:
        archive.add_file_from_memory(
            entry_path, entry_size, entry_data,
            atime=atime, mtime=mtime, ctime=ctime, birthtime=btime)

    buf = b''.join(blocks)
    with libarchive.memory_reader(buf) as memory_archive:
        for archive_entry in memory_archive:
            expected = entry_data
            actual = b''.join(archive_entry.get_blocks())
            assert expected == actual
            assert archive_entry.path == entry_path
            assert archive_entry.atime in (atime[0], format_time(*atime))
            assert archive_entry.mtime in (mtime[0], format_time(*mtime))
            assert archive_entry.ctime in (ctime[0], format_time(*ctime))
            if has_birthtime:
                assert archive_entry.birthtime in (btime[0], format_time(*btime))

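# `format_time` is assumed above; a plausible sketch matching the
# assertions, which accept either bare seconds or a combined float:
def format_time(seconds, nanos):
    # Hypothetical helper: fold (seconds, nanoseconds) into one float.
    return float(seconds) + float(nanos) / 1_000_000_000
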
def main() -> int:
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="""
Generate a C header file to standard output with a preprocessor symbol
definition of the physical load address in hexadecimal of the ELF-loader in
`payload_filename`.  The address is calculated using the description of
memory in `platform_filename` and the CPIO archive members embedded in the
payload file, including the loadable segments of the ELF objects and a
possible DTB (device tree binary) file.  The ELF-loader is placed in the
first (lowest) sufficiently-large memory region.
""")
    parser.add_argument('--load-rootservers-high',
                        dest='load_rootservers_high',
                        default=False, action='store_true',
                        help='assume ELF-loader will put rootservers at top'
                             ' of memory')
    parser.add_argument('platform_filename', nargs=1, type=str,
                        help='YAML description of platform parameters (e.g.,'
                             ' platform_gen.yaml)')
    parser.add_argument('payload_filename', nargs=1, type=str,
                        help='ELF-loader image file (e.g., archive.o)')

    # Set up some simpler names for argument data and derived information.
    args = parser.parse_args()
    image = args.payload_filename[0]
    image_size = os.path.getsize(image)
    do_load_rootservers_high = args.load_rootservers_high
    platform = platform_sift.load_data(args.platform_filename[0])

    rootservers = []
    is_dtb_present = False
    is_good_fit = False

    with libarchive.memory_reader(get_cpio(image)) as archive:
        for entry in archive:
            name = entry.name
            debug('encountered CPIO entry name: {}'.format(name))
            if name == 'kernel.elf':
                kernel_elf = get_bytes(entry)
            elif name == 'kernel.dtb':
                # The ELF-loader loads the entire DTB into memory.
                is_dtb_present = True
                dtb_size = entry.size
            elif name.endswith('.bin'):
                # Skip checksum entries.
                notice('skipping checksum entry "{}"'.format(name))
            else:
                rootservers.append(get_bytes(entry))

    # Enumerate the regions as we encounter them for diagnostic purposes.
    region_counter = -1
    # Index of the final region; `- 1` because region_counter is 0-based
    # (with len() alone the last-region warning below could never fire).
    last_region = len(platform['memory']) - 1

    for region in platform['memory']:
        region_counter += 1
        marker = region['start']
        debug_marker_set(marker, 'region {} start'.format(region_counter))

        # Note: We assume that the kernel is loaded at the start of memory
        # because that is what elfloader-tool/src/arch-arm/linker.lds ensures
        # will happen.  This assumption may change in the future!
        kernel_start = region['start']
        kernel_size = elf_sift.get_memory_usage(kernel_elf, align=True)
        kernel_end = elf_sift.get_aligned_size(kernel_start + kernel_size)
        marker = kernel_end
        debug_marker_set(marker, 'kernel_end')

        if is_dtb_present:
            dtb_start = marker
            dtb_end = elf_sift.get_aligned_size(dtb_start + dtb_size)
            marker = dtb_end
            debug_marker_set(marker, 'dtb_end')

        if do_load_rootservers_high and (region_counter == last_region):
            warn('"--load-rootservers-high" specified but placing'
                 ' ELF-loader in last (or only) region ({} of {}); overlap'
                 ' may not be detected by this tool'
                 .format(region_counter, last_region))

        # Deal with the 1..(# of CPUs - 1) possible user payloads, if we're
        # not loading them in high memory, discontiguously with the kernel.
        #
        # TODO: Handle this differently (skipping, or checking to see if we
        # had to push the kernel so high that it whacks the user payloads--
        # but the ELF-loader itself should detect that case).  At present the
        # case of multiple rootservers is difficult to debug because it is
        # not implemented on the archive-construction side; see JIRA
        # SELFOUR-2368.
        if not do_load_rootservers_high:
            for elf in rootservers:
                marker += elf_sift.get_memory_usage(elf, align=True)
                debug_marker_set(marker, 'end of rootserver')

            # Note: sel4test_driver eats (almost) 4 more MiB than it claims
            # to.  Fixing this is JIRA SELFOUR-2335.
            fudge_factor = 4 * 1024 * 1024
            marker += elf_sift.get_aligned_size(fudge_factor)
            debug_marker_set(marker, 'end of (aligned) fudge factor')

        image_start_address = marker

        if (image_start_address + image_size) <= region['end']:
            is_good_fit = True
            break

    if not is_good_fit:
        die('ELF-loader image "{image}" of size 0x{size:x} does not fit'
            ' within any memory region described in "{yaml}"'
            .format(image=image, size=image_size,
                    yaml=args.platform_filename[0]),
            status=1)

    sys.stdout.write('#define IMAGE_START_ADDR 0x{load:x}\n'
                     .format(load=image_start_address))
    return 0

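# The diagnostic helpers used above (debug, notice, warn, die,
# debug_marker_set) are assumed. A plausible sketch of the marker logger,
# under the assumption that debug() writes to stderr so that stdout stays a
# clean generated header:
import sys

def debug(message):
    # Hypothetical: diagnostics go to stderr, generated output to stdout.
    sys.stderr.write(message + '\n')

def debug_marker_set(marker, description):
    # Hypothetical helper: record each move of the placement cursor.
    debug('set marker to 0x{:x} ({})'.format(marker, description))
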
def _unpack(self):
    with libarchive.memory_reader(self._data) as archive:
        for ent in archive:
            e = sfx_ent(ent)
            # str() rather than Python 2's unicode()
            self._ents[str(e.pathname)] = e
    self._unpacked = True