def print_status(self, verbose):
    """Write the reader's current status to stdout.

    Nothing is printed when ``verbose`` is below 1.

    Args:
        verbose: verbosity level.  At 1 or above a one-line summary of the
            file name, stream offset, size of the current buffer and the
            result of ``self.more()`` is printed.  At 2 or above the escaped
            contents of the previous and current buffers are printed too.
    """
    if verbose >= 1:
        # summary line: where we are in the stream and how much is buffered
        summary = (self.filename, self.stream_offset,
                   len(self.current), self.more())
        print("# File: '%s', stream_offset: %s, count: %s, more: %r" % summary)

        if verbose >= 2:
            # buffer contents, escaped by be_scan before printing
            shown_previous = be_scan.escape(self.previous)
            shown_current = be_scan.escape(self.current)
            print("# previous: '%s'\nCurrent: '%s'" %
                  (shown_previous, shown_current))
def recurse(uncompressed_buffer, recursion_prefix, depth):
    """Scan an uncompressed buffer, print its artifacts, and descend.

    Each artifact found in ``uncompressed_buffer`` is printed to stdout.
    Every zip or gzip artifact is uncompressed first; while ``depth`` does
    not exceed ``MAX_RECURSION_DEPTH`` the uncompressed data is scanned by a
    nested call with ``depth + 1``.

    Uses the module-level ``args``, ``scan_engine``, ``uncompressor``,
    ``set_compression_text`` and ``MAX_RECURSION_DEPTH``.

    Args:
        uncompressed_buffer: the data to scan.
        recursion_prefix: prefix handed to the scanner for this buffer and
            printed in the status line.
        depth: recursion depth of this call.

    Raises:
        ValueError: if the scanner returns a non-empty status.
    """
    compressed_classes = ("zip", "gzip")

    # runtime status
    if args.verbose >= 1:
        buffer_size = len(uncompressed_buffer)
        print("# Processing %s count %d depth %d..." %
              (recursion_prefix, buffer_size, depth))
        if args.verbose >= 2:
            print("# %s\n" % be_scan.escape(uncompressed_buffer))

    # set up a scanner that deposits into its own artifact queue
    found = be_scan.artifacts_t()
    scanner = be_scan.scanner_t(scan_engine, found)
    scanner.scan_setup(args.filename, recursion_prefix)

    # scan the whole buffer in one pass
    scan_error = scanner.scan_final(0, "", uncompressed_buffer)
    if scan_error:
        raise ValueError(scan_error)

    # drain the queue; a blank artifact means there is nothing left
    while True:
        artifact = found.get()
        if artifact.blank():
            return

        if artifact.artifact_class in compressed_classes:
            # uncompress starting at the artifact's offset in this buffer
            inflated = uncompressor.uncompress(uncompressed_buffer,
                                               artifact.offset)

            # no error and no data: nothing worth reporting
            if not (inflated.status or inflated.buffer):
                continue

            set_compression_text(artifact, inflated)

            # skip popular useless uncompressed data
            if artifact.artifact == "8da7a0b0144fc58332b03e90aaf7ba25":
                continue

        # other artifact types need no preparation

        print(artifact.to_string())

        # descend into the uncompressed data while depth allows
        if artifact.artifact_class in compressed_classes and \
                depth <= MAX_RECURSION_DEPTH:
            child_prefix = "%s-%d-%s" % (artifact.recursion_prefix,
                                         artifact.offset,
                                         artifact.artifact_class.upper())
            recurse(inflated.buffer, child_prefix, depth + 1)
def test_escape():
    """Check be_scan.escape on a string containing an embedded NUL."""
    # input holds a real NUL character between 'a' and 'b'
    raw = "a\0b"
    # expected output spells the NUL as backslash, 'x', '0', '0'
    expected = "a\\x00b"
    str_equals(be_scan.escape(raw), expected)
def scan_range(filename, offset, page_size, margin_size, max_depth, verbose):
    """Scan one page of a file and return its formatted artifacts.

    Reads up to ``page_size`` bytes starting at ``offset`` and up to
    ``margin_size`` bytes immediately after, scans the page, then scans
    across the page/margin fence.  Zip and gzip artifacts are uncompressed;
    when ``max_depth >= 1`` the uncompressed data is passed to ``recurse``.

    Uses the module-level ``scan_engine``, ``set_compression_text`` and
    ``recurse``.

    Args:
        filename: path of the file to read; also the name given to the
            scanner.
        offset: byte offset in the file where the page starts.
        page_size: number of bytes to read for the page.
        margin_size: number of bytes to read after the page.
        max_depth: recursion is attempted only when this is at least 1; the
            value is also forwarded to ``recurse``.
        verbose: verbosity level; 1 prints a status line, 2 also prints the
            escaped buffers.

    Returns:
        A list of ``artifact.to_string()`` values in the order the artifacts
        were consumed.  The same list is handed to ``recurse``, which
        presumably appends to it -- confirm against ``recurse``.

    Raises:
        ValueError: if the scanner returns a non-empty status.
    """
    compressed_classes = ("zip", "gzip")

    # read the page and the margin that follows it
    with open(filename, 'rb') as f:
        f.seek(offset)
        buffer1 = f.read(page_size)
        buffer2 = f.read(margin_size)

    # print runtime status
    if verbose >= 1:
        # print buffer offset
        print("# File: '%s', offset: %d, size: %d, overflow size: %d" %
              (filename, offset, len(buffer1), len(buffer2)))
    if verbose >= 2:
        # print buffer contents
        # NOTE(review): the labels say previous/Current but the values are
        # the page (buffer1) and the margin (buffer2); text left unchanged
        print("# previous: '%s'\nCurrent: '%s'" %
              (be_scan.escape(buffer1), be_scan.escape(buffer2)))

    # the uncompressor this process will use, created on first need
    uncompressor = None

    # the formatted artifacts
    formatted_artifacts = []

    # create scanner, naming the file that was actually read rather than
    # depending on a global args object
    artifacts = be_scan.artifacts_t()
    scanner = be_scan.scanner_t(scan_engine, artifacts)
    scanner.scan_setup(filename, "")

    # scan buffer
    status = scanner.scan_stream(offset, "", buffer1)
    if status:
        raise ValueError(status)

    # scan fence
    status = scanner.scan_fence_final(offset, buffer1, buffer2)
    if status:
        raise ValueError(status)

    # consume
    while not artifacts.empty():
        artifact = artifacts.get()

        # check for compression
        if artifact.artifact_class in compressed_classes:

            # get uncompressor ready
            if uncompressor is None:
                uncompressor = be_scan.uncompressor_t()

            # artifact offsets are file offsets; make it relative to buffer1
            buffer1_offset = artifact.offset - offset

            # NOTE(review): the two scan_range.a/.b prints below are
            # unconditional and look like debug traces; left as-is so stdout
            # is unchanged
            if buffer1_offset + margin_size > len(buffer1):
                print("scan_range.a buffer1 offset: %d" % buffer1_offset)
                # fewer than margin_size bytes remain in the page, so extend
                # the tail of the page with the margin
                buffer3 = buffer1[buffer1_offset:] + buffer2
                uncompressed = uncompressor.uncompress(buffer3, 0)
            else:
                # use buffer1
                print("scan_range.b buffer1 offset: %d" % buffer1_offset)
                uncompressed = uncompressor.uncompress(buffer1,
                                                       buffer1_offset)

            # no error and nothing uncompressed so disregard this artifact
            if not uncompressed.status and not uncompressed.buffer:
                continue

            # set artifact text for this uncompression
            set_compression_text(artifact, uncompressed)

            # skip popular useless uncompressed data
            if artifact.artifact == "8da7a0b0144fc58332b03e90aaf7ba25":
                continue

        # prepare for other artifact types as needed
        # none.

        # consume this artifact
        formatted_artifacts.append(artifact.to_string())

        # manage recursion
        if artifact.artifact_class in compressed_classes and max_depth >= 1:
            # calculate recursion prefix for first recursion depth
            next_recursion_prefix = "%d-%s" % (
                artifact.offset, artifact.artifact_class.upper())

            # recurse
            recurse(uncompressed.buffer, next_recursion_prefix, 1,
                    uncompressor, formatted_artifacts, max_depth, verbose)

    # return the accumulated list of formatted artifacts
    return formatted_artifacts