def evaluate(self, image_obj, context):
    match_decoded = self.regex_name.value()

    if match_decoded:
        match_encoded = ensure_str(base64.b64encode(ensure_bytes(match_decoded)))

    for thefile, regexps in list(context.data.get("content_regexp", {}).items()):
        thefile = ensure_str(thefile)
        if not regexps:
            continue
        for regexp in regexps.keys():
            found = False
            decoded_regexp = ensure_str(base64.b64decode(ensure_bytes(regexp)))
            try:
                regexp_name, theregexp = decoded_regexp.split("=", 1)
            except ValueError:
                regexp_name = None
                theregexp = decoded_regexp

            if not match_decoded:
                found = True
            elif theregexp == match_decoded or regexp == match_encoded:
                found = True
            elif regexp_name and regexp_name == match_decoded:
                found = True

            if found:
                self._fire(
                    msg="File content analyzer found regexp match in container: file={} regexp={}".format(
                        thefile, decoded_regexp
                    )
                )
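# Shape sketch (inferred from the loop above, not from upstream docs): the
# analyzer context maps file path -> {base64("NAME=regex"): ...}; only the keys
# are consumed by this trigger, which is why each key is b64-decoded and then
# split on the first "=".
#
#   context.data["content_regexp"] == {
#       "/etc/passwd": {base64.b64encode(b"PASSWD_LINE=^root:.*").decode(): ...},
#   }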
def __init__(self, pkg): super().__init__(pkg, "rpm") self.license = ensure_str(pkg.get("license", "")) self.arch = ensure_str(pkg.get("arch", "x86_64")) self.release = ensure_str(pkg.get("release", "")) self.source = ensure_str(pkg.get("source", "")) self.size = ensure_str(str(pkg.get("size", "0")))
def evaluate(self, image_obj, context):
    match_filter = self.secret_contentregexp.value(default_if_none=[])
    if match_filter:
        matches = [ensure_str(base64.b64encode(ensure_bytes(x))) for x in match_filter]
        matches_decoded = match_filter
    else:
        matches = []
        matches_decoded = []

    for thefile, regexps in list(context.data.get('secret_content_regexp', {}).items()):
        thefile = ensure_str(thefile)
        if not regexps:
            continue
        for regexp in regexps.keys():
            decoded_regexp = ensure_str(base64.b64decode(ensure_bytes(regexp)))
            try:
                regexp_name, theregexp = decoded_regexp.split("=", 1)
            except ValueError:
                regexp_name = None
                theregexp = decoded_regexp

            # fire if no filter was set, or if the filter matches the encoded key,
            # the decoded regexp, or the regexp's name
            if (not matches
                    or regexp in matches
                    or theregexp in matches_decoded
                    or (regexp_name and regexp_name in matches_decoded)):
                self._fire(msg='Secret search analyzer found regexp match in container: file={} regexp={}'.format(thefile, decoded_regexp))
def __init__(self, pkg): super().__init__(pkg, ensure_str(pkg.get("type", "java")).lower()) self.jar_type = "%s-jar" % self.type self.location = ensure_str( pkg.get("location", "/virtual/javapkg/%s-%s.jar" % (self.name, self.version))) self.metadata = pkg.get("metadata", {})
def __init__(self, pkg): super().__init__(pkg, ensure_str(pkg.get("type", "python")).lower()) self.license = ensure_str(pkg.get("license", "")) self.files = pkg.get("files", []) self.metadata = json.dumps(pkg.get("metadata", {})) self.location = ensure_str( pkg.get("location", "/virtual/pypkg/site-packages"))
def get_tar_filenames(layertar):
    ret = []
    layertarfile = None
    try:
        logger.debug("using tarfile library to get file names from tarfile={}".format(layertar))
        layertarfile = tarfile.open(layertar, mode='r', format=tarfile.PAX_FORMAT)
        ret = layertarfile.getnames()
    except Exception:
        # python tarfile fails to unpack some docker image layers due to a PAX header issue; fall back to the tar command
        logger.debug("using tar command to get file names from tarfile={}".format(layertar))
        tarcmd = "tar tf {}".format(layertar)
        try:
            ret = []
            rc, sout, serr = utils.run_command(tarcmd)
            sout = utils.ensure_str(sout)
            serr = utils.ensure_str(serr)
            if rc == 0 and sout:
                for line in sout.splitlines():
                    line = re.sub("/+$", "", line)  # strip trailing slashes from directory entries
                    ret.append(line)
            else:
                raise Exception("rc={} sout={} serr={}".format(rc, sout, serr))
        except Exception as err:
            logger.error("command failed with exception - " + str(err))
            raise err
    finally:
        if layertarfile:
            layertarfile.close()

    return ret
def evaluate(self, image_obj, context):
    match_filter = self.regex_name.value()
    if not match_filter:
        return
    match_encoded = ensure_str(base64.b64encode(ensure_bytes(match_filter)))

    for thefile, regexps in list(context.data.get('content_regexp', {}).items()):
        thefile = ensure_str(thefile)
        if not regexps:
            continue
        for regexp in regexps.keys():
            decoded_regexp = ensure_str(base64.b64decode(ensure_bytes(regexp)))
            try:
                regexp_name, theregexp = decoded_regexp.split("=", 1)
            except ValueError:
                regexp_name = None
                theregexp = decoded_regexp

            # compare encoded forms with encoded and decoded forms with decoded
            if regexp == match_encoded or theregexp == match_filter:
                self._fire(msg='File content analyzer found regexp match in container: file={} regexp={}'.format(thefile, decoded_regexp))
            elif regexp_name and regexp_name == match_filter:
                self._fire(msg='File content analyzer found regexp match in container: file={} regexp={}'.format(thefile, decoded_regexp))
def evaluate(self, image_obj, context):
    match_filter = self.secret_contentregexp.value(default_if_none=[])
    name_filter = self.name_regexps.value()
    name_re = re.compile(name_filter) if name_filter else None
    match_type = self.match_type.value(default_if_none="found")

    if match_filter:
        # normalize to str so membership tests against the (str) regexp keys work
        matches = [ensure_str(base64.b64encode(ensure_bytes(x))) for x in match_filter]
        matches_decoded = match_filter
    else:
        matches = []
        matches_decoded = []

    onefound = False
    for thefile, regexps in list(context.data.get("secret_content_regexp", {}).items()):
        thefile = ensure_str(thefile)
        if not regexps:
            continue
        if not name_re or name_re.match(thefile):
            for regexp in list(regexps.keys()):
                found = False
                decoded_regexp = ensure_str(base64.b64decode(ensure_bytes(regexp)))
                try:
                    regexp_name, theregexp = decoded_regexp.split("=", 1)
                except ValueError:
                    regexp_name = None
                    theregexp = decoded_regexp

                if not matches:
                    found = onefound = True
                elif regexp in matches or theregexp in matches_decoded:
                    found = onefound = True
                elif regexp_name and regexp_name in matches_decoded:
                    found = onefound = True

                if found and match_type == "found":
                    self._fire(
                        msg="Secret content search analyzer found regexp match in container: file={} regexp={}".format(
                            thefile, decoded_regexp
                        )
                    )

    if not onefound and match_type == "notfound":
        f_filter = name_filter if name_filter else "*"
        m_filter = match_filter if match_filter else "all"
        self._fire(
            msg="Secret content search analyzer did not find regexp match in container: filename_regex={} content_regex_name={}".format(
                f_filter, m_filter
            )
        )
def __init__(self, pkg): super().__init__(pkg, ensure_str(pkg.get("type", "npm")).lower()) self.version = self.get_list_value(pkg, "version") if self.version: self.latest_version = self.version[0] else: self.latest_version = "" self.origin = self.get_list_value(pkg, "origin") self.license = self.get_list_value(pkg, "license") self.source = ensure_str(pkg.get("source", self.name)) self.files = pkg.get("files", []) self.location = ensure_str(pkg.get("location", ""))
def handle_tar_error(tarcmd, rc, sout, serr, unpackdir=None, rootfsdir=None, layer=None, layertar=None, layers=[]):
    handled = False
    handled_post_metadata = {}

    try:
        slinkre = "tar: (.*): Cannot open: File exists"
        hlinkre = "tar: (.*): Cannot hard link to `(.*)': No such file or directory"
        for errline in serr.splitlines():
            patt = re.match(slinkre, errline)
            patt1 = re.match(hlinkre, errline)
            if patt:
                matchfile = patt.group(1)
                logger.debug("found 'file exists' error on name: " + str(matchfile))
                if matchfile:
                    badfile = os.path.join(rootfsdir, patt.group(1))
                    if os.path.exists(badfile):
                        logger.debug("removing hierarchy: " + str(badfile))
                        shutil.rmtree(badfile)
                        handled = True
            elif patt1:
                missingfile = patt1.group(2)
                basedir = os.path.dirname(missingfile)
                logger.debug("found 'hard link' error on name: {}".format(missingfile))
                if not os.path.exists(os.path.join(rootfsdir, missingfile)):
                    # walk back through earlier layers to recover the hard-link target
                    for l in layers[layers.index("sha256:" + layer)::-1]:
                        missingdir = None
                        if not os.path.exists(os.path.join(rootfsdir, basedir)):
                            missingdir = basedir
                        tarcmd = "tar -C {} -x -f {} {}".format(rootfsdir, layertar, missingfile)
                        rc, sout, serr = utils.run_command(tarcmd)
                        sout = utils.ensure_str(sout)
                        serr = utils.ensure_str(serr)
                        if rc == 0:
                            if not handled_post_metadata.get('temporary_file_adds', False):
                                handled_post_metadata['temporary_file_adds'] = []
                            handled_post_metadata['temporary_file_adds'].append(missingfile)

                            if missingdir:
                                if not handled_post_metadata.get('temporary_dir_adds', False):
                                    handled_post_metadata['temporary_dir_adds'] = []
                                handled_post_metadata['temporary_dir_adds'].append(missingdir)

                            handled = True
                            break
    except Exception as err:
        raise err

    logger.debug("tar error handled: {}".format(handled))
    return handled, handled_post_metadata
def get_config():
    ret = {}
    logger.debug("fetching local anchore configuration")
    cmd = ['anchore', '--json', 'system', 'status', '--conf']
    try:
        rc, sout, serr = anchore_engine.utils.run_command_list(cmd)
        sout = utils.ensure_str(sout)
        serr = utils.ensure_str(serr)
        ret = json.loads(sout)
    except Exception as err:
        logger.error(str(err))

    return ret
def _safe_base64_encode(data_provider):
    try:
        return utils.ensure_str(base64.encodebytes(utils.ensure_bytes(data_provider())))
    except Exception as err:
        logger.warn("could not base64 encode content - exception: %s", err)
        return ""
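# Usage sketch (hypothetical caller, not from the source): the provider is a
# zero-argument callable, so a failing content fetch is deferred into the try
# block above and degrades to an empty string instead of raising.
#
#   encoded = _safe_base64_encode(lambda: b"some file bytes")
#   missing = _safe_base64_encode(broken_provider)  # any exception -> ""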
def prepare_context(self, image_obj, context):
    """
    Prepare the context by extracting the /etc/passwd content for the image from the
    analysis artifacts list, if it is found. Loads from the db.

    This is an optimization and could be removed, but if removed the triggers should
    be updated to do the queries directly.

    :param image_obj:
    :param context:
    :return: the context, with context.data["passwd_entries"] populated when available
    """

    content_matches = image_obj.analysis_artifacts.filter(
        AnalysisArtifact.analyzer_id == "retrieve_files",
        AnalysisArtifact.analyzer_artifact == "file_content.all",
        AnalysisArtifact.analyzer_type == "base",
        AnalysisArtifact.artifact_key == "/etc/passwd",
    ).first()

    if content_matches:
        try:
            pentries = {}
            for line in ensure_str(content_matches.binary_value).splitlines():
                line = line.strip()
                pentry = line.split(":")
                pentries[pentry[0]] = pentry[1:]
            context.data["passwd_entries"] = pentries
        except Exception as e:
            raise e

    return context
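# Resulting structure sketch (illustrative values, not from the source): for a
# passwd line like "root:x:0:0:root:/root:/bin/bash" the map is keyed by username:
#
#   context.data["passwd_entries"]["root"] == ["x", "0", "0", "root", "/root", "/bin/bash"]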
def run_anchore_analyzers(staging_dirs, imageDigest, imageId, localconfig):
    outputdir = staging_dirs['outputdir']
    unpackdir = staging_dirs['unpackdir']
    copydir = staging_dirs['copydir']
    configdir = localconfig['service_dir']

    # run analyzers
    anchore_module_root = resource_filename("anchore_engine", "analyzers")
    analyzer_root = os.path.join(anchore_module_root, "modules")
    for f in list_analyzers():
        cmdstr = " ".join([f, configdir, imageId, unpackdir, outputdir, unpackdir])
        try:
            rc, sout, serr = utils.run_command(cmdstr)
            sout = utils.ensure_str(sout)
            serr = utils.ensure_str(serr)
            if rc != 0:
                raise Exception("command failed: cmd=" + str(cmdstr) + " exitcode=" + str(rc) + " stdout=" + str(sout).strip() + " stderr=" + str(serr).strip())
            else:
                logger.debug("command succeeded: cmd=" + str(cmdstr) + " stdout=" + str(sout).strip() + " stderr=" + str(serr).strip())
        except Exception as err:
            # analyzer failures are logged but not fatal here
            logger.error("command failed with exception - " + str(err))

    analyzer_report = {}
    for analyzer_output in os.listdir(os.path.join(outputdir, "analyzer_output")):
        if analyzer_output not in analyzer_report:
            analyzer_report[analyzer_output] = {}

        for analyzer_output_el in os.listdir(os.path.join(outputdir, "analyzer_output", analyzer_output)):
            if analyzer_output_el not in analyzer_report[analyzer_output]:
                analyzer_report[analyzer_output][analyzer_output_el] = {'base': {}}

            data = read_kvfile_todict(os.path.join(outputdir, "analyzer_output", analyzer_output, analyzer_output_el))
            if data:
                analyzer_report[analyzer_output][analyzer_output_el]['base'] = data
            else:
                analyzer_report[analyzer_output].pop(analyzer_output_el, None)

        if not analyzer_report[analyzer_output]:
            analyzer_report.pop(analyzer_output, None)

    return analyzer_report
def get_paged_feed_group_data(self, feed, group, since=None, next_token=None):
    if type(since) == datetime.datetime:
        since = since.isoformat()

    group_path = os.path.join(self.src_path, feed, group)
    if next_token:
        next_token = ensure_str(base64.decodebytes(ensure_bytes(next_token)))

    data = []
    size = 0
    token = None

    back_boundary = since
    forward_boundary = self.newest_allowed.isoformat() if self.newest_allowed else None
    logger.debug('Getting data for {}/{} with back boundary {} and forward boundary {}'.format(feed, group, back_boundary, forward_boundary))

    for datafile_name in sorted(os.listdir(group_path)):
        if ((not back_boundary or datafile_name >= back_boundary)
                and (not forward_boundary or datafile_name <= forward_boundary)
                and (not next_token or datafile_name >= next_token)):
            logger.debug('Using data file {}'.format(datafile_name))
            fpath = os.path.join(group_path, datafile_name)
            s = os.stat(fpath)
            if size + s.st_size > self.max_content_size:
                # over the page size limit; resume from this file next time
                token = datafile_name
                break
            else:
                size += s.st_size
                with open(fpath) as f:
                    content = json.load(f)
                    data += content
        else:
            logger.debug('Data file {} outside of bounds, skipping'.format(datafile_name))
            continue

    return data, ensure_str(base64.encodebytes(ensure_bytes(token))) if token else None
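# Pagination sketch (assumes `src` is an instance of the class above and that the
# feed/group names exist on disk; `process` is a hypothetical consumer): keep
# feeding the returned token back in until the source stops returning one.
#
#   next_token = None
#   while True:
#       data, next_token = src.get_paged_feed_group_data("vulnerabilities", "alpine:3.12", next_token=next_token)
#       process(data)
#       if not next_token:
#           break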
def refresh_ecr_credentials(registry, access_key_id, secret_access_key):
    localconfig = anchore_engine.configuration.localconfig.get_config()

    try:
        account_id, region = parse_registry_url(registry)

        # aws: assume role on the ec2 instance
        if access_key_id == 'awsauto' or secret_access_key == 'awsauto':
            if 'allow_awsecr_iam_auto' in localconfig and localconfig['allow_awsecr_iam_auto']:
                access_key_id = secret_access_key = None
                client = boto3.client('ecr', aws_access_key_id=access_key_id, aws_secret_access_key=secret_access_key, region_name=region)
            else:
                raise Exception("registry is set to 'awsauto', but system is not configured to allow (allow_awsecr_iam_auto: False)")

        # aws: assume cross account roles
        elif access_key_id == '_iam_role':
            try:
                sts = boto3.client('sts')
                session = sts.assume_role(RoleArn=secret_access_key, RoleSessionName=str(int(time.time())))
                access_key_id = session['Credentials']['AccessKeyId']
                secret_access_key = session['Credentials']['SecretAccessKey']
                session_token = session['Credentials']['SessionToken']
                client = boto3.client('ecr', aws_access_key_id=access_key_id, aws_secret_access_key=secret_access_key, aws_session_token=session_token, region_name=region)
            except Exception as err:
                raise err

        # aws: provide key & secret
        else:
            client = boto3.client('ecr', aws_access_key_id=access_key_id, aws_secret_access_key=secret_access_key, region_name=region)

        r = client.get_authorization_token(registryIds=[account_id])
        ecr_data = r['authorizationData'][0]
    except Exception as err:
        logger.warn("failure to get/refresh ECR credential - exception: " + str(err))
        raise err

    ret = {}
    ret['authorizationToken'] = utils.ensure_str(base64.decodebytes(utils.ensure_bytes(ecr_data['authorizationToken'])))
    ret['expiresAt'] = int(ecr_data['expiresAt'].strftime('%s'))

    return ret
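# Follow-up sketch (hypothetical registry URL, not from the source): the decoded
# authorizationToken is a "user:password" pair (for ECR the user is literally
# "AWS"), so a caller can split it for a registry login.
#
#   creds = refresh_ecr_credentials("123456789012.dkr.ecr.us-east-1.amazonaws.com", "awsauto", "awsauto")
#   username, password = creds["authorizationToken"].split(":", 1)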
def __enter__(self):
    if not self._tarfile or self._tarfile.closed:
        self._tarfile = tarfile.open(name=self.backing_file_path, mode='{}:gz'.format(self._tar_mode))

    if self._tar_mode == 'r':
        # Try to load the manifest
        manifest_data = self._tarfile.extractfile(self.__manifest_name__)
        self.manifest = ArchiveManifest.from_json(json.loads(ensure_str(manifest_data.read())))

    return self
def get_list_value(pkg, key):
    if not key:
        logger.debug("cannot get list value when key is not specified")
        return []

    value = ensure_str(pkg.get(key, ""))

    # For Gem/NPM hints, we search both the singular and plural key name, where the plural is a list
    list_value = pkg.get(key + "s", [])
    if value and not list_value:
        list_value = [value]

    return list_value
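# Behavior sketch (assumed hint payloads, not from the source): the plural key
# wins when present; otherwise the singular value is wrapped in a list.
#
#   get_list_value({"license": "MIT"}, "license")                             -> ["MIT"]
#   get_list_value({"license": "MIT", "licenses": ["MIT", "BSD"]}, "license") -> ["MIT", "BSD"]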
def _trigger_id(scanner, file, signature):
    """
    Trigger id is a string, encoded as the scanner name, the signature, and an md5 hash
    of the file path (to keep size within reasonable bounds)

    :param scanner:
    :param file:
    :param signature:
    :return:
    """
    return '{}+{}+{}'.format(scanner, signature, ensure_str(hashlib.md5(ensure_bytes(file)).hexdigest()))
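# Illustrative only (hypothetical scanner hit): the file path is md5-hashed so
# the trigger id stays bounded regardless of path length.
#
#   _trigger_id("clamav", "/usr/bin/suspect", "Unix.Trojan.Generic")
#   -> "clamav+Unix.Trojan.Generic+<md5 hex of the path>"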
def evaluate(self, image_obj, context):
    # get the regex parameter value
    regex_param = self.regex.value()

    files = []
    if hasattr(context, 'data'):
        files = context.data.get('filenames') or []

    for thefile in files:
        thefile = ensure_str(thefile)
        if re.match(regex_param, thefile):
            self._fire(msg='Application of regex matched file found in container: file={} regexp={}'.format(thefile, regex_param))
def retrieved_file_to_mgs(artifact_record):
    """
    :param artifact_record:
    :return:
    """
    log.info("File value: {}".format(artifact_record.binary_value))
    return {
        "path": artifact_record.artifact_key,
        "b64_content": ensure_str(base64.encodebytes(artifact_record.binary_value)),
    }
def get_image_content_data(self, image_digest):
    try:
        return json.loads(
            utils.ensure_str(
                self.obj_mgr.get(self.account_id, "image_content_data", image_digest)
            )
        )["document"]
    except Exception as err:
        logger.error("Failed to load image content data")
        raise make_anchore_exception(
            err,
            input_message="cannot fetch content data from archive",
            input_httpcode=500,
        )
def __init__(self, pkg): super().__init__(pkg, ensure_str(pkg.get("type", "go").lower())) self.license = ensure_str(pkg.get("license", "")) self.arch = ensure_str(pkg.get("arch", "x86_64")) self.source = ensure_str(pkg.get("source", "")) self.size = ensure_str(str(pkg.get("size", "0"))) self.metadata = json.dumps(pkg.get("metadata", {})) self.location = ensure_str(pkg.get("location", ""))
def test_token_manager_secret():
    """
    Test the token manager using a shared secret

    :return:
    """
    mgr = JwtTokenManager(config={'secret': 'abc123'})
    t = mgr.generate_token('testuser')
    mgr.verify_token(t)

    t2 = ensure_str(t)
    t2 += '-'

    with pytest.raises(Exception) as ex:
        mgr.verify_token(t2)
def test_token_manager_keys():
    mgr = JwtTokenManager(
        config={
            'public_key_path': 'test/data/certs/public.pem',
            'private_key_path': 'test/data/certs/private.pem'
        })
    t = mgr.generate_token('testuser')
    mgr.verify_token(t)

    t2 = ensure_str(t)
    t2 += '-'

    with pytest.raises(Exception) as ex:
        mgr.verify_token(t2)
def evaluate(self, image_obj, context): if not context.data.get("retrieved_files"): return re_value = self.regex.value() check_type = self.check.value() path = self.file_path.value() file = self.get_file(context) compiled_re = re.compile(re_value) if ( re_value is None or check_type is None or compiled_re is None or file is None or file.binary_value is None ): return # Decode b64 try: file_content = ensure_str(file.binary_value) except Exception as e: logger.exception( "Could not decode/process file content for {} in image {}/{} to do regex check".format( path, image_obj.user_id, image_obj.id ) ) raise Exception( "Cannot execute regex check due to error processing file content" ) if file_content is None: return match_found = False for line in file_content.split(): match_found = match_found or (compiled_re.match(line) is not None) if match_found == (check_type == "match"): self._fire( instance_id=self._construct_match_id(), msg="Content regex '{}' check '{}' found in retrieved file '{}'".format( re_value, check_type, path ), )
def evaluate(self, image_obj, context):
    fname_regexps = self.name_regexps.value(default_if_none=[])

    if not fname_regexps:
        # Short circuit
        return

    if context.data.get('filenames'):
        files = context.data.get('filenames')
    else:
        files = list(image_obj.fs.files().keys())  # returns a map of path -> entry

    for thefile in files:
        thefile = ensure_str(thefile)
        for regexp in fname_regexps:
            if re.match(regexp, thefile):
                self._fire(msg='Application of regexp matched file found in container: file={} regexp={}'.format(thefile, regexp))
def save_import_content(db_session, operation_id: str, content: bytes, content_type: str) -> tuple:
    """
    Generic handler for saving import content of any type; does not do any validation
    of the content itself.

    :param db_session:
    :param operation_id:
    :param content:
    :param content_type:
    :return: tuple of (digest, created_at timestamp)
    """
    hasher = sha256(content)  # Direct bytes hash
    digest = hasher.digest().hex()

    found_content = (
        db_session.query(ImageImportContent)
        .filter(
            ImageImportContent.operation_id == operation_id,
            ImageImportContent.content_type == content_type,
            ImageImportContent.digest == digest,
        )
        .one_or_none()
    )

    if found_content:
        logger.info("Found existing record {}".format(found_content.digest))
        # Short circuit since already present
        return found_content.digest, found_content.created_at

    import_bucket = generate_import_bucket()
    key = generate_key(ApiRequestContextProxy.namespace(), operation_id, content_type, digest)

    content_record = ImageImportContent()
    content_record.account = ApiRequestContextProxy.namespace()
    content_record.digest = digest
    content_record.content_type = content_type
    content_record.operation_id = operation_id
    content_record.content_storage_bucket = import_bucket
    content_record.content_storage_key = key

    db_session.add(content_record)
    db_session.flush()

    mgr = manager.object_store.get_manager()
    resp = mgr.put_document(ApiRequestContextProxy.namespace(), import_bucket, key, ensure_str(content))
    if not resp:
        # Abort the transaction
        raise Exception("Could not save into object store")

    return digest, content_record.created_at
def get_document(self, userId: str, bucket: str, archiveId: str):
    """
    Retrieve the content of the document, json-decoded.

    :param userId:
    :param bucket:
    :param archiveId:
    :return: json parsed content (e.g. object), or None if not found
    """
    if not self.archive_clients:
        raise Exception("archive not initialized")

    archive_document = self.get(userId, bucket, archiveId)
    if archive_document is not None:
        return json.loads(utils.ensure_str(archive_document)).get('document')
    else:
        return None
def test_token_manager_secret():
    """
    Test the token manager using a shared secret

    :return:
    """
    mgr = JwtTokenManager(
        oauth_config={"enabled": True, "default_token_expiration_seconds": 180},
        keys_config={"secret": "abc123"},
    )
    t = mgr.generate_token("testuser")
    mgr.verify_token(t)

    t2 = ensure_str(t)
    t2 += "-"

    with pytest.raises(Exception) as ex:
        mgr.verify_token(t2)