def evaluate(self, image_obj, context):
    match_filter = self.regex_name.value()
    if match_filter:
        # Analyzer output keys are base64-encoded, so keep an encoded copy of the filter for comparison
        match_encoded = ensure_str(base64.b64encode(ensure_bytes(match_filter)))
    else:
        return

    for thefile, regexps in list(context.data.get('content_regexp', {}).items()):
        thefile = ensure_str(thefile)
        if not regexps:
            continue
        for regexp in regexps.keys():
            decoded_regexp = ensure_str(base64.b64decode(ensure_bytes(regexp)))
            try:
                regexp_name, theregexp = decoded_regexp.split("=", 1)
            except:
                regexp_name = None
                theregexp = decoded_regexp

            if not match_filter:
                self._fire(msg='File content analyzer found regexp match in container: file={} regexp={}'.format(thefile, decoded_regexp))
            elif regexp == match_encoded or theregexp == match_filter:
                self._fire(msg='File content analyzer found regexp match in container: file={} regexp={}'.format(thefile, decoded_regexp))
            elif regexp_name and regexp_name == match_filter:
                self._fire(msg='File content analyzer found regexp match in container: file={} regexp={}'.format(thefile, decoded_regexp))
def render_GET(self, request):
    try:
        versions = localconfig.get_versions()
    except:
        versions = {}

    request.responseHeaders.addRawHeader(b"Content-Type", b"application/json")

    ret = {
        'service': {
            'version': versions.get('service_version', None),
        },
        'api': {},
        'db': {
            'schema_version': versions.get('db_version', None),
        }
    }

    try:
        response = utils.ensure_bytes(json.dumps(ret))
    except:
        response = utils.ensure_bytes(json.dumps({}))

    return response
def evaluate(self, image_obj, context):
    match_filter = self.secret_contentregexp.value(default_if_none=[])
    if match_filter:
        matches = [ensure_str(base64.b64encode(ensure_bytes(x))) for x in match_filter]
        matches_decoded = match_filter
    else:
        matches = []
        matches_decoded = []

    for thefile, regexps in list(context.data.get('secret_content_regexp', {}).items()):
        thefile = ensure_str(thefile)
        if not regexps:
            continue
        for regexp in regexps.keys():
            decoded_regexp = ensure_str(base64.b64decode(ensure_bytes(regexp)))
            try:
                regexp_name, theregexp = decoded_regexp.split("=", 1)
            except:
                regexp_name = None
                theregexp = decoded_regexp

            if not matches:
                self._fire(msg='Secret search analyzer found regexp match in container: file={} regexp={}'.format(thefile, decoded_regexp))
            elif regexp in matches or theregexp in matches_decoded:
                self._fire(msg='Secret search analyzer found regexp match in container: file={} regexp={}'.format(thefile, decoded_regexp))
            elif regexp_name and regexp_name in matches_decoded:
                self._fire(msg='Secret search analyzer found regexp match in container: file={} regexp={}'.format(thefile, decoded_regexp))
def evaluate(self, image_obj, context):
    match_decoded = self.regex_name.value()
    if match_decoded:
        match_encoded = ensure_str(base64.b64encode(ensure_bytes(match_decoded)))

    for thefile, regexps in list(context.data.get("content_regexp", {}).items()):
        thefile = ensure_str(thefile)
        if not regexps:
            continue
        for regexp in regexps.keys():
            found = False
            decoded_regexp = ensure_str(base64.b64decode(ensure_bytes(regexp)))
            try:
                regexp_name, theregexp = decoded_regexp.split("=", 1)
            except:
                regexp_name = None
                theregexp = decoded_regexp

            if not match_decoded:
                found = True
            elif theregexp == match_decoded or regexp == match_encoded:
                found = True
            elif regexp_name and regexp_name == match_decoded:
                found = True

            if found:
                self._fire(
                    msg="File content analyzer found regexp match in container: file={} regexp={}".format(
                        thefile, decoded_regexp
                    )
                )
def evaluate(self, image_obj, context):
    match_filter = self.secret_contentregexp.value(default_if_none=[])
    name_filter = self.name_regexps.value()
    name_re = re.compile(name_filter) if name_filter else None
    match_type = self.match_type.value(default_if_none="found")

    if match_filter:
        matches = [ensure_str(base64.b64encode(ensure_bytes(x))) for x in match_filter]
        matches_decoded = match_filter
    else:
        matches = []
        matches_decoded = []

    onefound = False
    for thefile, regexps in list(context.data.get("secret_content_regexp", {}).items()):
        thefile = ensure_str(thefile)
        if not regexps:
            continue

        if regexps and (not name_re or name_re.match(thefile)):
            for regexp in list(regexps.keys()):
                found = False
                decoded_regexp = ensure_str(base64.b64decode(ensure_bytes(regexp)))
                try:
                    regexp_name, theregexp = decoded_regexp.split("=", 1)
                except:
                    regexp_name = None
                    theregexp = decoded_regexp

                if not matches:
                    found = onefound = True
                elif regexp in matches or theregexp in matches_decoded:
                    found = onefound = True
                elif regexp_name and regexp_name in matches_decoded:
                    found = onefound = True

                if found and match_type == "found":
                    self._fire(
                        msg="Secret content search analyzer found regexp match in container: file={} regexp={}".format(
                            thefile, decoded_regexp
                        )
                    )

    if not onefound and match_type == "notfound":
        f_filter = name_filter if name_filter else "*"
        m_filter = match_filter if match_filter else "all"
        self._fire(
            msg="Secret content search analyzer did not find regexp match in container: filename_regex={} content_regex_name={}".format(
                f_filter, m_filter
            )
        )
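# Illustrative sketch (not part of the engine) of the encoding convention the evaluate()
# triggers above rely on: each analyzer finding is keyed by the base64 of an optional
# "NAME=<regex>" string, so the triggers decode the key and split off the name before
# comparing against their parameters. The regex value below is made up.
import base64

raw = "AWS_ACCESS_KEY=(?i).*aws_access_key_id.*"       # hypothetical named regex
encoded_key = base64.b64encode(raw.encode()).decode()   # what appears as a result dict key

decoded = base64.b64decode(encoded_key.encode()).decode()
name, _, pattern = decoded.partition("=")
assert (name, pattern) == ("AWS_ACCESS_KEY", "(?i).*aws_access_key_id.*")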
def load_keys(config: dict):
    """
    Based on the passed configuration, load a dict mapping the key name to bytes of the key

    :param config: dict with specific keys to find keys, paths
    :return:
    """
    keys = {}

    if config:
        if config.get('private_key_path'):
            priv_keypath = config.get('private_key_path')
            try:
                with open(priv_keypath, 'rb') as pem_fp:
                    keys['private'] = pem_fp.read()
            except IOError as e:
                raise Exception('Could not load private key file from path: {}. Error: {}'.format(priv_keypath, e))

        if config.get('public_key_path'):
            pub_keypath = config.get('public_key_path')
            try:
                with open(pub_keypath, 'rb') as crt_fp:
                    keys['public'] = crt_fp.read()
            except IOError as e:
                raise Exception('Could not load public key file from path: {}. Error: {}'.format(pub_keypath, e))
        elif config.get('secret'):
            keys['secret'] = ensure_bytes(config.get('secret'))

    return keys
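# Usage sketch for load_keys above, assuming load_keys and ensure_bytes are in scope.
# The shared-secret form needs no files on disk; the keypair form reads PEM bytes from
# the configured paths (the paths shown are placeholders, so that call is left commented out).
hmac_keys = load_keys({'secret': 'a-shared-signing-secret'})
assert hmac_keys == {'secret': b'a-shared-signing-secret'}

# pem_keys = load_keys({
#     'private_key_path': '/path/to/private.pem',
#     'public_key_path': '/path/to/public.pem',
# })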
def _execute(self):
    # if image record already exists, exit.
    with session_scope() as session:
        if db_catalog_image.get(self.image_digest, self.account, session):
            logger.info('Image archive restore found existing image records already. Aborting restore.')
            raise ImageConflict('Conflict: Image already exists in system. No restore possible')

    dest_obj_mgr = object_store.get_manager()

    # Load the archive manifest
    m = self.fileobj.read()

    if m:
        tf = tempfile.NamedTemporaryFile(prefix='analysis_archive_{}'.format(self.image_digest), dir=localconfig.get_config()['tmp_dir'], delete=False)
        try:
            tf.write(ensure_bytes(m))
            tf.close()

            # Load the archive from the temp file
            with ImageArchive.for_reading(tf.name) as img_archive:
                logger.debug('Using manifest: {}'.format(img_archive.manifest))

                self.restore_artifacts(img_archive, dest_obj_mgr)
                self.restore_records(img_archive.manifest)
                self._reload_policy_engine(img_archive.manifest)
        finally:
            os.remove(tf.name)
    else:
        raise Exception('No archive manifest found in archive record. Cannot restore')
def get_by_uri(self, uri):
    try:
        container, key = self._parse_uri(uri)
        if container != self.container_name:
            logger.warn(
                "Container mismatch between content_uri and configured container name: {} in db record, but {} in config".format(
                    container, self.container_name
                )
            )

        resp = self.client.download(
            container=container, objects=[key], options={"out_file": "-"}
        )

        for obj in resp:
            if "contents" in obj and obj["action"] == "download_object":
                content = b"".join([x for x in obj["contents"]])
                ret = utils.ensure_bytes(content)
                return ret
            elif obj["action"] == "download_object" and not obj["success"]:
                raise ObjectKeyNotFoundError(
                    bucket="", key="", userId="", caused_by=None
                )

            raise Exception(
                "Unexpected operation/action from swift: {}".format(obj["action"])
            )
    except SwiftError as e:
        raise ObjectStorageDriverError(cause=e)
def create_archive(bucket, archiveid, bodycontent):
    httpcode = 500
    try:
        accountName = ApiRequestContextProxy.namespace()
        archive_sys = archive.get_manager()

        try:
            jsonbytes = anchore_utils.ensure_bytes(json.dumps(bodycontent))

            my_svc = ApiRequestContextProxy.get_service()
            if my_svc is not None:
                resource_url = (
                    my_svc.service_record["base_url"]
                    + "/"
                    + my_svc.service_record["version"]
                    + "/archive/"
                    + bucket
                    + "/"
                    + archiveid
                )
            else:
                resource_url = "N/A"

            rc = archive_sys.put(accountName, bucket, archiveid, jsonbytes)
            return_object = resource_url
            httpcode = 200
        except Exception as err:
            httpcode = 500
            raise err
    except Exception as err:
        return_object = anchore_engine.common.helpers.make_response_error(err, in_httpcode=httpcode)

    return return_object, httpcode
def get_by_uri(self, uri):
    try:
        container, key = self._parse_uri(uri)
        if container != self.container_name:
            logger.warn(
                'Container mismatch between content_uri and configured container name: {} in db record, but {} in config'.format(
                    container, self.container_name))

        resp = self.client.download(container=container, objects=[key], options={'out_file': '-'})

        for obj in resp:
            if 'contents' in obj and obj['action'] == 'download_object':
                content = b''.join([x for x in obj['contents']])
                ret = utils.ensure_bytes(content)
                return ret
            elif obj['action'] == 'download_object' and not obj['success']:
                raise ObjectKeyNotFoundError(bucket='', key='', userId='', caused_by=None)

            raise Exception('Unexpected operation/action from swift: {}'.format(obj['action']))
    except SwiftError as e:
        raise ObjectStorageDriverError(cause=e)
def create_object(bucket, archiveid, bodycontent):
    httpcode = 500
    try:
        account_name = ApiRequestContextProxy.namespace()
        obj_mgr = anchore_engine.subsys.object_store.manager.get_manager()
        jsonbytes = anchore_utils.ensure_bytes(json.dumps(bodycontent))

        rc = obj_mgr.put(account_name, bucket, archiveid, jsonbytes)

        my_svc = ApiRequestContextProxy.get_service()
        if my_svc is not None:
            resource_url = my_svc.service_record['base_url'] + "/" + my_svc.service_record['version'] + "/archive/" + bucket + "/" + archiveid
        else:
            resource_url = "N/A"

        return_object = resource_url
        httpcode = 200
    except Exception as err:
        return_object = anchore_engine.common.helpers.make_response_error(err, in_httpcode=httpcode)

    return return_object, httpcode
def _safe_base64_encode(data_provider):
    try:
        return utils.ensure_str(base64.encodebytes(utils.ensure_bytes(data_provider())))
    except Exception as err:
        logger.warn("could not base64 encode content - exception: %s", err)

    return ""
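# Usage sketch for _safe_base64_encode above, assuming the helper and its utils/logger
# imports are in scope: the argument is a zero-argument callable, so content generation
# that might fail is deferred into the helper and any exception is logged and swallowed,
# yielding an empty string instead of propagating.
ok = _safe_base64_encode(lambda: "hello world")   # "aGVsbG8gd29ybGQ=\n"
bad = _safe_base64_encode(lambda: 1 / 0)          # warning is logged, "" is returned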
def load_keys(config: dict):
    """
    Based on the passed configuration, load a dict mapping the key name to bytes of the key

    :param config: dict with specific keys to find keys, paths
    :return:
    """
    keys = {}

    if config:
        if config.get('private_key_path'):
            priv_keypath = config.get('private_key_path')

            with open(priv_keypath, 'rb') as pem_fp:
                keys['private'] = pem_fp.read()

        # TODO add public x509 cert support to get the key from (DER, PEM formats)
        if config.get('public_key_path'):
            pub_keypath = config.get('public_key_path')

            with open(pub_keypath, 'rb') as crt_fp:
                keys['public'] = crt_fp.read()
        elif config.get('secret'):
            keys['secret'] = ensure_bytes(config.get('secret'))

    return keys
def load_retrieved_files(self, analysis_report, image_obj):
    """
    Loads the analyzer retrieved files from the image and saves them in the db

    :param analysis_report:
    :param image_obj:
    :return:
    """
    log.info('Loading retrieved files')
    retrieve_files_json = analysis_report.get('retrieve_files')
    if not retrieve_files_json:
        return []

    matches = retrieve_files_json.get('file_content.all', {}).get('base', {})
    records = []

    for filename, match_string in list(matches.items()):
        match = AnalysisArtifact()
        match.image_user_id = image_obj.user_id
        match.image_id = image_obj.id
        match.analyzer_id = 'retrieve_files'
        match.analyzer_type = 'base'
        match.analyzer_artifact = 'file_content.all'
        match.artifact_key = filename
        try:
            match.binary_value = base64.b64decode(ensure_bytes(match_string))
        except:
            log.exception('Could not b64 decode the file content for {}'.format(filename))
            raise

        records.append(match)

    return records
def _fetch_group_data(self, group: GroupDownloadOperationConfiguration) -> typing.Iterable:
    """
    Execute the download and write the data into the local repo location for the group

    :param group:
    :return: generator for the record count of each chunk as it is downloaded
    """
    get_next = True
    next_token = None
    chunk_number = 0
    count = 0
    since = group.parameters.since if not self.fetch_all else None

    while get_next:
        logger.info('Downloading page {} of feed data for feed group: {}/{}'.format(chunk_number, group.feed, group.group))
        group_data = self.service_client.get_feed_group_data(group.feed, group.group, since=since, next_token=next_token)
        get_next = bool(group_data.next_token)
        next_token = group_data.next_token
        count += group_data.record_count

        if group_data.data is not None:
            self.local_repo.write_data(group.feed, group.group, chunk_number, ensure_bytes(group_data.data))

        chunk_number += 1
        yield group_data.record_count

    logger.info('Completed data download for feed group: {}/{}. Total pages: {}'.format(group.feed, group.group, chunk_number))
def get_paged_feed_group_data(self, feed, group, since=None, next_token=None):
    if type(since) == datetime.datetime:
        since = since.isoformat()

    files = []
    group_path = os.path.join(self.src_path, feed, group)
    if next_token:
        next_token = ensure_str(base64.decodebytes(ensure_bytes(next_token)))

    data = []
    size = 0
    token = None

    back_boundary = since
    forward_boundary = self.newest_allowed.isoformat() if self.newest_allowed else None

    logger.debug('Getting data for {}/{} with back boundary {} and forward boundary {}'.format(feed, group, back_boundary, forward_boundary))

    for datafile_name in sorted(os.listdir(group_path)):
        if (not back_boundary or (datafile_name >= back_boundary)) and \
                (not forward_boundary or (forward_boundary and datafile_name <= forward_boundary)) and \
                (not next_token or datafile_name >= next_token):
            logger.debug('Using data file {}'.format(datafile_name))
            fpath = os.path.join(group_path, datafile_name)
            s = os.stat(fpath)
            if size + s.st_size > self.max_content_size:
                token = datafile_name
                break
            else:
                size += s.st_size
                with open(fpath) as f:
                    content = json.load(f)
                    data += content
        else:
            logger.debug('Data file {} outside of bounds, skipping'.format(datafile_name))
            continue

    return data, ensure_str(base64.encodebytes(ensure_bytes(token))) if token else None
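# Standalone sketch of the pagination-token scheme used above: the token is simply the
# next data file name, base64-wrapped so it is opaque to callers. The helper names
# encode_token/decode_token are illustrative, not part of the engine.
import base64

def encode_token(datafile_name: str) -> str:
    return base64.encodebytes(datafile_name.encode()).decode()

def decode_token(token: str) -> str:
    return base64.decodebytes(token.encode()).decode()

token = encode_token("2019-06-01T00:00:00")
assert decode_token(token) == "2019-06-01T00:00:00"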
def _do_compress(self, data):
    """
    Handle data compression based on global config. Returns the data to use as payload, compressed as necessary based on config.

    :param data:
    :return:
    """
    if self.config[COMPRESSION_SECTION_KEY][COMPRESSION_ENABLED_KEY] is True and \
            self.primary_client.__supports_compressed_data__ and \
            len(data) > self.config[COMPRESSION_SECTION_KEY][COMPRESSION_MIN_SIZE_KEY] * 1024:
        is_compressed = True
        final_payload = utils.ensure_bytes(zlib.compress(utils.ensure_bytes(data), COMPRESSION_LEVEL))
    else:
        is_compressed = False
        final_payload = utils.ensure_bytes(data)

    return final_payload, is_compressed
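# Standalone sketch of the compression decision in _do_compress above, with hypothetical
# arguments standing in for the COMPRESSION_* config values: data is zlib-compressed only
# when compression is enabled, the target client supports it, and the payload exceeds the
# configured minimum size (expressed in KB).
import zlib

def compress_if_worthwhile(data: bytes, enabled: bool, client_supports: bool, min_size_kb: int, level: int = 6):
    if enabled and client_supports and len(data) > min_size_kb * 1024:
        return zlib.compress(data, level), True
    return data, False

payload, is_compressed = compress_if_worthwhile(b"x" * 200_000, True, True, min_size_kb=100)
assert is_compressed and zlib.decompress(payload) == b"x" * 200_000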
def _decode(self, raw_result):
    is_b64 = bool(raw_result.get('b64_encoded', False))
    data = raw_result.get('jsondata')

    if data and is_b64:
        return base64.decodebytes(utils.ensure_bytes(data))
    elif data is None:
        return b''
    else:
        return data
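# Usage sketch for _decode above, using fabricated row dicts: stored payloads are either
# base64-encoded (b64_encoded=True), plain text, or absent. The expected results are shown
# as comments since _decode is a driver method and is not constructed here.
import base64

encoded_row = {'b64_encoded': True, 'jsondata': base64.encodebytes(b'{"document": {}}').decode()}
plain_row = {'b64_encoded': False, 'jsondata': '{"document": {}}'}
empty_row = {'b64_encoded': False, 'jsondata': None}
# self._decode(encoded_row) -> b'{"document": {}}'
# self._decode(plain_row)   -> '{"document": {}}'  (returned unchanged)
# self._decode(empty_row)   -> b''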
def load_javas(self, analysis_json, containing_image):
    pkgs_json = analysis_json.get('package_list', {}).get('pkgs.java', {}).get('base')
    if not pkgs_json:
        return []

    pkgs = []
    for path, pkg_str in list(pkgs_json.items()):
        pkg_json = json.loads(pkg_str)

        n = ImagePackage()

        # primary keys
        # TODO - some java names have a version in it, need to clean that up
        n.name = pkg_json.get('name')
        n.pkg_type = 'java'
        n.arch = 'N/A'
        n.pkg_path = path

        version = None
        versions_json = {}
        for k in ['maven-version', 'implementation-version', 'specification-version']:
            if not version and pkg_json.get(k, 'N/A') != 'N/A':
                version = pkg_json.get(k)
            versions_json[k] = pkg_json.get(k, 'N/A')

        if version:
            n.version = version
        else:
            n.version = 'N/A'

        n.image_user_id = containing_image.user_id
        n.image_id = containing_image.id

        # other non-PK values
        n.pkg_path_hash = hashlib.sha256(ensure_bytes(path)).hexdigest()
        n.distro_name = 'java'
        n.distro_version = 'N/A'
        n.like_distro = 'java'
        n.fullversion = n.version

        m = pkg_json.get('metadata')
        m['java_versions'] = versions_json
        n.metadata_json = m

        fullname = n.name
        pomprops = n.get_pom_properties()
        if pomprops:
            fullname = "{}:{}".format(pomprops.get('groupId'), pomprops.get('artifactId'))

        n.normalized_src_pkg = fullname
        n.src_pkg = fullname
        pkgs.append(n)

    return pkgs
def refresh_ecr_credentials(registry, access_key_id, secret_access_key):
    localconfig = anchore_engine.configuration.localconfig.get_config()

    try:
        account_id, region = parse_registry_url(registry)

        # aws: assume role on the ec2 instance
        if access_key_id == 'awsauto' or secret_access_key == 'awsauto':
            if 'allow_awsecr_iam_auto' in localconfig and localconfig['allow_awsecr_iam_auto']:
                access_key_id = secret_access_key = None
                client = boto3.client('ecr', aws_access_key_id=access_key_id, aws_secret_access_key=secret_access_key, region_name=region)
            else:
                raise Exception("registry is set to 'awsauto', but system is not configured to allow (allow_awsecr_iam_auto: False)")

        # aws: assume cross account roles
        elif access_key_id == '_iam_role':
            try:
                sts = boto3.client('sts')
                session = sts.assume_role(RoleArn=secret_access_key, RoleSessionName=str(int(time.time())))
                access_key_id = session['Credentials']['AccessKeyId']
                secret_access_key = session['Credentials']['SecretAccessKey']
                session_token = session['Credentials']['SessionToken']
                client = boto3.client('ecr', aws_access_key_id=access_key_id, aws_secret_access_key=secret_access_key, aws_session_token=session_token, region_name=region)
            except Exception as err:
                raise err

        # aws: provide key & secret
        else:
            client = boto3.client('ecr', aws_access_key_id=access_key_id, aws_secret_access_key=secret_access_key, region_name=region)

        r = client.get_authorization_token(registryIds=[account_id])
        ecr_data = r['authorizationData'][0]
    except Exception as err:
        logger.warn("failure to get/refresh ECR credential - exception: " + str(err))
        raise err

    ret = {}
    ret['authorizationToken'] = utils.ensure_str(base64.decodebytes(utils.ensure_bytes(ecr_data['authorizationToken'])))
    ret['expiresAt'] = int(ecr_data['expiresAt'].strftime('%s'))

    return ret
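# Standalone sketch of the token handling above, using a fabricated token: ECR's
# authorizationToken is base64 text that conventionally decodes to "AWS:<password>",
# which is why the function returns the decoded form for use as registry credentials.
import base64

fake_token = base64.b64encode(b"AWS:example-password").decode()
user, _, password = base64.b64decode(fake_token).decode().partition(":")
assert (user, password) == ("AWS", "example-password")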
def get_by_uri(self, uri):
    bucket, key = self._parse_uri(uri)
    try:
        resp = self.s3_client.get_object(Bucket=bucket, Key=key)
        content = resp['Body'].read()
        ret = utils.ensure_bytes(content)
        return ret
    except Exception as e:
        raise e
def feed_json_file_array():
    files = []
    for i in range(3):
        f = tempfile.NamedTemporaryFile("w+b", prefix="file-{}-".format(i))
        b = ensure_bytes(json.dumps(sequence_data[i]))
        f.write(b)
        f.seek(0)
        files.append(f)
    return files
def _trigger_id(scanner, file, signature):
    """
    Trigger id is a string, encoded as scanner name, signature, and the md5 hash of the file path (to keep size within reasonable bounds)

    :param scanner:
    :param file:
    :param signature:
    :return:
    """
    return '{}+{}+{}'.format(scanner, signature, ensure_str(hashlib.md5(ensure_bytes(file)).hexdigest()))
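# Usage sketch for _trigger_id above, with illustrative scanner/file/signature values and
# assuming the helper plus its hashlib/ensure_str/ensure_bytes imports are in scope: only
# the file path is hashed, so distinct paths yield distinct ids of bounded length.
tid = _trigger_id("clamav", "/usr/bin/suspect", "Win.Trojan.Example-123")
# e.g. "clamav+Win.Trojan.Example-123+<32-char md5 hex of the path>"
assert tid.startswith("clamav+Win.Trojan.Example-123+") and len(tid.split("+")[-1]) == 32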
def test_archive_basic():
    path = "testarc.tar.gz"

    with archiver.ImageArchive(path, mode="w") as arc:
        arc.image_digest = "sha256:1234567890abcdef"
        arc.account = "testaccount"
        arc.add_artifact(
            "analysis",
            archiver.ObjectStoreLocation(bucket="somebucket", key="somekey"),
            data=ensure_bytes(json.dumps({"somedata": "somekey"})),
            metadata={"size": 0},
        )

    with archiver.ImageArchive(path, mode="r") as arc:
        print(arc.manifest.artifacts)
        assert len(arc.manifest.artifacts) == 1
        s = arc.extract_artifact("analysis")
        print(s)
        assert s == ensure_bytes(json.dumps({"somedata": "somekey"}))

    os.remove(path)
def load_npms(self, analysis_json, containing_image):
    handled_pkgtypes = ['pkgs.npms']
    npms_json = analysis_json.get('package_list', {}).get('pkgs.npms', {}).get('base')
    if not npms_json:
        return [], handled_pkgtypes

    npms = []
    image_packages = []

    for path, npm_str in list(npms_json.items()):
        npm_json = json.loads(npm_str)

        # TODO: remove this usage of ImageNPM, that is deprecated
        n = ImageNpm()
        n.path_hash = hashlib.sha256(ensure_bytes(path)).hexdigest()
        n.path = path
        n.name = npm_json.get('name')
        n.src_pkg = npm_json.get('src_pkg')
        n.origins_json = npm_json.get('origins')
        n.licenses_json = npm_json.get('lics')
        n.latest = npm_json.get('latest')
        n.versions_json = npm_json.get('versions')
        n.image_user_id = containing_image.user_id
        n.image_id = containing_image.id
        #npms.append(n)

        np = ImagePackage()

        # primary keys
        np.name = n.name
        if len(n.versions_json):
            version = n.versions_json[0]
        else:
            version = "N/A"
        np.version = version
        np.pkg_type = 'npm'
        np.arch = 'N/A'
        np.image_user_id = n.image_user_id
        np.image_id = n.image_id
        np.pkg_path = n.path

        # other
        np.pkg_path_hash = n.path_hash
        np.distro_name = 'npm'
        np.distro_version = 'N/A'
        np.like_distro = 'npm'
        np.fullversion = np.version
        np.license = ' '.join(n.licenses_json)
        np.origin = ' '.join(n.origins_json)
        #np.metadata_json = pkg_json.get('metadata')

        fullname = np.name
        np.normalized_src_pkg = fullname
        np.src_pkg = fullname

        image_packages.append(np)

    return image_packages, handled_pkgtypes
def test_archive_basic():
    path = 'testarc.tar.gz'

    with archiver.ImageArchive(path, mode='w') as arc:
        arc.image_digest = 'sha256:1234567890abcdef'
        arc.account = 'testaccount'
        arc.add_artifact('analysis',
                         archiver.ObjectStoreLocation(bucket='somebucket', key='somekey'),
                         data=ensure_bytes(json.dumps({'somedata': 'somekey'})),
                         metadata={'size': 0})

    with archiver.ImageArchive(path, mode='r') as arc:
        print(arc.manifest.artifacts)
        assert len(arc.manifest.artifacts) == 1
        s = arc.extract_artifact('analysis')
        print(s)
        assert s == ensure_bytes(json.dumps({'somedata': 'somekey'}))

    os.remove(path)
def get_by_uri(self, uri):
    if not self.initialized:
        raise Exception("archive not initialized")

    try:
        path = self._parse_uri(uri)
        content = self._load_content(path)
        ret = utils.ensure_bytes(content)
        return ret
    except Exception as e:
        raise ObjectKeyNotFoundError(userId='', bucket='', key='', caused_by=e)
def get_feed_group_data(self, feed, group, since=None, next_token=None):
    """
    Extended implementation of the parent type function that adds a limit on how fresh the data is allowed to be.
    Returns all records between the 'since' date and the 'newest_allowed' date; if newest_allowed is None there is no forward limit.

    :param feed:
    :param group:
    :param since:
    :param next_token:
    :return:
    """
    if type(since) == datetime.datetime:
        since = since.isoformat()

    group_path = os.path.join(self.src_path, feed, group)
    data = []

    back_boundary = since
    forward_boundary = (
        self.newest_allowed.isoformat() if self.newest_allowed else None
    )
    logger.debug(
        "Getting data for {}/{} with back boundary {} and forward boundary {}".format(
            feed, group, back_boundary, forward_boundary
        )
    )

    for datafile_name in sorted(os.listdir(group_path)):
        if (not back_boundary or (datafile_name >= back_boundary)) and (
            not forward_boundary
            or (forward_boundary and datafile_name <= forward_boundary)
        ):
            logger.debug("Using data file {}".format(datafile_name))
            fpath = os.path.join(group_path, datafile_name)
            with open(fpath) as f:
                content = json.load(f)
                if isinstance(content, dict):
                    data.extend(content["data"])
                else:
                    data.extend(content)
        else:
            logger.debug(
                "Data file {} outside of bounds, skipping".format(datafile_name)
            )
            continue

    # Make it look like a single chunk of data from the API (e.g. next_token and data keys)
    data = {"data": data, "next_token": ""}
    outdata = ensure_bytes(json.dumps(data))

    return GroupData(
        data=outdata, next_token=None, since=since, record_count=len(data["data"])
    )
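# Standalone sketch of the windowing logic above: because the data files are named with
# ISO-8601 timestamps, plain string comparison against the since/newest_allowed boundaries
# is enough to select the files to load. The file names below are made up.
datafiles = sorted(["2019-01-01T00:00:00", "2019-02-01T00:00:00", "2019-03-01T00:00:00"])
back, forward = "2019-01-15T00:00:00", "2019-02-15T00:00:00"

selected = [
    name for name in datafiles
    if (not back or name >= back) and (not forward or name <= forward)
]
assert selected == ["2019-02-01T00:00:00"]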
def get_by_uri(self, uri: str) -> bytes:
    userId, bucket, key = self._parse_uri(uri)

    try:
        with db.session_scope() as dbsession:
            result = db_archivedocument.get(userId, bucket, key, session=dbsession)
            if result:
                return utils.ensure_bytes(self._decode(result))
            else:
                raise ObjectKeyNotFoundError(userId, bucket, key, caused_by=None)
    except Exception as err:
        logger.debug("cannot get data: exception - " + str(err))
        raise err
def put_document(self, userId, bucket, archiveId, data):
    """
    Write a json document to the object store

    :param userId:
    :param bucket:
    :param archiveId:
    :param data: a json serializable object (string, dict, list, etc)
    :return: str url for retrieval
    """
    payload = json.dumps({"document": data})
    return self.put(userId, bucket, archiveId, utils.ensure_bytes(payload))
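# Standalone sketch of the envelope used by put_document above: the caller's JSON value is
# wrapped in a {"document": ...} object and serialized to bytes, so a matching reader would
# unwrap the "document" key after json-decoding. The wrap/unwrap helper names are illustrative.
import json

def wrap_document(data) -> bytes:
    return json.dumps({"document": data}).encode('utf-8')

def unwrap_document(payload: bytes):
    return json.loads(payload.decode('utf-8'))["document"]

assert unwrap_document(wrap_document({"policy_id": "abc"})) == {"policy_id": "abc"}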