def validate_pullstring_is_digest(pullstring: str) -> bool:
    """
    Return True if the pullstring references an image by digest, False if not.

    :param pullstring: docker pullstring to inspect
    :raises ValueError: if the pullstring cannot be parsed at all
    """
    try:
        image_info = parse_dockerimage_string(pullstring)
        return image_info.get('digest') is not None
    except Exception as e:
        # Log the parse failure with full detail, then surface a simpler error to the caller
        logger.debug_exception('Error parsing pullstring {}. Err = {}'.format(pullstring, e))
        raise ValueError('Error parsing pullstring {}'.format(pullstring))
def exec(docker_archive, anchore_archive, digest, parent_digest, image_id, tag, account_id, manifest, dockerfile, created_at, annotation):
    """
    Analyze a local image stored as a docker archive (output result of 'docker save'), and generate an anchore image archive tarball ready for import into an anchore engine.

    DOCKER_ARCHIVE : Location of input docker archive tarfile to analyze
    ANCHORE_ARCHIVE : Location of output anchore image archive to write
    """
    global config

    # TODO: this could be improved to allow the user to input timestamps (created_at, analyzed_at, etc)
    now = int(time.time())

    # sha256 digests / image ids are exactly 64 lowercase hex characters.
    # NOTE: the previous patterns used "[\d|a-f]", whose '|' is a literal
    # inside a character class, so they also accepted '|' characters.
    digest_regex = re.compile(r"^sha256:[0-9a-f]{64}$")
    image_id_regex = re.compile(r"^[0-9a-f]{64}$")

    try:
        # ---- input setup / validation ----
        try:
            imageDigest = None
            input_manifest_data = None
            rawmanifest = None

            # Exactly one of (manifest, digest) must be supplied
            if (not manifest and not digest) or (manifest and digest):
                raise Exception(
                    "must supply either an image digest or a valid manifest, but not both"
                )

            # Refuse to clobber an existing output archive
            if os.path.exists(anchore_archive):
                raise Exception(
                    "the supplied anchore archive file ({}) already exists, please remove and try again"
                    .format(anchore_archive))

            if manifest:
                try:
                    with open(manifest, 'r') as FH:
                        # TODO implement manifest validator for anchore requirements, specifically
                        rawmanifest = FH.read()
                        input_manifest_data = json.loads(rawmanifest)
                        imageDigest = manifest_to_digest(rawmanifest)
                except Exception as err:
                    raise ValueError(
                        "cannot calculate digest from supplied manifest - exception: {}"
                        .format(err))

            if digest:
                if digest_regex.match(digest):
                    imageDigest = digest
                else:
                    raise ValueError(
                        "input digest does not validate - must be sha256:<64 hex characters>"
                    )

            if parent_digest:
                if digest_regex.match(parent_digest):
                    parentDigest = parent_digest
                else:
                    raise ValueError(
                        "input parent_digest does not validate - must be sha256:<64 hex characters>"
                    )
            else:
                # default the parent digest to the image's own digest
                parentDigest = imageDigest

            if image_id:
                if image_id_regex.match(image_id):
                    imageId = image_id
                else:
                    raise ValueError("input image_id does not validate")
            else:
                # TODO this could be improved to generate imageId from configuration hash
                imageId = ''.join(random.choice('0123456789abcdef') for _ in range(64))

            userId = account_id if account_id else 'admin'

            if created_at:
                try:
                    # must parse as an int and fall within [0, now] (+1s of slack)
                    if int(created_at) < 0 or int(created_at) > now + 1:
                        raise Exception()
                except Exception:
                    # (message typo fixed: was "must by a unix timestamp")
                    raise ValueError(
                        "created_at must be a unix timestamp between 0 and now ({})"
                        .format(now))
            else:
                created_at = now

            try:
                inputTag = tag
                image_info = parse_dockerimage_string(inputTag)
                fulltag = "{}/{}:{}".format(image_info['registry'], image_info['repo'],
                                            image_info['tag'])
                fulldigest = "{}/{}@{}".format(image_info['registry'], image_info['repo'],
                                               imageDigest)
            except Exception as err:
                raise ValueError(
                    "input tag does not validate - exception: {}".format(err))

            dockerfile_mode = "Guessed"
            dockerfile_contents = None
            if dockerfile:
                # stored b64-encoded, mirroring the engine's image-add behavior
                with open(dockerfile, 'r') as FH:
                    dockerfile_contents = ensure_str(
                        base64.b64encode(ensure_bytes(FH.read())))
                    dockerfile_mode = "Actual"

            annotations = {}
            if annotation:
                for a in annotation:
                    try:
                        (k, v) = a.split('=', 1)
                        if k and v:
                            annotations[k] = v
                        else:
                            raise Exception("found null in key or value")
                    except Exception:
                        raise ValueError(
                            "annotation format error - annotations must be of the form (--annotation key=value), found: {}"
                            .format(a))

            workspace_root = config['tmp_dir']
        except Exception:
            # input setup/validation failure; bare raise preserves the traceback
            raise

        logger.debug(
            "input has been prepared: imageDigest={} parentDigest={} imageId={} inputTag={} fulltag={} fulldigest={} userId={} annotations={} created_at={}"
            .format(imageDigest, parentDigest, imageId, inputTag, fulltag,
                    fulldigest, userId, annotations, created_at))

        # ---- create an image record ----
        try:
            image_record = make_image_record(userId,
                                             'docker',
                                             None,
                                             image_metadata={
                                                 'tag': fulltag,
                                                 'digest': fulldigest,
                                                 'imageId': imageId,
                                                 'parentdigest': parentDigest,
                                                 'created_at': created_at,
                                                 'dockerfile': dockerfile_contents,
                                                 'dockerfile_mode': dockerfile_mode,
                                                 'annotations': annotations
                                             },
                                             registry_lookup=False,
                                             registry_creds=(None, None))
            # mark the record as fully analyzed/active with the supplied timestamps
            image_record['created_at'] = created_at
            image_record['last_updated'] = created_at
            image_record['analyzed_at'] = now
            image_record['analysis_status'] = 'analyzed'
            image_record['image_status'] = 'active'
            image_record['record_state_key'] = 'active'
            for image_detail in image_record['image_detail']:
                image_detail['created_at'] = created_at
                image_detail['last_updated'] = created_at
                image_detail['tag_detected_at'] = created_at
                image_detail['record_state_key'] = 'active'
        except Exception:
            # image record setup fail
            raise

        # ---- perform analysis ----
        try:
            image_data, analyzed_manifest_data = analyze_image(
                userId,
                rawmanifest,
                image_record,
                workspace_root,
                config,
                registry_creds=[],
                use_cache_dir=None,
                image_source='docker-archive',
                image_source_meta=docker_archive)

            image_content_data = {}
            for content_type in anchore_engine.common.image_content_types + anchore_engine.common.image_metadata_types:
                try:
                    image_content_data[
                        content_type] = anchore_engine.common.helpers.extract_analyzer_content(
                            image_data, content_type, manifest=input_manifest_data)
                except Exception:
                    # best-effort: content types that cannot be extracted are left empty
                    image_content_data[content_type] = {}

            anchore_engine.common.helpers.update_image_record_with_analysis_data(
                image_record, image_data)
            image_record['image_size'] = int(image_record['image_size'])
        except Exception:
            # image analysis fail
            raise

        # ---- generate an output image archive tarball ----
        archive_file = anchore_archive
        try:
            with ImageArchive.for_writing(archive_file) as img_archive:
                img_archive.account = userId
                img_archive.image_digest = imageDigest
                img_archive.manifest.metadata = {
                    'versions': localconfig.get_versions(),
                    'image_id': imageId,
                    'image_record': json.dumps(image_record, sort_keys=True)
                }

                pack_data = {'document': image_data}
                data = ensure_bytes(json.dumps(pack_data, sort_keys=True))
                img_archive.add_artifact('analysis',
                                         source=ObjectStoreLocation(
                                             bucket='analysis_data',
                                             key=imageDigest),
                                         data=data,
                                         metadata=None)

                pack_data = {'document': image_content_data}
                data = ensure_bytes(json.dumps(pack_data, sort_keys=True))
                img_archive.add_artifact('image_content',
                                         source=ObjectStoreLocation(
                                             bucket='image_content_data',
                                             key=imageDigest),
                                         data=data,
                                         metadata=None)

                pack_data = {'document': input_manifest_data}
                data = ensure_bytes(json.dumps(pack_data, sort_keys=True))
                img_archive.add_artifact('image_manifest',
                                         source=ObjectStoreLocation(
                                             bucket='manifest_data',
                                             key=imageDigest),
                                         data=data,
                                         metadata=None)
        except Exception:
            # archive tarball generate fail
            raise
    except Exception as err:
        logger.error(
            anchore_manager.cli.utils.format_error_output(
                click_config, 'db', {}, err))
        sys.exit(2)

    click.echo(
        "Analysis complete for image {} - archive file is located at {}".
        format(imageDigest, archive_file))
def test_parse_dockerimage_string(image, expected):
    """Parametrized check: parsing *image* must yield exactly *expected*."""
    assert parse_dockerimage_string(image) == expected
def normalize_image_add_source(analysis_request_dict):
    """
    Normalize an ImageAnalysisRequest-schema dict (already validated at API
    marshalling) to use the 'source' property instead of the deprecated
    top-level 'tag', 'digest', and 'dockerfile' properties.

    Returns a new dict with the normalized request; the input dict is not
    modified.

    :param analysis_request_dict: request dict to normalize
    :return: normalized request dict
    :raises ValueError: on an empty request or a bare digest without a tag
    :raises BadRequest: when neither a tag nor a digest is present
    """
    if not analysis_request_dict:
        raise ValueError("Invalid request object, must be a valid json object")

    normalized = copy.deepcopy(analysis_request_dict)

    # A request that already carries a source needs no rewriting here
    if normalized.get("source"):
        return normalized

    # Pull the deprecated top-level fields out of the request
    digest = normalized.pop("digest", None)
    tag = normalized.pop("tag", None)
    dockerfile = normalized.pop("dockerfile", None)
    created_at = normalized.pop("created_at", None)

    source = {}

    # use legacy fields and normalize to a source
    if digest:
        if DIGEST_REGEX.match(digest) is not None:
            # Bare digest (e.g. sha256:abc): build a pullstring from the tag
            if not tag:
                raise ValueError(
                    "For a digest-based analysis, the tag property must also be populated"
                )
            parsed = parse_dockerimage_string(tag)
            digest_pullstring = (
                parsed["registry"] + "/" + parsed["repo"] + "@" + digest
            )
        else:
            # Already a full pullstring, so no-op
            digest_pullstring = digest

        source["digest"] = {
            "pullstring": digest_pullstring,
            "tag": tag,
            "creation_timestamp_override": created_at,
        }
        if dockerfile:
            source["digest"]["dockerfile"] = dockerfile
        normalized["source"] = source
    elif tag:
        source["tag"] = {"pullstring": tag}
        if dockerfile:
            source["tag"]["dockerfile"] = dockerfile
        normalized["source"] = source
    else:
        raise BadRequest(
            'Must include either "tag", "tag" and "digest", or "source" property in body',
            detail={},
        )

    return normalized
def test_parse_dockerimage_string():
    """Table-driven check of parse_dockerimage_string over common pullstring forms."""
    cases = [
        ('docker.io/library/nginx', {
            'digest': None,
            'fulldigest': None,
            'fulltag': 'docker.io/library/nginx:latest',
            'host': 'docker.io',
            'imageId': None,
            'port': None,
            'pullstring': 'docker.io/library/nginx:latest',
            'registry': 'docker.io',
            'repo': 'library/nginx',
            'repotag': 'library/nginx:latest',
            'tag': 'latest'}),
        ('docker.io/nginx', {
            'digest': None,
            'fulldigest': None,
            'fulltag': 'docker.io/nginx:latest',
            'host': 'docker.io',
            'imageId': None,
            'port': None,
            'pullstring': 'docker.io/nginx:latest',
            'registry': 'docker.io',
            'repo': 'nginx',
            'repotag': 'nginx:latest',
            'tag': 'latest'}),
        ('nginx', {
            'digest': None,
            'fulldigest': None,
            'fulltag': 'docker.io/nginx:latest',
            'host': 'docker.io',
            'imageId': None,
            'port': None,
            'pullstring': 'docker.io/nginx:latest',
            'registry': 'docker.io',
            'repo': 'nginx',
            'repotag': 'nginx:latest',
            'tag': 'latest'}),
        ('docker.io/library/nginx@sha256:abcdef123', {
            'digest': 'sha256:abcdef123',
            'fulldigest': 'docker.io/library/nginx@sha256:abcdef123',
            'fulltag': None,
            'host': 'docker.io',
            'imageId': None,
            'port': None,
            'pullstring': 'docker.io/library/nginx@sha256:abcdef123',
            'registry': 'docker.io',
            'repo': 'library/nginx',
            'repotag': None,
            'tag': None}),
        ('docker.io/nginx@sha256:abcdef123', {
            'digest': 'sha256:abcdef123',
            'fulldigest': 'docker.io/nginx@sha256:abcdef123',
            'fulltag': None,
            'host': 'docker.io',
            'imageId': None,
            'port': None,
            'pullstring': 'docker.io/nginx@sha256:abcdef123',
            'registry': 'docker.io',
            'repo': 'nginx',
            'repotag': None,
            'tag': None})
    ]

    for pullstring, expected in cases:
        logger.info('Testing parsing {}'.format(pullstring))
        actual = parse_dockerimage_string(pullstring)
        try:
            assert actual == expected
        except:
            # Log the full mismatch before re-raising so the failure is diagnosable
            logger.error('Failed parsing {} to expected: {}, Got: {}'.format(pullstring, expected, actual))
            raise