def load_javas(self, analysis_json, containing_image):
    """
    Build ImagePackage records for the java packages listed in an analysis report.

    Reads analysis_json['package_list']['pkgs.java']['base'], which maps a
    package path to a JSON-encoded description of that package.

    :param analysis_json: dict-like analysis report
    :param containing_image: object whose user_id and id are copied onto every package
    :return: list of ImagePackage objects; empty when the report has no java packages
    """
    pkgs_json = analysis_json.get('package_list', {}).get('pkgs.java', {}).get('base')
    if not pkgs_json:
        return []

    # Version sources in priority order; the first usable one wins.
    version_keys = ('maven-version', 'implementation-version', 'specification-version')

    pkgs = []
    for path, pkg_str in pkgs_json.items():
        pkg_json = json.loads(pkg_str)

        n = ImagePackage()

        # primary keys
        # TODO - some java names have a version in it, need to clean that up
        n.name = pkg_json.get('name')
        n.pkg_type = 'java'
        n.arch = 'N/A'
        n.pkg_path = path

        # Keep every candidate version for the metadata, defaulting to 'N/A'.
        versions_json = {k: pkg_json.get(k, 'N/A') for k in version_keys}

        n.version = 'N/A'
        for k in version_keys:
            candidate = versions_json[k]
            if candidate and candidate != 'N/A':
                n.version = candidate
                break

        n.image_user_id = containing_image.user_id
        n.image_id = containing_image.id

        # other non-PK values
        n.pkg_path_hash = hashlib.sha256(ensure_bytes(path)).hexdigest()
        n.distro_name = 'java'
        n.distro_version = 'N/A'
        n.like_distro = 'java'
        n.fullversion = n.version

        # 'metadata' may be absent or null in the report; fall back to an empty
        # dict so the java_versions entry can always be recorded.
        m = pkg_json.get('metadata') or {}
        m['java_versions'] = versions_json
        n.metadata_json = m

        # Use groupId:artifactId as the source package name when
        # get_pom_properties() returns something truthy.
        fullname = n.name
        pomprops = n.get_pom_properties()
        if pomprops:
            fullname = "{}:{}".format(pomprops.get('groupId'), pomprops.get('artifactId'))

        n.normalized_src_pkg = fullname
        n.src_pkg = fullname
        pkgs.append(n)

    return pkgs
def load_npms(self, analysis_json, containing_image):
    """
    Build ImagePackage records for the npm packages listed in an analysis report.

    Reads analysis_json['package_list']['pkgs.npms']['base'], which maps a
    package path to a JSON-encoded description of that package.

    :param analysis_json: dict-like analysis report
    :param containing_image: object whose user_id and id are copied onto every package
    :return: tuple (list of ImagePackage objects, list of handled package-type keys)
    """
    handled_pkgtypes = ['pkgs.npms']
    npms_json = analysis_json.get('package_list', {}).get('pkgs.npms', {}).get('base')
    if not npms_json:
        return [], handled_pkgtypes

    image_packages = []
    for path, npm_str in npms_json.items():
        npm_json = json.loads(npm_str)

        # These keys may be missing or null in the report; treat that as "no entries"
        # rather than failing in len() / str.join().
        versions = npm_json.get('versions') or []
        licenses = npm_json.get('lics') or []
        origins = npm_json.get('origins') or []

        np = ImagePackage()

        # primary keys
        np.name = npm_json.get('name')
        # Only the first listed version is recorded.
        np.version = versions[0] if versions else "N/A"
        np.pkg_type = 'npm'
        np.arch = 'N/A'
        np.image_user_id = containing_image.user_id
        np.image_id = containing_image.id
        np.pkg_path = path

        # other
        np.pkg_path_hash = hashlib.sha256(ensure_bytes(path)).hexdigest()
        np.distro_name = 'npm'
        np.distro_version = 'N/A'
        np.like_distro = 'npm'
        np.fullversion = np.version
        np.license = ' '.join(licenses)
        np.origin = ' '.join(origins)

        # The package name doubles as the source package name.
        fullname = np.name
        np.normalized_src_pkg = fullname
        np.src_pkg = fullname

        image_packages.append(np)

    return image_packages, handled_pkgtypes
def load_gems(self, analysis_json, containing_image):
    """
    Build ImagePackage records for the gem packages listed in an analysis report.

    Reads analysis_json['package_list']['pkgs.gems']['base'], which maps a
    package path to a JSON-encoded description of that package.

    :param analysis_json: dict-like analysis report
    :param containing_image: object whose user_id and id are copied onto every package
    :return: list of ImagePackage objects; empty when the report has no gems
    """
    gems_json = analysis_json.get('package_list', {}).get('pkgs.gems', {}).get('base')
    if not gems_json:
        return []

    image_packages = []
    for path, gem_str in gems_json.items():
        gem_json = json.loads(gem_str)

        # These keys may be missing or null in the report; treat that as "no entries"
        # rather than failing in len() / str.join().
        versions = gem_json.get('versions') or []
        licenses = gem_json.get('lics') or []
        origins = gem_json.get('origins') or []

        np = ImagePackage()

        # primary keys
        np.name = gem_json.get('name')
        # Only the first listed version is recorded.
        np.version = versions[0] if versions else "N/A"
        np.pkg_type = 'gem'
        np.arch = 'N/A'
        np.image_user_id = containing_image.user_id
        np.image_id = containing_image.id
        np.pkg_path = path

        # other
        np.pkg_path_hash = hashlib.sha256(ensure_bytes(path)).hexdigest()
        np.distro_name = 'gem'
        np.distro_version = 'N/A'
        np.like_distro = 'gem'
        np.fullversion = np.version
        np.license = ' '.join(licenses)
        np.origin = ' '.join(origins)

        # The package name doubles as the source package name.
        fullname = np.name
        np.normalized_src_pkg = fullname
        np.src_pkg = fullname

        image_packages.append(np)

    return image_packages
def load_generic_packages(self, analysis_json, containing_image, excludes=None):
    """
    Build ImagePackage records for every 'pkgs.<type>' entry of the report's
    package_list that is not listed in excludes.

    :param analysis_json: dict-like analysis report
    :param containing_image: object whose user_id and id are copied onto every package
    :param excludes: package_list keys (e.g. 'pkgs.java') to skip; None means skip nothing
    :return: tuple (list of ImagePackage objects, list of handled type suffixes,
             i.e. the part of each key after 'pkgs.')
    """
    excludes = [] if excludes is None else excludes

    pkgs = []
    handled_pkgtypes = []
    package_types = analysis_json.get('package_list', {})
    for package_type in package_types:
        if package_type in excludes:
            continue

        patt = re.match(r"pkgs\.(.*)", package_type)
        if not patt:
            continue

        handled_pkgtypes.append(patt.group(1))
        pkgs_json = package_types.get(package_type, {}).get('base', {})
        if not pkgs_json:
            # An empty type must not discard packages already collected from
            # other types, nor stop the remaining types from being processed.
            continue

        for path, pkg_str in pkgs_json.items():
            pkg_json = json.loads(pkg_str)

            n = ImagePackage()

            # primary keys
            n.name = pkg_json.get('name')
            n.pkg_path = path
            n.version = pkg_json.get('version')
            n.pkg_type = pkg_json.get('type', 'N/A')
            n.arch = 'N/A'
            # Tie the package to its image, as the other loaders do.
            n.image_user_id = containing_image.user_id
            n.image_id = containing_image.id

            # other
            n.pkg_path_hash = hashlib.sha256(ensure_bytes(path)).hexdigest()
            n.distro_name = n.pkg_type
            n.distro_version = 'N/A'
            n.like_distro = n.pkg_type
            n.fullversion = n.version
            n.license = pkg_json.get('license', 'N/A')
            n.origin = pkg_json.get('origin', 'N/A')

            # The package name doubles as the source package name.
            fullname = n.name
            n.normalized_src_pkg = fullname
            n.src_pkg = fullname

            pkgs.append(n)

    return pkgs, handled_pkgtypes
def load_pythons(self, analysis_json, containing_image):
    """
    Build ImagePackage records for the python packages listed in an analysis report.

    Reads analysis_json['package_list']['pkgs.python']['base'], which maps a
    package path to a JSON-encoded description of that package.

    :param analysis_json: dict-like analysis report
    :param containing_image: object whose user_id and id are copied onto every package
    :return: list of ImagePackage objects; empty when the report has no python packages
    """
    pkgs_json = analysis_json.get('package_list', {}).get('pkgs.python', {}).get('base')
    if not pkgs_json:
        return []

    pkgs = []
    for path, pkg_str in pkgs_json.items():
        pkg_json = json.loads(pkg_str)

        n = ImagePackage()

        # primary keys
        n.name = pkg_json.get('name')
        n.pkg_path = path
        n.version = pkg_json.get('version')
        n.pkg_type = 'python'
        n.arch = 'N/A'
        # Tie the package to its image, as the other loaders do.
        n.image_user_id = containing_image.user_id
        n.image_id = containing_image.id

        # other
        n.pkg_path_hash = hashlib.sha256(ensure_bytes(path)).hexdigest()
        n.distro_name = 'python'
        n.distro_version = 'N/A'
        n.like_distro = 'python'
        n.fullversion = n.version
        n.license = pkg_json.get('license')
        n.origin = pkg_json.get('origin')

        n.metadata_json = {
            'python_distribution_metadata': pkg_json.get('metadata'),
            'files': pkg_json.get('files'),
        }

        # The package name doubles as the source package name.
        fullname = n.name
        n.normalized_src_pkg = fullname
        n.src_pkg = fullname

        pkgs.append(n)

    return pkgs
def load_and_normalize_packages(self, package_analysis_json, image_obj):
    """
    Loads and normalizes package data from all distros

    :param image_obj: image record; supplies the distro fields and the user_id/id
                      copied onto every package
    :param package_analysis_json: dict of package analysis output; the first value
                                  under 'pkgs.allinfo' is the primary source
    :return: list of Package objects that can be added to an image
    """
    pkgs = []

    img_distro = DistroNamespace.for_obj(image_obj)

    # pkgs.allinfo handling
    # A dict view cannot be indexed on Python 3, so materialize it before
    # taking the first entry.
    pkgs_all = list(package_analysis_json.get('pkgs.allinfo', {}).values())
    if not pkgs_all:
        return []
    pkgs_all = pkgs_all[0]

    for pkg_name, metadata_str in pkgs_all.items():
        metadata = json.loads(metadata_str)

        p = ImagePackage()
        p.distro_name = image_obj.distro_name
        p.distro_version = image_obj.distro_version
        p.like_distro = image_obj.like_distro

        p.name = pkg_name
        p.version = metadata.get('version')
        p.origin = metadata.get('origin')
        p.size = metadata.get('size')
        p.arch = metadata.get('arch')
        # Fall back to 'lics' when 'license' is missing or empty.
        p.license = metadata.get('license') or metadata.get('lics')
        p.release = metadata.get('release', 'N/A')
        p.pkg_type = metadata.get('type')
        p.src_pkg = metadata.get('sourcepkg')
        p.image_user_id = image_obj.user_id
        p.image_id = image_obj.id

        if 'files' in metadata:
            # Handle file data
            p.files = metadata.get('files')

        if p.release != 'N/A':
            p.fullversion = p.version + '-' + p.release
        else:
            p.fullversion = p.version

        # Strip the version suffix from the source package name.
        if img_distro.flavor == 'DEB':
            # Drop a trailing "+b<digits>..." from the version before matching.
            cleanvers = re.sub(re.escape("+b") + r"\d+.*", "", p.version)
            spkg = re.sub(re.escape("-" + cleanvers), "", p.src_pkg)
        else:
            spkg = re.sub(re.escape("-" + p.version) + ".*", "", p.src_pkg)

        p.normalized_src_pkg = spkg
        pkgs.append(p)

    if pkgs:
        return pkgs

    log.warn('Pkg Allinfo not found, reverting to using pkgs.all')

    all_pkgs = package_analysis_json['pkgs.all']['base']
    all_pkgs_src = package_analysis_json['pkgs_plus_source.all']['base']

    # NOTE(review): packages built in this fallback loop are never appended to
    # pkgs, so this path returns an empty list. Kept as-is because the records
    # are only partially populated for non-RHEL flavors - confirm intent.
    for pkg_name, version in all_pkgs.items():
        p = ImagePackage()
        p.image_user_id = image_obj.user_id
        p.image_id = image_obj.id
        p.name = pkg_name
        p.version = version
        p.fullversion = all_pkgs_src[pkg_name]

        if img_distro.flavor == 'RHEL':
            # Build a synthetic rpm filename so split_rpm_filename can parse
            # version and release out of the combined version string.
            name, parsed_version, release, epoch, arch = split_rpm_filename(
                pkg_name + '-' + version + '.tmparch.rpm')
            p.version = parsed_version
            p.release = release
            p.pkg_type = 'RPM'
            p.origin = 'N/A'
            p.src_pkg = 'N/A'
            p.license = 'N/A'
            p.arch = 'N/A'
        elif img_distro.flavor == 'DEB':
            try:
                p.version, p.release = version.split('-')
            except (ValueError, AttributeError):
                # No single '-' separator (or not a string): keep the raw value.
                p.version = version
                p.release = None

    return pkgs
test_image.like_distro = 'centos' test_image.state = 'analyzed' test_image.digest = 'digest1' test_image.anchore_type = 'undefined' test_image.dockerfile_mode = 'Guessed' test_image.docker_history_json = ['line1', 'line2'] test_image.docker_data_json = {'Config': {}, 'ContainerConfig': {}} test_image.dockerfile_contents = 'FROM BLAH' test_package = ImagePackage(image_user_id=test_user_id, image_id=test_img_id, name='testpackage', version='1.0', pkg_type='RPM') test_package.src_pkg = 'testpackage' test_package.distro_name = 'centos' test_package.distro_version = '7' test_package.like_distro = 'centos' test_package.license = 'apache2' test_package.fullversion = '1.0' test_package.normalized_src_pkg = '1.0' test_package.release = '' test_package.size = 1000 test_package.origin = 'upstream' test_package.arch = 'x86_64' test_package.image = test_image test_cve = Vulnerability(id='CVE123', namespace_name='centos:7') test_cve.severity = 'High' test_cve.description = 'some test cve' test_cve.cvss2_score = '1.0'
def policy_engine_packages_upgrade_007_008():
    """
    Run the 007 -> 008 upgrade of the policy-engine package tables.

    Steps, in order:
      1. add the pkg_path / pkg_path_hash / metadata_json columns,
      2. set pkg_path to 'pkgdb' on rows where it is null,
      3. drop and re-create the primary/foreign keys so they include pkg_path,
         and add seq_id helper columns to image_npms / image_gems,
      4. convert ImageNpm and ImageGem rows into ImagePackage rows in chunks.

    :raises Exception: when adding a column fails, or when the record
                       conversion hits an unexpected error
    """
    from anchore_engine.db import session_scope, ImagePackage, ImageNpm, ImageGem, Image

    engine = anchore_engine.db.entities.common.get_engine()

    file_path_length = 512
    hash_length = 80
    chunk_size = 8192

    new_columns = [
        {
            'table_name': 'image_packages',
            'columns': [
                Column('pkg_path', String(file_path_length), primary_key=True),
                Column('pkg_path_hash', String(hash_length)),
                Column('metadata_json', StringJSON),
            ]
        },
        {
            'table_name': 'image_package_vulnerabilities',
            'columns': [
                Column('pkg_path', String(file_path_length), primary_key=True),
            ]
        },
        {
            'table_name': 'image_package_db_entries',
            'columns': [
                Column('pkg_path', String(file_path_length), primary_key=True),
            ]
        },
    ]

    def run_commands(commands):
        """Execute each SQL statement in order, logging progress."""
        for cmdcount, command in enumerate(commands, 1):
            log.err("running update operation {} of {}: {}".format(cmdcount, len(commands), command))
            engine.execute(command)

    def to_image_package(n, pkg_type):
        """Convert one ImageNpm/ImageGem row into an ImagePackage of pkg_type."""
        np = ImagePackage()

        # primary keys
        np.name = n.name
        versions = n.versions_json or []
        np.version = versions[0] if versions else "N/A"
        np.pkg_type = pkg_type
        np.arch = 'N/A'
        np.image_user_id = n.image_user_id
        np.image_id = n.image_id
        np.pkg_path = n.path

        # other
        np.pkg_path_hash = n.path_hash
        np.distro_name = pkg_type
        np.distro_version = 'N/A'
        np.like_distro = pkg_type
        np.fullversion = np.version
        np.license = ' '.join(n.licenses_json or [])
        np.origin = ' '.join(n.origins_json or [])

        fullname = np.name
        np.normalized_src_pkg = fullname
        np.src_pkg = fullname
        return np

    def migrate_legacy_records(model, pkg_type, total):
        """Page through model by seq_id and bulk-insert the converted rows."""
        record_count = 0
        last_seq = -1
        with session_scope() as dbsession:
            try:
                while record_count < total:
                    chunk_time = time.time()
                    log.err('Processing next chunk of records')

                    converted = []
                    # Paging by "seq_id > last_seq" is only correct when rows
                    # arrive in seq_id order, so order explicitly.
                    chunk = dbsession.query(model).filter(
                        model.seq_id > last_seq).order_by(model.seq_id).limit(chunk_size)
                    for n in chunk:
                        converted.append(to_image_package(n, pkg_type))
                        last_seq = n.seq_id

                    if converted:
                        log.err('Inserting {} new records'.format(len(converted)))

                        startts = time.time()
                        try:
                            # Separate session so a failed insert does not
                            # affect the session used for reading.
                            with session_scope() as dbsession2:
                                dbsession2.bulk_save_objects(converted)
                        except Exception as err:
                            log.err("skipping duplicates: {}".format(err))

                        record_count = record_count + len(converted)
                        log.err("merged {} / {} {} records (time={})".format(
                            record_count, total, pkg_type, time.time() - startts))

                    log.err('Chunk took: {} seconds to process {} records'.format(
                        time.time() - chunk_time, len(converted)))

                    if not converted:
                        # Fewer rows than the earlier count (e.g. rows removed
                        # meanwhile): stop instead of looping forever.
                        break
            except Exception as err:
                log.err('Error during {} migration: {}'.format(pkg_type, err))
                raise

    log.err("creating new table columns")
    for table in new_columns:
        for column in table['columns']:
            log.err("creating new column ({}) in table ({})".format(column.name, table.get('table_name', "")))
            try:
                cn = column.compile(dialect=engine.dialect)
                ct = column.type.compile(engine.dialect)
                engine.execute('ALTER TABLE %s ADD COLUMN IF NOT EXISTS %s %s' % (table['table_name'], cn, ct))
            except Exception as e:
                msg = 'failed to perform DB upgrade on {} adding column - exception: {}'.format(table, str(e))
                log.err(msg)
                raise Exception(msg) from e

    # populate the new columns
    log.err("updating new column (pkg_path) - this may take a while")
    for table in ['image_packages', 'image_package_vulnerabilities']:
        log.err("updating table ({}) column (pkg_path)".format(table))
        startts = time.time()
        rc = engine.execute("UPDATE {} set pkg_path='pkgdb' where pkg_path is null".format(table))
        log.err("updated {} records in {} (time={}), performing next range".format(
            rc.rowcount, table, time.time() - startts))

    # image_package_db_entries is updated one image at a time.
    with session_scope() as dbsession:
        db_image_ids = dbsession.query(Image.id).distinct().all()

    total_records = len(db_image_ids)
    for record_count, record in enumerate(db_image_ids, 1):
        db_image_id = record[0]
        startts = time.time()
        rc = engine.execute(
            "UPDATE image_package_db_entries set pkg_path='pkgdb' where image_id='{}' and pkg_path is null".format(db_image_id))
        log.err("updated {} image ({} / {}) in {} (time={}), performing next image update".format(
            db_image_id, record_count, total_records, 'image_package_db_entries', time.time() - startts))

    log.err("dropping primary key / foreign key relationships for new column")
    run_commands([
        'ALTER TABLE image_package_vulnerabilities DROP CONSTRAINT IF EXISTS image_package_vulnerabilities_pkg_image_id_fkey',
        'ALTER TABLE image_package_db_entries DROP CONSTRAINT IF EXISTS image_package_db_entries_image_id_fkey',
        'ALTER TABLE image_packages DROP CONSTRAINT IF EXISTS image_packages_pkey',
        'ALTER TABLE image_package_db_entries DROP CONSTRAINT IF EXISTS image_package_db_entries_pkey',
        'ALTER TABLE image_package_vulnerabilities DROP CONSTRAINT IF EXISTS image_package_vulnerabilities_pkey',
    ])

    log.err("updating primary key / foreign key relationships for new column - this may take a while")
    run_commands([
        'ALTER TABLE image_packages ADD PRIMARY KEY (image_id,image_user_id,name,version,pkg_type,arch,pkg_path)',
        'ALTER TABLE image_package_vulnerabilities ADD PRIMARY KEY (pkg_user_id,pkg_image_id,pkg_name,pkg_version,pkg_type,pkg_arch,vulnerability_id,pkg_path)',
        'ALTER TABLE image_package_db_entries ADD PRIMARY KEY (image_id, image_user_id, pkg_name, pkg_version, pkg_type, pkg_arch, pkg_path,file_path)',
        'ALTER TABLE image_package_vulnerabilities ADD CONSTRAINT image_package_vulnerabilities_pkg_image_id_fkey FOREIGN KEY (pkg_image_id, pkg_user_id, pkg_name, pkg_version, pkg_type, pkg_arch, pkg_path) REFERENCES image_packages (image_id, image_user_id, name, version, pkg_type, arch, pkg_path) MATCH SIMPLE',
        'ALTER TABLE image_package_db_entries ADD CONSTRAINT image_package_db_entries_image_id_fkey FOREIGN KEY (image_id, image_user_id, pkg_name, pkg_version, pkg_type, pkg_arch, pkg_path) REFERENCES image_packages (image_id, image_user_id, name, version, pkg_type, arch, pkg_path) MATCH SIMPLE',

        # These are helpers for the upgrade itself, not needed by the functioning system. Needed for large npm/gem tables and pagination support
        "CREATE SEQUENCE IF NOT EXISTS image_npms_seq_id_seq",
        "ALTER TABLE image_npms add column IF NOT EXISTS seq_id int DEFAULT nextval('image_npms_seq_id_seq')",
        "CREATE INDEX IF NOT EXISTS idx_npm_seq ON image_npms using btree (seq_id)",
        "CREATE SEQUENCE IF NOT EXISTS image_gems_seq_id_seq",
        "ALTER TABLE image_gems add column IF NOT EXISTS seq_id int DEFAULT nextval('image_gems_seq_id_seq')",
        "CREATE INDEX IF NOT EXISTS idx_gem_seq ON image_gems using btree (seq_id)",
        "ALTER TABLE image_packages ALTER COLUMN origin TYPE varchar",
    ])

    log.err("converting ImageNpm and ImageGem records into ImagePackage records - this may take a while")

    # migrate ImageNpm and ImageGem records into ImagePackage records
    with session_scope() as dbsession:
        total_npms = dbsession.query(ImageNpm).count()
        total_gems = dbsession.query(ImageGem).count()

    log.err("will migrate {} image npm records".format(total_npms))
    migrate_legacy_records(ImageNpm, 'npm', total_npms)

    log.err("will migrate {} image gem records".format(total_gems))
    migrate_legacy_records(ImageGem, 'gem', total_gems)
def load_javas(self, analysis_json, containing_image):
    """
    Build ImagePackage records for the java packages listed in an analysis report.

    Reads analysis_json['package_list']['pkgs.java']['base'], which maps a
    package path to a JSON-encoded description of that package. The version is
    chosen in this order: the 'version' from get_pom_properties(), then the
    first usable manifest version, then a version parsed out of the package
    name (which also shortens the name), else 'N/A'.

    :param analysis_json: dict-like analysis report
    :param containing_image: object whose user_id and id are copied onto every package
    :return: tuple (list of ImagePackage objects, list of handled package-type keys)
    """
    handled_pkgtypes = ['pkgs.java']
    pkgs_json = analysis_json.get('package_list', {}).get('pkgs.java', {}).get('base')
    if not pkgs_json:
        return [], handled_pkgtypes

    # Manifest version sources in priority order.
    version_keys = ('maven-version', 'implementation-version', 'specification-version')

    pkgs = []
    for path, pkg_str in pkgs_json.items():
        pkg_json = json.loads(pkg_str)

        n = ImagePackage()

        # primary keys
        # TODO - some java names have a version in it, need to clean that up
        n.name = pkg_json.get('name')
        n.pkg_type = 'java'
        n.arch = 'N/A'
        n.pkg_path = path

        # Keep every candidate version for the metadata, defaulting to 'N/A'.
        versions_json = {k: pkg_json.get(k, 'N/A') for k in version_keys}

        metaversion = None
        for k in version_keys:
            candidate = versions_json[k]
            if candidate and candidate != 'N/A':
                metaversion = candidate
                break

        n.image_user_id = containing_image.user_id
        n.image_id = containing_image.id

        # other non-PK values
        n.pkg_path_hash = hashlib.sha256(ensure_bytes(path)).hexdigest()
        n.distro_name = 'java'
        n.distro_version = 'N/A'
        n.like_distro = 'java'

        # 'metadata' may be absent or null in the report; fall back to an empty
        # dict so the java_versions entry can always be recorded.
        m = pkg_json.get('metadata') or {}
        m['java_versions'] = versions_json
        n.metadata_json = m

        fullname = n.name
        pomprops = n.get_pom_properties()
        pomversion = None
        if pomprops:
            fullname = "{}:{}".format(pomprops.get('groupId'), pomprops.get('artifactId'))
            pomversion = pomprops.get('version', None)

        n.normalized_src_pkg = fullname
        n.src_pkg = fullname

        # final version decision - try our best to get an accurate version/name pair
        n.version = 'N/A'
        if pomversion:
            n.version = pomversion
        elif metaversion:
            n.version = metaversion
        else:
            try:
                patt = re.match(r"(.*)-(([\d]\.)+.*)", n.name)
            except TypeError:
                # Name missing or not a string: nothing to parse, keep 'N/A'.
                patt = None
            if patt and patt.group(1):
                n.version = patt.group(2)
                n.name = patt.group(1)
        n.fullversion = n.version

        pkgs.append(n)

    return pkgs, handled_pkgtypes
def policy_engine_packages_upgrade_007_008():
    """
    Run the 007 -> 008 upgrade of the policy-engine package tables.

    Steps, in order:
      1. add the pkg_path / pkg_path_hash / metadata_json columns,
      2. set pkg_path to 'pkgdb' on rows where it is null,
      3. drop and re-create the primary/foreign keys so they include pkg_path,
      4. convert ImageNpm and ImageGem rows into ImagePackage rows, inserting
         them in batches of chunk_size.

    :raises Exception: when adding a column fails
    """
    from anchore_engine.db import session_scope, ImagePackage, ImageNpm, ImageGem, Image

    engine = anchore_engine.db.entities.common.get_engine()

    file_path_length = 512
    hash_length = 80
    chunk_size = 8192

    new_columns = [
        {
            'table_name': 'image_packages',
            'columns': [
                Column('pkg_path', String(file_path_length), primary_key=True),
                Column('pkg_path_hash', String(hash_length)),
                Column('metadata_json', StringJSON),
            ]
        },
        {
            'table_name': 'image_package_vulnerabilities',
            'columns': [
                Column('pkg_path', String(file_path_length), primary_key=True),
            ]
        },
        {
            'table_name': 'image_package_db_entries',
            'columns': [
                Column('pkg_path', String(file_path_length), primary_key=True),
            ]
        },
    ]

    def run_commands(commands):
        """Execute each SQL statement in order, logging progress."""
        for cmdcount, command in enumerate(commands, 1):
            log.err("running update operation {} of {}: {}".format(cmdcount, len(commands), command))
            engine.execute(command)

    def to_image_package(n, pkg_type):
        """Convert one ImageNpm/ImageGem row into an ImagePackage of pkg_type."""
        np = ImagePackage()

        # primary keys
        np.name = n.name
        versions = n.versions_json or []
        np.version = versions[0] if versions else "N/A"
        np.pkg_type = pkg_type
        np.arch = 'N/A'
        np.image_user_id = n.image_user_id
        np.image_id = n.image_id
        np.pkg_path = n.path

        # other
        np.pkg_path_hash = n.path_hash
        np.distro_name = pkg_type
        np.distro_version = 'N/A'
        np.like_distro = pkg_type
        np.fullversion = np.version
        np.license = ' '.join(n.licenses_json or [])
        np.origin = ' '.join(n.origins_json or [])

        fullname = np.name
        np.normalized_src_pkg = fullname
        np.src_pkg = fullname
        return np

    def save_batch(batch):
        """Bulk-insert batch in its own session; a failed batch is logged and skipped."""
        try:
            with session_scope() as dbsession2:
                dbsession2.bulk_save_objects(batch)
        except Exception as err:
            log.err("skipping duplicates: {}".format(err))

    def migrate_legacy_records(model, pkg_type, total):
        """Convert every row of model and insert the results in chunk_size batches."""
        batch = []
        record_count = 0
        # Iterate while the reading session is still open; inserts go through
        # separate sessions in save_batch.
        with session_scope() as dbsession:
            for n in dbsession.query(model):
                batch.append(to_image_package(n, pkg_type))
                if len(batch) >= chunk_size:
                    startts = time.time()
                    save_batch(batch)
                    # Count each batch exactly once, whether or not it was saved.
                    record_count = record_count + len(batch)
                    log.err("merged {} / {} {} records (time={}), performing next range".format(
                        record_count, total, pkg_type, time.time() - startts))
                    batch = []

        if batch:
            startts = time.time()
            save_batch(batch)
            record_count = record_count + len(batch)
            log.err("final merged {} / {} {} records (time={})".format(
                record_count, total, pkg_type, time.time() - startts))

    log.err("creating new table columns")
    for table in new_columns:
        for column in table['columns']:
            log.err("creating new column ({}) in table ({})".format(column.name, table.get('table_name', "")))
            try:
                cn = column.compile(dialect=engine.dialect)
                ct = column.type.compile(engine.dialect)
                engine.execute('ALTER TABLE %s ADD COLUMN IF NOT EXISTS %s %s' % (table['table_name'], cn, ct))
            except Exception as e:
                msg = 'failed to perform DB upgrade on {} adding column - exception: {}'.format(table, str(e))
                log.err(msg)
                raise Exception(msg) from e

    # populate the new columns
    log.err("updating new column (pkg_path) - this may take a while")
    for table in ['image_packages', 'image_package_vulnerabilities']:
        log.err("updating table ({}) column (pkg_path)".format(table))
        startts = time.time()
        rc = engine.execute("UPDATE {} set pkg_path='pkgdb' where pkg_path is null".format(table))
        log.err("updated {} records in {} (time={}), performing next range".format(
            rc.rowcount, table, time.time() - startts))

    # image_package_db_entries is updated one image at a time.
    with session_scope() as dbsession:
        db_image_ids = dbsession.query(Image.id).distinct().all()

    total_records = len(db_image_ids)
    for record_count, record in enumerate(db_image_ids, 1):
        db_image_id = record[0]
        startts = time.time()
        rc = engine.execute(
            "UPDATE image_package_db_entries set pkg_path='pkgdb' where image_id='{}' and pkg_path is null".format(db_image_id))
        log.err("updated {} image ({} / {}) in {} (time={}), performing next image update".format(
            db_image_id, record_count, total_records, 'image_package_db_entries', time.time() - startts))

    log.err("dropping primary key / foreign key relationships for new column")
    run_commands([
        'ALTER TABLE image_package_vulnerabilities DROP CONSTRAINT IF EXISTS image_package_vulnerabilities_pkg_image_id_fkey',
        'ALTER TABLE image_package_db_entries DROP CONSTRAINT IF EXISTS image_package_db_entries_image_id_fkey',
        'ALTER TABLE image_packages DROP CONSTRAINT IF EXISTS image_packages_pkey',
        'ALTER TABLE image_package_db_entries DROP CONSTRAINT IF EXISTS image_package_db_entries_pkey',
        'ALTER TABLE image_package_vulnerabilities DROP CONSTRAINT IF EXISTS image_package_vulnerabilities_pkey',
    ])

    log.err("updating primary key / foreign key relationships for new column - this may take a while")
    run_commands([
        'ALTER TABLE image_packages ADD PRIMARY KEY (image_id,image_user_id,name,version,pkg_type,arch,pkg_path)',
        'ALTER TABLE image_package_vulnerabilities ADD PRIMARY KEY (pkg_user_id,pkg_image_id,pkg_name,pkg_version,pkg_type,pkg_arch,vulnerability_id,pkg_path)',
        'ALTER TABLE image_package_db_entries ADD PRIMARY KEY (image_id, image_user_id, pkg_name, pkg_version, pkg_type, pkg_arch, pkg_path,file_path)',
        'ALTER TABLE image_package_vulnerabilities ADD CONSTRAINT image_package_vulnerabilities_pkg_image_id_fkey FOREIGN KEY (pkg_image_id, pkg_user_id, pkg_name, pkg_version, pkg_type, pkg_arch, pkg_path) REFERENCES image_packages (image_id, image_user_id, name, version, pkg_type, arch, pkg_path) MATCH SIMPLE',
        'ALTER TABLE image_package_db_entries ADD CONSTRAINT image_package_db_entries_image_id_fkey FOREIGN KEY (image_id, image_user_id, pkg_name, pkg_version, pkg_type, pkg_arch, pkg_path) REFERENCES image_packages (image_id, image_user_id, name, version, pkg_type, arch, pkg_path) MATCH SIMPLE',
    ])

    log.err("converting ImageNpm and ImageGem records into ImagePackage records - this may take a while")

    # migrate ImageNpm and ImageGem records into ImagePackage records
    with session_scope() as dbsession:
        total_npms = dbsession.query(ImageNpm).count()
        total_gems = dbsession.query(ImageGem).count()

    migrate_legacy_records(ImageNpm, 'npm', total_npms)
    migrate_legacy_records(ImageGem, 'gem', total_gems)
def db_upgrade_007_008():
    """
    Run the 007 -> 008 upgrade of the package tables.

    Steps, in order:
      1. add the pkg_path / pkg_path_hash / metadata_json columns,
      2. set pkg_path to 'pkgdb' on rows where it is null,
      3. drop and re-create the primary/foreign keys so they include pkg_path,
      4. convert every ImageNpm and ImageGem row into an ImagePackage and
         merge it into the database.

    :raises Exception: when adding a column fails
    """
    from anchore_engine.db import session_scope, ImagePackage, ImageNpm, ImageGem

    engine = anchore_engine.db.entities.common.get_engine()

    file_path_length = 512
    hash_length = 80

    new_columns = [
        {
            'table_name': 'image_packages',
            'columns': [
                Column('pkg_path', String(file_path_length), primary_key=True),
                Column('pkg_path_hash', String(hash_length)),
                Column('metadata_json', StringJSON),
            ]
        },
        {
            'table_name': 'image_package_vulnerabilities',
            'columns': [
                Column('pkg_path', String(file_path_length), primary_key=True),
            ]
        },
        {
            'table_name': 'image_package_db_entries',
            'columns': [
                Column('pkg_path', String(file_path_length), primary_key=True),
            ]
        },
    ]

    def to_image_package(n, pkg_type):
        """Convert one ImageNpm/ImageGem row into an ImagePackage of pkg_type."""
        np = ImagePackage()

        # primary keys
        np.name = n.name
        # A row without any recorded version gets "N/A" rather than failing
        # the whole upgrade on an empty list.
        versions = n.versions_json or []
        np.version = versions[0] if versions else "N/A"
        np.pkg_type = pkg_type
        np.arch = 'N/A'
        np.image_user_id = n.image_user_id
        np.image_id = n.image_id
        np.pkg_path = n.path

        # other
        np.pkg_path_hash = n.path_hash
        np.distro_name = pkg_type
        np.distro_version = 'N/A'
        np.like_distro = pkg_type
        np.fullversion = np.version
        np.license = ' '.join(n.licenses_json or [])
        np.origin = ' '.join(n.origins_json or [])

        fullname = np.name
        np.normalized_src_pkg = fullname
        np.src_pkg = fullname
        return np

    log.err("creating new table columns")
    for table in new_columns:
        for column in table['columns']:
            log.err("creating new column ({}) in table ({})".format(column.name, table.get('table_name', "")))
            try:
                cn = column.compile(dialect=engine.dialect)
                ct = column.type.compile(engine.dialect)
                engine.execute('ALTER TABLE %s ADD COLUMN IF NOT EXISTS %s %s' % (table['table_name'], cn, ct))
            except Exception as e:
                msg = 'failed to perform DB upgrade on {} adding column - exception: {}'.format(table, str(e))
                log.err(msg)
                raise Exception(msg) from e

    # populate the new columns
    for table in ['image_packages', 'image_package_vulnerabilities', 'image_package_db_entries']:
        log.err("updating table ({}) column (pkg_path)".format(table))
        engine.execute("UPDATE {} set pkg_path='pkgdb' where pkg_path is null".format(table))

    exec_commands = [
        'ALTER TABLE image_package_vulnerabilities DROP CONSTRAINT IF EXISTS image_package_vulnerabilities_pkg_image_id_fkey',
        'ALTER TABLE image_package_db_entries DROP CONSTRAINT IF EXISTS image_package_db_entries_image_id_fkey',
        'ALTER TABLE image_packages DROP CONSTRAINT IF EXISTS image_packages_pkey',
        'ALTER TABLE image_package_db_entries DROP CONSTRAINT IF EXISTS image_package_db_entries_pkey',
        'ALTER TABLE image_package_vulnerabilities DROP CONSTRAINT IF EXISTS image_package_vulnerabilities_pkey',
        'ALTER TABLE image_packages ADD PRIMARY KEY (image_id,image_user_id,name,version,pkg_type,arch,pkg_path)',
        'ALTER TABLE image_package_vulnerabilities ADD PRIMARY KEY (pkg_user_id,pkg_image_id,pkg_name,pkg_version,pkg_type,pkg_arch,vulnerability_id,pkg_path)',
        'ALTER TABLE image_package_db_entries ADD PRIMARY KEY (image_id, image_user_id, pkg_name, pkg_version, pkg_type, pkg_arch, pkg_path,file_path)',
        'ALTER TABLE image_package_vulnerabilities ADD CONSTRAINT image_package_vulnerabilities_pkg_image_id_fkey FOREIGN KEY (pkg_image_id, pkg_user_id, pkg_name, pkg_version, pkg_type, pkg_arch, pkg_path) REFERENCES image_packages (image_id, image_user_id, name, version, pkg_type, arch, pkg_path) MATCH SIMPLE',
        'ALTER TABLE image_package_db_entries ADD CONSTRAINT image_package_db_entries_image_id_fkey FOREIGN KEY (image_id, image_user_id, pkg_name, pkg_version, pkg_type, pkg_arch, pkg_path) REFERENCES image_packages (image_id, image_user_id, name, version, pkg_type, arch, pkg_path) MATCH SIMPLE',
    ]

    log.err("updating primary key / foreign key relationships for new column")
    for cmdcount, command in enumerate(exec_commands, 1):
        log.err("running update operation {} of {}: {}".format(cmdcount, len(exec_commands), command))
        engine.execute(command)

    log.err("converting ImageNpm and ImageGem records into ImagePackage records")

    # migrate ImageNpm and ImageGem records into ImagePackage records
    # Rows are read and converted while the reading session is still open.
    with session_scope() as dbsession:
        npms = [to_image_package(n, 'npm') for n in dbsession.query(ImageNpm)]
        gems = [to_image_package(n, 'gem') for n in dbsession.query(ImageGem)]

    with session_scope() as dbsession:
        log.err("merging npms: {} records to merge".format(len(npms)))
        for npm in npms:
            dbsession.merge(npm)

    with session_scope() as dbsession:
        log.err("merging gems: {} records to merge".format(len(gems)))
        for gem in gems:
            dbsession.merge(gem)
def test_cve_updates(test_data_env):
    """Feed two revisions of one vulnerability record past a stored image.

    An image with a single package is persisted, a vulnerability record is
    added and processed, and then a second record with the same id and
    namespace is merged and processed. The image's vulnerabilities are
    printed after each step; nothing is asserted on them.
    """

    def populate(record, **fields):
        # Ordinary attribute assignment, applied in keyword order.
        for attr, value in fields.items():
            setattr(record, attr, value)
        return record

    def print_image_vulnerabilities():
        # Re-read the image through a session from get_session() and print
        # whatever vulnerabilities() reports for it.
        session = get_session()
        stored = session.query(Image).get((img_id, user_id))
        print('Vulns: {}'.format(stored.vulnerabilities()))
        session.commit()

    test_data_env.init_feeds()

    user_id = 'test1'
    img_id = 'img1'

    image = populate(
        Image(user_id=user_id, id=img_id, distro_name='centos', distro_version='7'),
        familytree_json=[img_id],
        layers_json=[img_id],
        layer_info_json=['somelayer_here'],
        like_distro='centos',
        state='analyzed',
        digest='digest1',
        anchore_type='undefined',
        dockerfile_mode='Guessed',
        docker_history_json=['line1', 'line2'],
        docker_data_json={'Config': {}, 'ContainerConfig': {}},
        dockerfile_contents='FROM BLAH',
    )

    package = populate(
        ImagePackage(
            image_user_id=user_id,
            image_id=img_id,
            name='testpackage',
            version='1.0',
            pkg_type='RPM',
        ),
        src_pkg='testpackage',
        distro_name='centos',
        distro_version='7',
        like_distro='centos',
        license='apache2',
        fullversion='1.0',
        normalized_src_pkg='1.0',
        release='',
        size=1000,
        origin='upstream',
        arch='x86_64',
        image=image,
    )

    # First revision of the record: severity High, fixed in 1.1.
    cve = populate(
        Vulnerability(id='CVE123', namespace_name='centos:7'),
        severity='High',
        description='some test cve',
        cvss2_score='1.0',
        metadata_json={},
        cvss2_vectors='',
        link='http://mitre.com/cve123',
    )

    fixed_in = populate(
        FixedArtifact(vulnerability_id=cve.id),
        name='testpackage',
        version='1.1',
        version_format='rpm',
        epochless_version='1.1',
        include_later_versions=True,
        parent=cve,
    )
    cve.fixed_in = [fixed_in]

    vulnerable_in = populate(
        VulnerableArtifact(vulnerability_id=cve.id),
        name='testpackage',
        version='0.9',
        epochless_version='0.9',
        namespace_name='centos:7',
        version_format='rpm',
        include_previous_versions=False,
        parent=cve,
    )
    cve.vulnerable_in = [vulnerable_in]

    # Store the image and its package; an IntegrityError here is rolled back
    # and otherwise ignored.
    db = get_session()
    try:
        for record in (image, package):
            db.add(record)
        db.commit()
    except sqlalchemy.exc.IntegrityError:
        db.rollback()
    except Exception:
        logger.exception('Unexpected failure')
        raise

    db = get_session()
    try:
        db.add(cve)
        feeds.process_updated_vulnerability(db, cve)
        db.commit()
    except sqlalchemy.exc.IntegrityError:
        logger.exception('Failed!')
        db.rollback()
    finally:
        print_image_vulnerabilities()

    # Second revision: same id and namespace, severity Medium, and a single
    # fix entry naming a different package.
    revised_cve = populate(
        Vulnerability(id='CVE123', namespace_name='centos:7'),
        severity='Medium',
        description='some test cve',
        cvss2_score='1.0',
        metadata_json={},
        cvss2_vectors='',
        link='http://mitre.com/cve123',
    )
    revised_fix = populate(
        FixedArtifact(name='pkg2', version='1.2', epochless_version='1.2'),
        namespace_name='centos:7',
        vulnerability_id=revised_cve.id,
    )
    revised_cve.fixed_in = [revised_fix]

    db = get_session()
    try:
        merged = db.merge(revised_cve)
        db.add(merged)
        feeds.process_updated_vulnerability(db, merged)
        db.commit()
    except sqlalchemy.exc.IntegrityError:
        logger.exception('Failed!')
        db.rollback()
    finally:
        print_image_vulnerabilities()
def test_github_advisory_fixed_in(test_data_env):
    """Check the fix version reported for a GitHub advisory match.

    The advisory's FixedArtifact carries the string 'None' as its version and
    '1.2' under fix_metadata['first_patched_version']. After the advisory is
    processed, the stored image must report exactly one vulnerability whose
    fixed_in() is '1.2'.
    """

    def populate(record, **fields):
        # Ordinary attribute assignment, applied in keyword order.
        for attr, value in fields.items():
            setattr(record, attr, value)
        return record

    test_data_env.init_feeds()

    user_id = 'test1'
    img_id = 'img1'

    image = populate(
        Image(user_id=user_id, id=img_id, distro_name='centos', distro_version='7'),
        familytree_json=[img_id],
        layers_json=[img_id],
        layer_info_json=['somelayer_here'],
        like_distro='centos',
        state='analyzed',
        digest='digest1',
        anchore_type='undefined',
        dockerfile_mode='Guessed',
        docker_history_json=['line1', 'line2'],
        docker_data_json={'Config': {}, 'ContainerConfig': {}},
        dockerfile_contents='FROM BLAH',
    )

    package = populate(
        ImagePackage(
            image_user_id=user_id,
            image_id=img_id,
            name='testpackage',
            version='1.0',
            pkg_type='python',
        ),
        src_pkg='testpackage',
        distro_name='centos',
        distro_version='7',
        like_distro='centos',
        license='apache2',
        fullversion='1.0',
        normalized_src_pkg='1.0',
        release='',
        size=1000,
        origin='upstream',
        arch='x86_64',
        image=image,
    )

    advisory = populate(
        Vulnerability(id='GHSA-rpch-cqj9-h65r', namespace_name='github:python'),
        severity='High',
        description='some advisory ghsa',
        link='http://mitre.com/cve123',
    )

    # The version field holds the literal string 'None'; the usable fix
    # version is only present in fix_metadata.
    fixed_in = populate(
        FixedArtifact(vulnerability_id=advisory.id),
        name='testpackage',
        version='None',
        fix_metadata={'first_patched_version': '1.2'},
        version_format='semver',
        parent=advisory,
    )
    advisory.fixed_in = [fixed_in]

    # Store the image and its package; an IntegrityError here is rolled back
    # and otherwise ignored.
    db = get_session()
    try:
        for record in (image, package):
            db.add(record)
        db.commit()
    except sqlalchemy.exc.IntegrityError:
        db.rollback()
    except Exception:
        logger.exception('Unexpected failure')
        raise

    db = get_session()
    # XXX This needs to be a fixture
    try:
        db.add(advisory)
        feeds.process_updated_vulnerability(db, advisory)
        db.commit()
    except sqlalchemy.exc.IntegrityError:
        logger.exception('Failed!')
        db.rollback()

    db = get_session()
    stored_image = db.query(Image).get((img_id, user_id))
    matches = stored_image.vulnerabilities()

    # should be one vulnerability
    assert len(matches) == 1
    assert matches[0].fixed_in() == '1.2'
def test_cve_updates(test_data_env):
    """Feed two revisions of one vulnerability record past a stored image.

    An image with a single package is persisted, a vulnerability record is
    added and processed, and then a second record with the same id and
    namespace is merged and processed. The image's vulnerabilities are
    printed after each step; nothing is asserted on them.
    """

    def populate(record, **fields):
        # Ordinary attribute assignment, applied in keyword order.
        for attr, value in fields.items():
            setattr(record, attr, value)
        return record

    def print_image_vulnerabilities():
        # Re-read the image through a session from get_session() and print
        # whatever vulnerabilities() reports for it.
        session = get_session()
        stored = session.query(Image).get((img_id, user_id))
        print("Vulns: {}".format(stored.vulnerabilities()))
        session.commit()

    test_data_env.init_feeds()

    user_id = "test1"
    img_id = "img1"

    image = populate(
        Image(user_id=user_id, id=img_id, distro_name="centos", distro_version="7"),
        familytree_json=[img_id],
        layers_json=[img_id],
        layer_info_json=["somelayer_here"],
        like_distro="centos",
        state="analyzed",
        digest="digest1",
        anchore_type="undefined",
        dockerfile_mode="Guessed",
        docker_history_json=["line1", "line2"],
        docker_data_json={"Config": {}, "ContainerConfig": {}},
        dockerfile_contents="FROM BLAH",
    )

    package = populate(
        ImagePackage(
            image_user_id=user_id,
            image_id=img_id,
            name="testpackage",
            version="1.0",
            pkg_type="RPM",
        ),
        src_pkg="testpackage",
        distro_name="centos",
        distro_version="7",
        like_distro="centos",
        license="apache2",
        fullversion="1.0",
        normalized_src_pkg="1.0",
        release="",
        size=1000,
        origin="upstream",
        arch="x86_64",
        image=image,
    )

    # First revision of the record: severity High, fixed in 1.1.
    cve = populate(
        Vulnerability(id="CVE123", namespace_name="centos:7"),
        severity="High",
        description="some test cve",
        cvss2_score="1.0",
        metadata_json={},
        cvss2_vectors="",
        link="http://mitre.com/cve123",
    )

    fixed_in = populate(
        FixedArtifact(vulnerability_id=cve.id),
        name="testpackage",
        version="1.1",
        version_format="rpm",
        epochless_version="1.1",
        include_later_versions=True,
        parent=cve,
    )
    cve.fixed_in = [fixed_in]

    vulnerable_in = populate(
        VulnerableArtifact(vulnerability_id=cve.id),
        name="testpackage",
        version="0.9",
        epochless_version="0.9",
        namespace_name="centos:7",
        version_format="rpm",
        include_previous_versions=False,
        parent=cve,
    )
    cve.vulnerable_in = [vulnerable_in]

    # Store the image and its package; an IntegrityError here is rolled back
    # and otherwise ignored.
    db = get_session()
    try:
        for record in (image, package):
            db.add(record)
        db.commit()
    except sqlalchemy.exc.IntegrityError:
        db.rollback()
    except Exception:
        logger.exception("Unexpected failure")
        raise

    db = get_session()
    try:
        db.add(cve)
        feeds.process_updated_vulnerability(db, cve)
        db.commit()
    except sqlalchemy.exc.IntegrityError:
        logger.exception("Failed!")
        db.rollback()
    finally:
        print_image_vulnerabilities()

    # Second revision: same id and namespace, severity Medium, and a single
    # fix entry naming a different package.
    revised_cve = populate(
        Vulnerability(id="CVE123", namespace_name="centos:7"),
        severity="Medium",
        description="some test cve",
        cvss2_score="1.0",
        metadata_json={},
        cvss2_vectors="",
        link="http://mitre.com/cve123",
    )
    revised_fix = populate(
        FixedArtifact(name="pkg2", version="1.2", epochless_version="1.2"),
        namespace_name="centos:7",
        vulnerability_id=revised_cve.id,
    )
    revised_cve.fixed_in = [revised_fix]

    db = get_session()
    try:
        merged = db.merge(revised_cve)
        db.add(merged)
        feeds.process_updated_vulnerability(db, merged)
        db.commit()
    except sqlalchemy.exc.IntegrityError:
        logger.exception("Failed!")
        db.rollback()
    finally:
        print_image_vulnerabilities()
def test_github_advisory_fixed_in(test_data_env):
    """Check the fix version reported for a GitHub advisory match.

    The advisory's FixedArtifact carries the string "None" as its version and
    "1.2" under fix_metadata["first_patched_version"]. After the advisory is
    processed, the stored image must report exactly one vulnerability whose
    fixed_in() is "1.2".
    """

    def populate(record, **fields):
        # Ordinary attribute assignment, applied in keyword order.
        for attr, value in fields.items():
            setattr(record, attr, value)
        return record

    test_data_env.init_feeds()

    user_id = "test1"
    img_id = "img1"

    image = populate(
        Image(user_id=user_id, id=img_id, distro_name="centos", distro_version="7"),
        familytree_json=[img_id],
        layers_json=[img_id],
        layer_info_json=["somelayer_here"],
        like_distro="centos",
        state="analyzed",
        digest="digest1",
        anchore_type="undefined",
        dockerfile_mode="Guessed",
        docker_history_json=["line1", "line2"],
        docker_data_json={"Config": {}, "ContainerConfig": {}},
        dockerfile_contents="FROM BLAH",
    )

    package = populate(
        ImagePackage(
            image_user_id=user_id,
            image_id=img_id,
            name="testpackage",
            version="1.0",
            pkg_type="python",
        ),
        src_pkg="testpackage",
        distro_name="centos",
        distro_version="7",
        like_distro="centos",
        license="apache2",
        fullversion="1.0",
        normalized_src_pkg="1.0",
        release="",
        size=1000,
        origin="upstream",
        arch="x86_64",
        image=image,
    )

    advisory = populate(
        Vulnerability(id="GHSA-rpch-cqj9-h65r", namespace_name="github:python"),
        severity="High",
        description="some advisory ghsa",
        link="http://mitre.com/cve123",
    )

    # The version field holds the literal string "None"; the usable fix
    # version is only present in fix_metadata.
    fixed_in = populate(
        FixedArtifact(vulnerability_id=advisory.id),
        name="testpackage",
        version="None",
        fix_metadata={"first_patched_version": "1.2"},
        version_format="semver",
        parent=advisory,
    )
    advisory.fixed_in = [fixed_in]

    # Store the image and its package; an IntegrityError here is rolled back
    # and otherwise ignored.
    db = get_session()
    try:
        for record in (image, package):
            db.add(record)
        db.commit()
    except sqlalchemy.exc.IntegrityError:
        db.rollback()
    except Exception:
        logger.exception("Unexpected failure")
        raise

    db = get_session()
    # XXX This needs to be a fixture
    try:
        db.add(advisory)
        feeds.process_updated_vulnerability(db, advisory)
        db.commit()
    except sqlalchemy.exc.IntegrityError:
        logger.exception("Failed!")
        db.rollback()

    db = get_session()
    stored_image = db.query(Image).get((img_id, user_id))
    matches = stored_image.vulnerabilities()

    # should be one vulnerability
    assert len(matches) == 1
    assert matches[0].fixed_in() == "1.2"