Ejemplo n.º 1
0
    def load_javas(self, analysis_json, containing_image):
        pkgs_json = analysis_json.get('package_list', {}).get('pkgs.java',
                                                              {}).get('base')
        if not pkgs_json:
            return []

        pkgs = []
        for path, pkg_str in list(pkgs_json.items()):
            pkg_json = json.loads(pkg_str)

            n = ImagePackage()

            # primary keys
            # TODO - some java names have a version in it, need to clean that up
            n.name = pkg_json.get('name')
            n.pkg_type = 'java'
            n.arch = 'N/A'
            n.pkg_path = path
            version = None
            versions_json = {}
            for k in [
                    'maven-version', 'implementation-version',
                    'specification-version'
            ]:
                if not version and pkg_json.get(k, 'N/A') != 'N/A':
                    version = pkg_json.get(k)
                versions_json[k] = pkg_json.get(k, 'N/A')
            if version:
                n.version = version
            else:
                n.version = 'N/A'

            n.image_user_id = containing_image.user_id
            n.image_id = containing_image.id

            # other non-PK values
            n.pkg_path_hash = hashlib.sha256(ensure_bytes(path)).hexdigest()
            n.distro_name = 'java'
            n.distro_version = 'N/A'
            n.like_distro = 'java'
            n.fullversion = n.version

            m = pkg_json.get('metadata')
            m['java_versions'] = versions_json
            n.metadata_json = m

            fullname = n.name
            pomprops = n.get_pom_properties()
            if pomprops:
                fullname = "{}:{}".format(pomprops.get('groupId'),
                                          pomprops.get('artifactId'))

            n.normalized_src_pkg = fullname
            n.src_pkg = fullname
            pkgs.append(n)

        return pkgs
Ejemplo n.º 2
0
    def load_npms(self, analysis_json, containing_image):
        handled_pkgtypes = ['pkgs.npms']        
        npms_json = analysis_json.get('package_list', {}).get('pkgs.npms',{}).get('base')
        if not npms_json:
            return [], handled_pkgtypes

            
        npms = []
        image_packages = []
        for path, npm_str in list(npms_json.items()):
            npm_json = json.loads(npm_str)

            # TODO: remove this usage of ImageNPM, that is deprecated
            n = ImageNpm()
            n.path_hash = hashlib.sha256(ensure_bytes(path)).hexdigest()
            n.path = path
            n.name = npm_json.get('name')
            n.src_pkg = npm_json.get('src_pkg')
            n.origins_json = npm_json.get('origins')
            n.licenses_json = npm_json.get('lics')
            n.latest = npm_json.get('latest')
            n.versions_json = npm_json.get('versions')
            n.image_user_id = containing_image.user_id
            n.image_id = containing_image.id
            #npms.append(n)

            np = ImagePackage()
            # primary keys
            np.name = n.name
            if len(n.versions_json): 
                version = n.versions_json[0]
            else:
                version = "N/A"
            np.version = version
            np.pkg_type = 'npm'
            np.arch = 'N/A'
            np.image_user_id = n.image_user_id
            np.image_id = n.image_id
            np.pkg_path = n.path
            # other
            np.pkg_path_hash = n.path_hash
            np.distro_name = 'npm'
            np.distro_version = 'N/A'
            np.like_distro = 'npm'
            np.fullversion = np.version
            np.license = ' '.join(n.licenses_json)
            np.origin = ' '.join(n.origins_json)
            #np.metadata_json = pkg_json.get('metadata')
            fullname = np.name
            np.normalized_src_pkg = fullname
            np.src_pkg = fullname
            image_packages.append(np)

        return image_packages, handled_pkgtypes
Ejemplo n.º 3
0
    def load_gems(self, analysis_json, containing_image):
        gems_json = analysis_json.get('package_list', {}).get('pkgs.gems',
                                                              {}).get('base')
        if not gems_json:
            return []

        gems = []
        image_packages = []
        for path, gem_str in list(gems_json.items()):
            gem_json = json.loads(gem_str)
            n = ImageGem()
            n.path_hash = hashlib.sha256(ensure_bytes(path)).hexdigest()
            n.path = path
            n.name = gem_json.get('name')
            n.src_pkg = gem_json.get('src_pkg')
            n.origins_json = gem_json.get('origins')
            n.licenses_json = gem_json.get('lics')
            n.versions_json = gem_json.get('versions')
            n.latest = gem_json.get('latest')
            n.image_user_id = containing_image.user_id
            n.image_id = containing_image.id
            #gems.append(n)

            np = ImagePackage()
            # primary keys
            np.name = n.name
            if len(n.versions_json):
                version = n.versions_json[0]
            else:
                version = "N/A"
            np.version = version
            np.pkg_type = 'gem'
            np.arch = 'N/A'
            np.image_user_id = n.image_user_id
            np.image_id = n.image_id
            np.pkg_path = n.path
            # other
            np.pkg_path_hash = n.path_hash
            np.distro_name = 'gem'
            np.distro_version = 'N/A'
            np.like_distro = 'gem'
            np.fullversion = np.version
            np.license = ' '.join(n.licenses_json)
            np.origin = ' '.join(n.origins_json)
            #np.metadata_json = pkg_json.get('metadata')
            fullname = np.name
            np.normalized_src_pkg = fullname
            np.src_pkg = fullname
            image_packages.append(np)

        return image_packages
Ejemplo n.º 4
0
    def load_generic_packages(self,
                              analysis_json,
                              containing_image,
                              excludes=[]):
        pkgs = []
        handled_pkgtypes = []
        package_types = analysis_json.get('package_list', {})
        for package_type in package_types:
            if package_type not in excludes:
                patt = re.match(r"pkgs\.(.*)", package_type)
                if patt:
                    ptype = patt.group(1)
                    handled_pkgtypes.append(ptype)
                    pkgs_json = analysis_json.get('package_list',
                                                  {}).get(package_type,
                                                          {}).get('base', {})

                    if not pkgs_json:
                        return [], handled_pkgtypes

                    for path, pkg_str in list(pkgs_json.items()):
                        pkg_json = json.loads(pkg_str)
                        n = ImagePackage()
                        # primary keys
                        n.name = pkg_json.get('name')
                        n.pkg_path = path
                        n.version = pkg_json.get('version')
                        n.pkg_type = pkg_json.get('type', 'N/A')
                        n.arch = 'N/A'
                        n.image_user_id = n.image_user_id
                        n.image_id = n.image_id
                        # other
                        n.pkg_path_hash = hashlib.sha256(
                            ensure_bytes(path)).hexdigest()
                        n.distro_name = n.pkg_type
                        n.distro_version = 'N/A'
                        n.like_distro = n.pkg_type
                        n.fullversion = n.version
                        n.license = pkg_json.get('license', 'N/A')
                        n.origin = pkg_json.get('origin', 'N/A')

                        fullname = n.name
                        n.normalized_src_pkg = fullname
                        n.src_pkg = fullname
                        pkgs.append(n)

        return pkgs, handled_pkgtypes
Ejemplo n.º 5
0
    def load_pythons(self, analysis_json, containing_image):

        pkgs_json = analysis_json.get('package_list',
                                      {}).get('pkgs.python', {}).get('base')
        if not pkgs_json:
            return []

        pkgs = []
        for path, pkg_str in list(pkgs_json.items()):
            pkg_json = json.loads(pkg_str)

            n = ImagePackage()
            # primary keys
            n.name = pkg_json.get('name')
            n.pkg_path = path
            n.version = pkg_json.get('version')
            n.pkg_type = 'python'
            n.arch = 'N/A'
            n.image_user_id = n.image_user_id
            n.image_id = n.image_id
            # other
            n.pkg_path_hash = hashlib.sha256(ensure_bytes(path)).hexdigest()
            n.distro_name = 'python'
            n.distro_version = 'N/A'
            n.like_distro = 'python'
            n.fullversion = n.version
            n.license = pkg_json.get('license')
            n.origin = pkg_json.get('origin')

            m = {
                'python_distribution_metadata': pkg_json.get('metadata'),
                'files': pkg_json.get('files')
            }
            n.metadata_json = m

            fullname = n.name
            n.normalized_src_pkg = fullname
            n.src_pkg = fullname
            pkgs.append(n)

        return pkgs
Ejemplo n.º 6
0
    def load_and_normalize_packages(self, package_analysis_json, image_obj):
        """
        Loads and normalizes package data from all distros

        :param image_obj:
        :param package_analysis_json:
        :return: list of Package objects that can be added to an image
        """
        pkgs = []

        img_distro = DistroNamespace.for_obj(image_obj)

        # pkgs.allinfo handling
        pkgs_all = package_analysis_json.get('pkgs.allinfo', {}).values()
        if not pkgs_all:
            return []
        else:
            pkgs_all = pkgs_all[0]

        for pkg_name, metadata_str in pkgs_all.items():
            metadata = json.loads(metadata_str)

            p = ImagePackage()
            p.distro_name = image_obj.distro_name
            p.distro_version = image_obj.distro_version
            p.like_distro = image_obj.like_distro

            p.name = pkg_name
            p.version = metadata.get('version')
            p.origin = metadata.get('origin')
            p.size = metadata.get('size')
            p.arch = metadata.get('arch')
            p.license = metadata.get('license') if metadata.get(
                'license') else metadata.get('lics')
            p.release = metadata.get('release', 'N/A')
            p.pkg_type = metadata.get('type')
            p.src_pkg = metadata.get('sourcepkg')
            p.image_user_id = image_obj.user_id
            p.image_id = image_obj.id

            if 'files' in metadata:
                # Handle file data
                p.files = metadata.get('files')

            if p.release != 'N/A':
                p.fullversion = p.version + '-' + p.release
            else:
                p.fullversion = p.version

            if img_distro.flavor == 'DEB':
                cleanvers = re.sub(re.escape("+b") + "\d+.*", "", p.version)
                spkg = re.sub(re.escape("-" + cleanvers), "", p.src_pkg)
            else:
                spkg = re.sub(re.escape("-" + p.version) + ".*", "", p.src_pkg)

            p.normalized_src_pkg = spkg
            pkgs.append(p)

        if pkgs:
            return pkgs
        else:
            log.warn('Pkg Allinfo not found, reverting to using pkgs.all')

        all_pkgs = package_analysis_json['pkgs.all']['base']
        all_pkgs_src = package_analysis_json['pkgs_plus_source.all']['base']

        for pkg_name, version in all_pkgs.items():
            p = ImagePackage()
            p.image_user_id = image_obj.user_id
            p.image_id = image_obj.id
            p.name = pkg_name
            p.version = version
            p.fullversion = all_pkgs_src[pkg_name]

            if img_distro.flavor == 'RHEL':
                name, parsed_version, release, epoch, arch = split_rpm_filename(
                    pkg_name + '-' + version + '.tmparch.rpm')
                p.version = parsed_version
                p.release = release
                p.pkg_type = 'RPM'
                p.origin = 'N/A'
                p.src_pkg = 'N/A'
                p.license = 'N/A'
                p.arch = 'N/A'
            elif img_distro.flavor == 'DEB':
                try:
                    p.version, p.release = version.split('-')
                except:
                    p.version = version
                    p.release = None

        return pkgs
Ejemplo n.º 7
0
def policy_engine_packages_upgrade_007_008():
    from anchore_engine.db import session_scope, ImagePackage, ImageNpm, ImageGem, Image
    if True:
        engine = anchore_engine.db.entities.common.get_engine()

        file_path_length = 512
        hash_length = 80

        new_columns = [{
            'table_name':
            'image_packages',
            'columns': [
                Column('pkg_path', String(file_path_length), primary_key=True),
                Column('pkg_path_hash', String(hash_length)),
                Column('metadata_json', StringJSON),
            ]
        }, {
            'table_name':
            'image_package_vulnerabilities',
            'columns': [
                Column('pkg_path', String(file_path_length), primary_key=True),
            ]
        }, {
            'table_name':
            'image_package_db_entries',
            'columns': [
                Column('pkg_path', String(file_path_length), primary_key=True),
            ]
        }]

        log.err("creating new table columns")
        for table in new_columns:
            for column in table['columns']:
                log.err("creating new column ({}) in table ({})".format(
                    column.name, table.get('table_name', "")))
                try:
                    cn = column.compile(dialect=engine.dialect)
                    ct = column.type.compile(engine.dialect)
                    engine.execute(
                        'ALTER TABLE %s ADD COLUMN IF NOT EXISTS %s %s' %
                        (table['table_name'], cn, ct))
                except Exception as e:
                    log.err(
                        'failed to perform DB upgrade on {} adding column - exception: {}'
                        .format(table, str(e)))
                    raise Exception(
                        'failed to perform DB upgrade on {} adding column - exception: {}'
                        .format(table, str(e)))

        # populate the new columns
        log.err("updating new column (pkg_path) - this may take a while")
        for table in ['image_packages', 'image_package_vulnerabilities']:
            log.err("updating table ({}) column (pkg_path)".format(table))
            done = False
            while not done:
                startts = time.time()
                rc = engine.execute(
                    "UPDATE {} set pkg_path='pkgdb' where pkg_path is null".
                    format(table))
                log.err(
                    "updated {} records in {} (time={}), performing next range"
                    .format(rc.rowcount, table,
                            time.time() - startts))
                done = True

        with session_scope() as dbsession:
            db_image_ids = dbsession.query(Image.id).distinct().all()

        total_records = len(db_image_ids)
        record_count = 0
        for record in db_image_ids:
            db_image_id = record[0]
            startts = time.time()
            rc = engine.execute(
                "UPDATE image_package_db_entries set pkg_path='pkgdb' where image_id='{}' and pkg_path is null"
                .format(db_image_id))
            record_count = record_count + 1
            log.err(
                "updated {} image ({} / {}) in {} (time={}), performing next image update"
                .format(db_image_id, record_count, total_records,
                        'image_package_db_entries',
                        time.time() - startts))

        exec_commands = [
            'ALTER TABLE image_package_vulnerabilities DROP CONSTRAINT IF EXISTS image_package_vulnerabilities_pkg_image_id_fkey',
            'ALTER TABLE image_package_db_entries DROP CONSTRAINT IF EXISTS image_package_db_entries_image_id_fkey',
            'ALTER TABLE image_packages DROP CONSTRAINT IF EXISTS image_packages_pkey',
            'ALTER TABLE image_package_db_entries DROP CONSTRAINT IF EXISTS image_package_db_entries_pkey',
            'ALTER TABLE image_package_vulnerabilities DROP CONSTRAINT IF EXISTS image_package_vulnerabilities_pkey',
        ]

        log.err(
            "dropping primary key / foreign key relationships for new column")
        cmdcount = 1
        for command in exec_commands:
            log.err("running update operation {} of {}: {}".format(
                cmdcount, len(exec_commands), command))
            engine.execute(command)
            cmdcount = cmdcount + 1

        exec_commands = [
            'ALTER TABLE image_packages ADD PRIMARY KEY (image_id,image_user_id,name,version,pkg_type,arch,pkg_path)',
            'ALTER TABLE image_package_vulnerabilities ADD PRIMARY KEY (pkg_user_id,pkg_image_id,pkg_name,pkg_version,pkg_type,pkg_arch,vulnerability_id,pkg_path)',
            'ALTER TABLE image_package_db_entries ADD PRIMARY KEY (image_id, image_user_id, pkg_name, pkg_version, pkg_type, pkg_arch, pkg_path,file_path)',
            'ALTER TABLE image_package_vulnerabilities ADD CONSTRAINT image_package_vulnerabilities_pkg_image_id_fkey FOREIGN KEY (pkg_image_id, pkg_user_id, pkg_name, pkg_version, pkg_type, pkg_arch, pkg_path) REFERENCES image_packages (image_id, image_user_id, name, version, pkg_type, arch, pkg_path) MATCH SIMPLE',
            'ALTER TABLE image_package_db_entries ADD CONSTRAINT image_package_db_entries_image_id_fkey FOREIGN KEY (image_id, image_user_id, pkg_name, pkg_version, pkg_type, pkg_arch, pkg_path) REFERENCES image_packages (image_id, image_user_id, name, version, pkg_type, arch, pkg_path) MATCH SIMPLE',

            # These are helpers for the upgrade itself, not needed by the functioning system. Needed for large npm/gem tables and pagination support
            "CREATE SEQUENCE IF NOT EXISTS image_npms_seq_id_seq",
            "ALTER TABLE image_npms add column IF NOT EXISTS seq_id int DEFAULT nextval('image_npms_seq_id_seq')",
            "CREATE INDEX IF NOT EXISTS idx_npm_seq ON image_npms using btree (seq_id)",
            "CREATE SEQUENCE IF NOT EXISTS image_gems_seq_id_seq",
            "ALTER TABLE image_gems add column IF NOT EXISTS seq_id int DEFAULT nextval('image_gems_seq_id_seq')",
            "CREATE INDEX IF NOT EXISTS idx_gem_seq ON image_gems using btree (seq_id)",
            "ALTER TABLE image_packages ALTER COLUMN origin TYPE varchar"
        ]

        log.err(
            "updating primary key / foreign key relationships for new column - this may take a while"
        )
        cmdcount = 1
        for command in exec_commands:
            log.err("running update operation {} of {}: {}".format(
                cmdcount, len(exec_commands), command))
            engine.execute(command)
            cmdcount = cmdcount + 1

        log.err(
            "converting ImageNpm and ImageGem records into ImagePackage records - this may take a while"
        )
        # migrate ImageNpm and ImageGem records into ImagePackage records
        with session_scope() as dbsession:
            total_npms = dbsession.query(ImageNpm).count()
            total_gems = dbsession.query(ImageGem).count()

        log.err("will migrate {} image npm records".format(total_npms))

        npms = []
        chunk_size = 8192
        record_count = 0
        skipped_count = 0

        with session_scope() as dbsession:
            try:
                last_seq = -1
                while record_count < total_npms:
                    chunk_time = time.time()
                    log.err('Processing next chunk of records')
                    for n in dbsession.query(ImageNpm).filter(
                            ImageNpm.seq_id > last_seq).limit(chunk_size):
                        np = ImagePackage()

                        # primary keys
                        np.name = n.name
                        if len(n.versions_json):
                            version = n.versions_json[0]
                        else:
                            version = "N/A"
                        np.version = version
                        np.pkg_type = 'npm'
                        np.arch = 'N/A'
                        np.image_user_id = n.image_user_id
                        np.image_id = n.image_id
                        np.pkg_path = n.path

                        # other
                        np.pkg_path_hash = n.path_hash
                        np.distro_name = 'npm'
                        np.distro_version = 'N/A'
                        np.like_distro = 'npm'
                        np.fullversion = np.version
                        np.license = ' '.join(n.licenses_json)
                        np.origin = ' '.join(n.origins_json)
                        fullname = np.name
                        np.normalized_src_pkg = fullname
                        np.src_pkg = fullname

                        npms.append(np)
                        last_seq = n.seq_id

                    if len(npms):
                        log.err('Inserting {} new records'.format(len(npms)))

                        startts = time.time()
                        try:
                            with session_scope() as dbsession2:
                                dbsession2.bulk_save_objects(npms)
                        except Exception as err:
                            log.err("skipping duplicates: {}".format(err))
                            skipped_count += 1

                        record_count = record_count + len(npms)
                        log.err("merged {} / {} npm records (time={})".format(
                            record_count, total_npms,
                            time.time() - startts))

                    log.err(
                        'Chunk took: {} seconds to process {} records'.format(
                            time.time() - chunk_time, len(npms)))
                    npms = []

            except Exception as err:
                log.err('Error during npm migration: {}'.format(err))
                raise err

        log.err("will migrate {} image gem records".format(total_gems))
        gems = []
        record_count = 0
        skipped_count = 0
        with session_scope() as dbsession:
            try:
                last_seq = -1
                while record_count < total_gems:
                    chunk_time = time.time()
                    log.err('Processing next chunk of records')
                    for n in dbsession.query(ImageGem).filter(
                            ImageGem.seq_id > last_seq).limit(chunk_size):

                        np = ImagePackage()

                        # primary keys
                        np.name = n.name
                        if len(n.versions_json):
                            version = n.versions_json[0]
                        else:
                            version = "N/A"
                        np.version = version
                        np.pkg_type = 'gem'
                        np.arch = 'N/A'
                        np.image_user_id = n.image_user_id
                        np.image_id = n.image_id
                        np.pkg_path = n.path

                        # other
                        np.pkg_path_hash = n.path_hash
                        np.distro_name = 'gem'
                        np.distro_version = 'N/A'
                        np.like_distro = 'gem'
                        np.fullversion = np.version
                        np.license = ' '.join(n.licenses_json)
                        np.origin = ' '.join(n.origins_json)
                        fullname = np.name
                        np.normalized_src_pkg = fullname
                        np.src_pkg = fullname
                        gems.append(np)
                        last_seq = n.seq_id

                    if len(gems):
                        log.err('Inserting {} new records'.format(len(gems)))

                        startts = time.time()
                        try:
                            with session_scope() as dbsession2:
                                dbsession2.bulk_save_objects(gems)
                        except Exception as err:
                            log.err("skipping duplicates: {}".format(err))
                            skipped_count += 1

                        record_count = record_count + len(gems)
                        log.err("merged {} / {} gem records (time={})".format(
                            record_count, total_gems,
                            time.time() - startts))

                    log.err(
                        'Chunk took: {} seconds to process {} records'.format(
                            time.time() - chunk_time, len(npms)))
                    gems = []

            except Exception as err:
                log.err('Error during gem migration: {}'.format(err))
                raise err
Ejemplo n.º 8
0
    def load_javas(self, analysis_json, containing_image):
        handled_pkgtypes = ['pkgs.java']        
        pkgs_json = analysis_json.get('package_list', {}).get('pkgs.java', {}).get('base')
        if not pkgs_json:
            return [], handled_pkgtypes

        pkgs = []
        for path, pkg_str in list(pkgs_json.items()):
            pkg_json = json.loads(pkg_str)
            
            n = ImagePackage()

            # primary keys
            # TODO - some java names have a version in it, need to clean that up
            n.name = pkg_json.get('name')
            n.pkg_type = 'java'
            n.arch = 'N/A'
            n.pkg_path = path

            metaversion = None
            versions_json = {}
            for k in ['maven-version', 'implementation-version', 'specification-version']:
                if not metaversion and pkg_json.get(k, 'N/A') != 'N/A':
                    metaversion = pkg_json.get(k)
                versions_json[k] = pkg_json.get(k, 'N/A')

            n.image_user_id = containing_image.user_id
            n.image_id = containing_image.id

            # other non-PK values
            n.pkg_path_hash = hashlib.sha256(ensure_bytes(path)).hexdigest()
            n.distro_name = 'java'
            n.distro_version = 'N/A'
            n.like_distro = 'java'

            m = pkg_json.get('metadata')
            m['java_versions'] = versions_json
            n.metadata_json = m

            fullname = n.name
            pomprops = n.get_pom_properties()
            pomversion = None
            if pomprops:
                fullname = "{}:{}".format(pomprops.get('groupId'), pomprops.get('artifactId'))
                pomversion = pomprops.get('version', None)

            n.normalized_src_pkg = fullname
            n.src_pkg = fullname

            # final version decision - try our best to get an accurate version/name pair
            n.version = 'N/A'
            if pomversion:
                n.version = pomversion
            elif metaversion:
                n.version = metaversion
            else:
                try:
                    patt = re.match(r"(.*)-(([\d]\.)+.*)", n.name)
                    if patt and patt.group(1):
                        n.version = patt.group(2)
                        n.name = patt.group(1)
                except Exception as err:
                    pass
            n.fullversion = n.version

            pkgs.append(n)

        return pkgs, handled_pkgtypes
Ejemplo n.º 9
0
def policy_engine_packages_upgrade_007_008():
    from anchore_engine.db import session_scope, ImagePackage, ImageNpm, ImageGem, Image
    if True:
        engine = anchore_engine.db.entities.common.get_engine()

        file_path_length = 512
        hash_length = 80

        new_columns = [
            {
                'table_name': 'image_packages',
                'columns': [
                    Column('pkg_path', String(file_path_length), primary_key=True),
                    Column('pkg_path_hash', String(hash_length)),
                    Column('metadata_json', StringJSON),
                ]
            },
            {
                'table_name': 'image_package_vulnerabilities',
                'columns': [
                    Column('pkg_path', String(file_path_length), primary_key=True),
                ]
            },
            {
                'table_name': 'image_package_db_entries',
                'columns': [
                    Column('pkg_path', String(file_path_length), primary_key=True),
                ]
            }
        ]

        log.err("creating new table columns")
        for table in new_columns:
            for column in table['columns']:
                log.err("creating new column ({}) in table ({})".format(column.name, table.get('table_name', "")))
                try:
                    cn = column.compile(dialect=engine.dialect)
                    ct = column.type.compile(engine.dialect)
                    engine.execute('ALTER TABLE %s ADD COLUMN IF NOT EXISTS %s %s' % (table['table_name'], cn, ct))
                except Exception as e:
                    log.err('failed to perform DB upgrade on {} adding column - exception: {}'.format(table, str(e)))
                    raise Exception('failed to perform DB upgrade on {} adding column - exception: {}'.format(table, str(e)))


        # populate the new columns
        log.err("updating new column (pkg_path) - this may take a while")
        for table in ['image_packages', 'image_package_vulnerabilities']:
            log.err("updating table ({}) column (pkg_path)".format(table))
            done = False
            while not done:
                startts = time.time()
                rc = engine.execute("UPDATE {} set pkg_path='pkgdb' where pkg_path is null".format(table))
                log.err("updated {} records in {} (time={}), performing next range".format(rc.rowcount, table, time.time() - startts))
                done=True


        with session_scope() as dbsession:
            db_image_ids = dbsession.query(Image.id).distinct().all()

        total_records = len(db_image_ids)
        record_count = 0
        for record in db_image_ids:
            db_image_id = record[0]
            startts = time.time()
            rc = engine.execute("UPDATE image_package_db_entries set pkg_path='pkgdb' where image_id='{}' and pkg_path is null".format(db_image_id))
            record_count = record_count + 1
            log.err("updated {} image ({} / {}) in {} (time={}), performing next image update".format(db_image_id, record_count, total_records, 'image_package_db_entries', time.time() - startts))

        exec_commands = [
            'ALTER TABLE image_package_vulnerabilities DROP CONSTRAINT IF EXISTS image_package_vulnerabilities_pkg_image_id_fkey',
            'ALTER TABLE image_package_db_entries DROP CONSTRAINT IF EXISTS image_package_db_entries_image_id_fkey',
            'ALTER TABLE image_packages DROP CONSTRAINT IF EXISTS image_packages_pkey',
            'ALTER TABLE image_package_db_entries DROP CONSTRAINT IF EXISTS image_package_db_entries_pkey',
            'ALTER TABLE image_package_vulnerabilities DROP CONSTRAINT IF EXISTS image_package_vulnerabilities_pkey',
        ]

        log.err("dropping primary key / foreign key relationships for new column")
        cmdcount = 1
        for command in exec_commands:
            log.err("running update operation {} of {}: {}".format(cmdcount, len(exec_commands), command))
            engine.execute(command)
            cmdcount = cmdcount + 1

        exec_commands = [
            'ALTER TABLE image_packages ADD PRIMARY KEY (image_id,image_user_id,name,version,pkg_type,arch,pkg_path)',
            'ALTER TABLE image_package_vulnerabilities ADD PRIMARY KEY (pkg_user_id,pkg_image_id,pkg_name,pkg_version,pkg_type,pkg_arch,vulnerability_id,pkg_path)',
            'ALTER TABLE image_package_db_entries ADD PRIMARY KEY (image_id, image_user_id, pkg_name, pkg_version, pkg_type, pkg_arch, pkg_path,file_path)',
            'ALTER TABLE image_package_vulnerabilities ADD CONSTRAINT image_package_vulnerabilities_pkg_image_id_fkey FOREIGN KEY (pkg_image_id, pkg_user_id, pkg_name, pkg_version, pkg_type, pkg_arch, pkg_path) REFERENCES image_packages (image_id, image_user_id, name, version, pkg_type, arch, pkg_path) MATCH SIMPLE',
            'ALTER TABLE image_package_db_entries ADD CONSTRAINT image_package_db_entries_image_id_fkey FOREIGN KEY (image_id, image_user_id, pkg_name, pkg_version, pkg_type, pkg_arch, pkg_path) REFERENCES image_packages (image_id, image_user_id, name, version, pkg_type, arch, pkg_path) MATCH SIMPLE',
        ]

        log.err("updating primary key / foreign key relationships for new column - this may take a while")
        cmdcount = 1
        for command in exec_commands:
            log.err("running update operation {} of {}: {}".format(cmdcount, len(exec_commands), command))
            engine.execute(command)
            cmdcount = cmdcount + 1


        log.err("converting ImageNpm and ImageGem records into ImagePackage records - this may take a while")
        # migrate ImageNpm and ImageGem records into ImagePackage records
        with session_scope() as dbsession:
            db_npms = dbsession.query(ImageNpm)
            total_npms = dbsession.query(ImageNpm).count()
            db_gems = dbsession.query(ImageGem)
            total_gems = dbsession.query(ImageGem).count()

        npms = []
        chunk_size = 8192
        record_count = 0
        try:
            for n in db_npms:
                np = ImagePackage()

                # primary keys
                np.name = n.name
                if len(n.versions_json):
                    version = n.versions_json[0]
                else:
                    version = "N/A"
                np.version = version
                np.pkg_type = 'npm'
                np.arch = 'N/A'
                np.image_user_id = n.image_user_id
                np.image_id = n.image_id
                np.pkg_path = n.path

                # other
                np.pkg_path_hash = n.path_hash
                np.distro_name = 'npm'
                np.distro_version = 'N/A'
                np.like_distro = 'npm'
                np.fullversion = np.version
                np.license = ' '.join(n.licenses_json)
                np.origin = ' '.join(n.origins_json)
                fullname = np.name
                np.normalized_src_pkg = fullname
                np.src_pkg = fullname
                npms.append(np)
                if len(npms) >= chunk_size:
                    startts = time.time()
                    try:
                        with session_scope() as dbsession:
                            dbsession.bulk_save_objects(npms)
                            record_count = record_count + chunk_size
                    except:
                        log.err("skipping duplicates")
                        record_count = record_count + chunk_size
                    log.err("merged {} / {} npm records (time={}), performing next range".format(record_count, total_npms, time.time() - startts))

                    npms = []

            if len(npms):
                startts = time.time()
                try:
                    with session_scope() as dbsession:
                        dbsession.bulk_save_objects(npms)
                        record_count = record_count + len(npms)
                except:
                    log.err("skipping duplicates")
                    record_count = record_count + len(npms)
                log.err("final merged {} / {} npm records (time={})".format(record_count, total_npms, time.time() - startts))

        except Exception as err:
            raise err

        gems = []
        chunk_size = 8192
        record_count = 0
        try:
            for n in db_gems:

                np = ImagePackage()

                # primary keys
                np.name = n.name
                if len(n.versions_json):
                    version = n.versions_json[0]
                else:
                    version = "N/A"
                np.version = version
                np.pkg_type = 'gem'
                np.arch = 'N/A'
                np.image_user_id = n.image_user_id
                np.image_id = n.image_id
                np.pkg_path = n.path

                # other
                np.pkg_path_hash = n.path_hash
                np.distro_name = 'gem'
                np.distro_version = 'N/A'
                np.like_distro = 'gem'
                np.fullversion = np.version
                np.license = ' '.join(n.licenses_json)
                np.origin = ' '.join(n.origins_json)
                fullname = np.name
                np.normalized_src_pkg = fullname
                np.src_pkg = fullname
                gems.append(np)
                if len(gems) >= chunk_size:
                    startts = time.time()
                    try:
                        with session_scope() as dbsession:
                            dbsession.bulk_save_objects(gems)
                            record_count = record_count + chunk_size
                    except:
                        log.err("skipping duplicates")
                        record_count = record_count + chunk_size
                    log.err("merged {} / {} gem records (time={}), performing next range".format(record_count, total_gems, time.time() - startts))

                    gems = []

            if len(gems):
                startts = time.time()
                try:
                    with session_scope() as dbsession:
                        dbsession.bulk_save_objects(gems)
                        record_count = record_count + len(gems)
                except:
                    log.err("skipping duplicates")
                    record_count = record_count + len(gems)
                log.err("final merged {} / {} gem records (time={})".format(record_count, total_gems, time.time() - startts))

        except Exception as err:
            raise err
Ejemplo n.º 10
0
def db_upgrade_007_008():
    from anchore_engine.db import session_scope, ImagePackage, ImageNpm, ImageGem
    if True:
        engine = anchore_engine.db.entities.common.get_engine()

        file_path_length = 512
        hash_length = 80

        new_columns = [{
            'table_name':
            'image_packages',
            'columns': [
                Column('pkg_path', String(file_path_length), primary_key=True),
                Column('pkg_path_hash', String(hash_length)),
                Column('metadata_json', StringJSON),
            ]
        }, {
            'table_name':
            'image_package_vulnerabilities',
            'columns': [
                Column('pkg_path', String(file_path_length), primary_key=True),
            ]
        }, {
            'table_name':
            'image_package_db_entries',
            'columns': [
                Column('pkg_path', String(file_path_length), primary_key=True),
            ]
        }]

        log.err("creating new table columns")
        for table in new_columns:
            for column in table['columns']:
                log.err("creating new column ({}) in table ({})".format(
                    column.name, table.get('table_name', "")))
                try:
                    cn = column.compile(dialect=engine.dialect)
                    ct = column.type.compile(engine.dialect)
                    engine.execute(
                        'ALTER TABLE %s ADD COLUMN IF NOT EXISTS %s %s' %
                        (table['table_name'], cn, ct))
                except Exception as e:
                    log.err(
                        'failed to perform DB upgrade on {} adding column - exception: {}'
                        .format(table, str(e)))
                    raise Exception(
                        'failed to perform DB upgrade on {} adding column - exception: {}'
                        .format(table, str(e)))

        # populate the new columns
        for table in [
                'image_packages', 'image_package_vulnerabilities',
                'image_package_db_entries'
        ]:
            log.err("updating table ({}) column (pkg_path)".format(table))
            engine.execute(
                "UPDATE {} set pkg_path='pkgdb' where pkg_path is null".format(
                    table))

        exec_commands = [
            'ALTER TABLE image_package_vulnerabilities DROP CONSTRAINT IF EXISTS image_package_vulnerabilities_pkg_image_id_fkey',
            'ALTER TABLE image_package_db_entries DROP CONSTRAINT IF EXISTS image_package_db_entries_image_id_fkey',
            'ALTER TABLE image_packages DROP CONSTRAINT IF EXISTS image_packages_pkey',
            'ALTER TABLE image_package_db_entries DROP CONSTRAINT IF EXISTS image_package_db_entries_pkey',
            'ALTER TABLE image_package_vulnerabilities DROP CONSTRAINT IF EXISTS image_package_vulnerabilities_pkey',
            'ALTER TABLE image_packages ADD PRIMARY KEY (image_id,image_user_id,name,version,pkg_type,arch,pkg_path)',
            'ALTER TABLE image_package_vulnerabilities ADD PRIMARY KEY (pkg_user_id,pkg_image_id,pkg_name,pkg_version,pkg_type,pkg_arch,vulnerability_id,pkg_path)',
            'ALTER TABLE image_package_db_entries ADD PRIMARY KEY (image_id, image_user_id, pkg_name, pkg_version, pkg_type, pkg_arch, pkg_path,file_path)',
            'ALTER TABLE image_package_vulnerabilities ADD CONSTRAINT image_package_vulnerabilities_pkg_image_id_fkey FOREIGN KEY (pkg_image_id, pkg_user_id, pkg_name, pkg_version, pkg_type, pkg_arch, pkg_path) REFERENCES image_packages (image_id, image_user_id, name, version, pkg_type, arch, pkg_path) MATCH SIMPLE',
            'ALTER TABLE image_package_db_entries ADD CONSTRAINT image_package_db_entries_image_id_fkey FOREIGN KEY (image_id, image_user_id, pkg_name, pkg_version, pkg_type, pkg_arch, pkg_path) REFERENCES image_packages (image_id, image_user_id, name, version, pkg_type, arch, pkg_path) MATCH SIMPLE',
        ]

        log.err(
            "updating primary key / foreign key relationships for new column")
        cmdcount = 1
        for command in exec_commands:
            log.err("running update operation {} of {}: {}".format(
                cmdcount, len(exec_commands), command))
            engine.execute(command)
            cmdcount = cmdcount + 1

        log.err(
            "converting ImageNpm and ImageGem records into ImagePackage records"
        )
        # migrate ImageNpm and ImageGem records into ImagePackage records
        with session_scope() as dbsession:
            db_npms = dbsession.query(ImageNpm)
            db_gems = dbsession.query(ImageGem)

        gems = []
        npms = []
        try:
            for n in db_npms:
                np = ImagePackage()

                # primary keys
                np.name = n.name
                np.version = n.versions_json[0]
                np.pkg_type = 'npm'
                np.arch = 'N/A'
                np.image_user_id = n.image_user_id
                np.image_id = n.image_id
                np.pkg_path = n.path

                # other
                np.pkg_path_hash = n.path_hash
                np.distro_name = 'npm'
                np.distro_version = 'N/A'
                np.like_distro = 'npm'
                np.fullversion = np.version
                np.license = ' '.join(n.licenses_json)
                np.origin = ' '.join(n.origins_json)
                fullname = np.name
                np.normalized_src_pkg = fullname
                np.src_pkg = fullname
                npms.append(np)
        except Exception as err:
            raise err

        try:
            for n in db_gems:
                np = ImagePackage()

                # primary keys
                np.name = n.name
                np.version = n.versions_json[0]
                np.pkg_type = 'gem'
                np.arch = 'N/A'
                np.image_user_id = n.image_user_id
                np.image_id = n.image_id
                np.pkg_path = n.path

                # other
                np.pkg_path_hash = n.path_hash
                np.distro_name = 'gem'
                np.distro_version = 'N/A'
                np.like_distro = 'gem'
                np.fullversion = np.version
                np.license = ' '.join(n.licenses_json)
                np.origin = ' '.join(n.origins_json)
                fullname = np.name
                np.normalized_src_pkg = fullname
                np.src_pkg = fullname
                gems.append(np)
        except Exception as err:
            raise err

        with session_scope() as dbsession:
            log.err("merging npms: {} records to merge".format(len(npms)))
            try:
                for npm in npms:
                    dbsession.merge(npm)
            except Exception as err:
                raise err

        with session_scope() as dbsession:
            log.err("merging gems: {} records to merge".format(len(gems)))
            try:
                for gem in gems:
                    dbsession.merge(gem)
            except Exception as err:
                raise err