Beispiel #1
0
def rectify_latest_version(input):
    """Write corrected latest-version data for the given EPVs.

    Only entries carrying both 'ecosystem' and 'name' are considered. The
    target version is read from 'actual_latest_version' when that key is
    present, otherwise it is obtained from get_latest_versions_for_ep().
    Entries whose 'latest_version' (empty string when absent) already equals
    the target are skipped; all remaining ones are handed to
    batch_query_executor() in a single call.

    :param input: iterable of dicts describing the EPVs
    :return: dict with 'message' and 'status' keys (always the success payload)
    """
    update_query = "g.V().has('ecosystem', '{arg0}')" \
                   ".has('name', '{arg1}')" \
                   ".property('latest_version', '{arg2}')" \
                   ".property('latest_version_last_updated', '{arg3}');"
    # Every update in this run is stamped with the same UTC day (YYYYMMDD).
    stamp = datetime.utcnow().strftime('%Y%m%d')
    pending = []
    for entry in input:
        if not ('ecosystem' in entry and 'name' in entry):
            continue
        ecosystem, package = entry['ecosystem'], entry['name']
        if 'actual_latest_version' in entry:
            target = entry['actual_latest_version']
        else:
            target = get_latest_versions_for_ep(ecosystem, package)
        current = entry['latest_version'] if 'latest_version' in entry else ''
        # Only nodes whose recorded version differs need to be rewritten.
        if current != target:
            pending.append({"0": ecosystem, "1": package, "2": target, "3": stamp})

    outcome = batch_query_executor(update_query, pending)
    logger.info("Latest version updated for the EPVs -> {r}".format(r=outcome))
    return {
        "message": "Latest version rectified for the EPVs",
        "status": "Success"
    }
    def construct_graph_nodes(cls, epv):
        """Create query string to create empty EPV nodes.

        The returned Gremlin script looks up (or creates) the Package node and
        the Version node, bumps the per-ecosystem counters on the 'Count'
        vertex when a node is created, and finally links both nodes with a
        'has_version' edge unless one already exists.

        :param epv: dict with 'ecosystem', 'name' and 'version'; optional
                    'source_repo' and 'latest_version'
        :return: the Gremlin script as a string, or None when ecosystem,
                 name or version is missing/empty
        """
        ecosystem = epv.get('ecosystem')
        pkg_name = epv.get('name')
        version = epv.get('version')
        # Validate before doing anything else: an incomplete EPV must yield
        # None without triggering the upstream latest-version lookup.
        if not (ecosystem and pkg_name and version):
            return None

        source_repo = epv.get('source_repo', '')
        latest_version = epv.get('latest_version', '')
        if not latest_version:
            latest_version = get_latest_versions_for_ep(ecosystem, pkg_name)
        # One timestamp for both nodes so they carry the same 'last_updated'.
        last_updated = str(time.time())

        # NOTE(review): values are interpolated straight into the Gremlin
        # script; if EPV fields can come from untrusted input they should be
        # passed as bindings instead - confirm against the callers.

        # Query to Create Package Node
        # TODO: refactor into the separate module
        pkg_str = "pkg = g.V().has('ecosystem','{ecosystem}').has('name', '{pkg_name}')." \
                  "tryNext().orElseGet{{g.V()." \
                  "has('vertex_label','Count').choose(has('{ecosystem}_pkg_count')," \
                  "sack(assign).by('{ecosystem}_pkg_count').sack(sum).by(constant(" \
                  "1)).property('{ecosystem}_pkg_count',sack())," \
                  "property('{ecosystem}_pkg_count',1)).iterate();" \
                  "graph.addVertex('ecosystem', '{ecosystem}', " \
                  "'name', '{pkg_name}', 'vertex_label', 'Package');}};" \
                  "pkg.property('latest_version', '{latest_version}');" \
                  "pkg.property('last_updated', {last_updated});".format(
                    ecosystem=ecosystem, latest_version=latest_version, pkg_name=pkg_name,
                    last_updated=last_updated
                   )

        # Query to Create Version Node
        # TODO: refactor into the separate module
        ver_str = "ver = g.V().has('pecosystem', '{ecosystem}').has('pname', " \
                  "'{pkg_name}').has('version', '{version}').tryNext().orElseGet{{" \
                  "g.V().has('vertex_label','Count').choose(has('{ecosystem}_ver_count')," \
                  "sack(assign).by('{ecosystem}_ver_count').sack(sum).by(constant(" \
                  "1)).property('{ecosystem}_ver_count',sack())," \
                  "property('{ecosystem}_ver_count',1)).iterate();" \
                  "graph.addVertex('pecosystem','{ecosystem}', 'pname','{pkg_name}', " \
                  "'version', '{version}', 'vertex_label', 'Version');}};" \
                  "ver.property('last_updated',{last_updated});".format(
                    ecosystem=ecosystem, pkg_name=pkg_name, version=version,
                    last_updated=last_updated)
        # Add version node properties
        if source_repo:
            ver_str += "ver.property('source_repo','{source_repo}');".format(
                source_repo=source_repo)

        # Query to create an edge between Package Node to Version Node
        # TODO: refactor into the separate module
        edge_str = "edge_c = g.V().has('pecosystem','{ecosystem}').has('pname'," \
                   "'{pkg_name}').has('version','{version}').in(" \
                   "'has_version').tryNext()" \
                   ".orElseGet{{pkg.addEdge('has_version', ver)}};".format(
                    ecosystem=ecosystem, pkg_name=pkg_name, version=version)

        return pkg_str + ver_str + edge_str
def generate_report_for_latest_version(epv_list, day):
    """Build a report comparing the stored and the actual latest versions.

    The graph is queried for every package in epv_list. When the stored
    'latest_version_last_updated' stamp equals `day` or the day before it
    (both formatted as YYYYMMDD), the stored latest version is reported as
    the actual one; otherwise the version is obtained from
    get_latest_versions_for_ep().

    :param epv_list: list, list of EPVs (dicts with 'ecosystem' and 'name')
    :param day: reference date; must support strftime() and subtraction of a
                timedelta
    :return: dict keyed by '<ecosystem>@DELIM@<name>' holding version information
    """
    lookup_query = "g.V().has('ecosystem', '{arg0}')." \
                   "has('name', '{arg1}')" \
                   ".valueMap().dedup().fill(epv);"
    report = {}
    query_args = []
    for item in epv_list:
        ecosystem = item['ecosystem']
        package = item['name']
        query_args.append({"0": ecosystem, "1": package})
        # Start from an empty record; it is filled in from the query results.
        report[ecosystem + "@DELIM@" + package] = {
            "ecosystem": ecosystem,
            "name": package,
            "known_latest_version": "",
            "actual_latest_version": "",
            "non_cve_version": ""
        }

    rows = batch_query_executor(lookup_query, query_args)
    today = day.strftime('%Y%m%d')
    yesterday = (day - timedelta(days=1)).strftime('%Y%m%d')
    if rows is None:
        return report

    for row in rows:
        ecosystem = get_value(row, 'ecosystem')
        package = get_value(row, 'name')
        known_version = get_value(row, 'latest_version')
        safe_version = get_value(row, 'latest_non_cve_version')
        stamp = get_value(row, 'latest_version_last_updated')
        if stamp in (today, yesterday):
            actual_version = known_version
        else:
            _logger.info(
                "Dates don't match. Will pick the version from upstream for {e} {p}"
                .format(e=ecosystem, p=package))
            actual_version = get_latest_versions_for_ep(ecosystem, package)
        record = report[ecosystem + "@DELIM@" + package]
        record['actual_latest_version'] = actual_version
        record['known_latest_version'] = known_version
        record['non_cve_version'] = safe_version

    return report
    def create_query_string(cls, input_json):
        """Assemble the Gremlin script for a package or package+version record.

        The package part comes from construct_package_query() and is used only
        when it yields properties. When a non-empty version is present and
        construct_version_query() returns a query, that query is appended,
        followed by a package get-or-create statement (only if no package
        properties were produced) and the 'has_version' edge statement.

        :param input_json: dict read for 'package', 'ecosystem' and 'version'
        :return: the Gremlin script (possibly an empty string)
        """
        # TODO add check of JSON against the schema
        # TODO add check for existence of the 'package' and 'ecosystem' attributes
        package = input_json.get('package')
        eco = input_json.get('ecosystem')
        version = cls.sanitize_text_for_query(input_json.get('version'))

        pkg_query, pkg_props = cls.construct_package_query(input_json)
        gremlin = (pkg_query + pkg_props) if pkg_props else ""

        version_query = None
        if not (version is None or version == ''):
            version_query = cls.construct_version_query(input_json)

        if version_query:
            gremlin += version_query
            if not pkg_props:
                # The package query was not emitted above, so 'pkg' has to be
                # defined here before the edge statement can refer to it.
                # TODO: refactor into the separate module
                latest = get_latest_versions_for_ep(eco, package)
                gremlin += "pkg = g.V().has('ecosystem','{ecosystem}')." \
                           "has('name', '{pkg_name}').tryNext().orElseGet{{" \
                           "g.V().has('vertex_label','Count').choose(has('" \
                           "{ecosystem}_pkg_count'),sack(assign).by('" \
                           "{ecosystem}_pkg_count').sack(sum).by(constant(1))." \
                           "property('{ecosystem}_pkg_count',sack()),property(" \
                           "'{ecosystem}_pkg_count',1)).iterate();graph.addVertex(" \
                           "'ecosystem', '{ecosystem}', 'name', '{pkg_name}', " \
                           "'vertex_label', 'Package');}};" \
                           "pkg.property('latest_version', '{latest_version}');" \
                           "pkg.property('last_updated', {last_updated});".format(
                            ecosystem=eco, latest_version=latest,
                            pkg_name=package, last_updated=str(time.time()))
            # Add edge from Package to Version
            # TODO: refactor into the separate module
            gremlin += "edge_c = g.V().has('pecosystem','{ecosystem}').has('pname'," \
                       "'{pkg_name}').has('version','{version}').in(" \
                       "'has_version').tryNext()" \
                       ".orElseGet{{pkg.addEdge('has_version', ver)}};".format(
                        ecosystem=eco, pkg_name=package, version=version)

        logger.info("Gremlin Query: %s" % gremlin)
        return gremlin
def test_get_latest_versions_for_ep():
    """Check get_latest_versions_for_ep for known packages and invalid input."""
    # Packages for which a result (anything but None) is expected.
    resolvable = (
        ("maven", "tomcat:catalina"),
        ("maven", "org.abcl:abcl"),
        ("pypi", "numpy"),
        ("npm", "array"),
    )
    for ecosystem, package in resolvable:
        assert get_latest_versions_for_ep(ecosystem, package) is not None

    # An unsupported ecosystem and a missing package name must be rejected.
    rejected = (
        ("cobol", "cds-parsers"),
        ("maven", None),
    )
    for ecosystem, package in rejected:
        with pytest.raises(ValueError):
            get_latest_versions_for_ep(ecosystem, package)
def generate_report_for_latest_version(epv_list):
    """Build a report comparing the stored and the upstream latest versions.

    For every EPV the upstream version is obtained from
    get_latest_versions_for_ep(); the version stored in the graph is then
    filled in from the results of one batched query.

    :param epv_list: list, list of EPVs (dicts with 'ecosystem' and 'name')
    :return: dict keyed by '<ecosystem>@<name>' holding version information
    """
    lookup_query = "g.V().has('ecosystem', '{arg0}')." \
                   "has('name', '{arg1}')" \
                   ".valueMap().dedup().fill(epv);"
    report = {}
    query_args = []
    for item in epv_list:
        ecosystem, package = item['ecosystem'], item['name']
        query_args.append({"0": ecosystem, "1": package})
        upstream_version = get_latest_versions_for_ep(ecosystem, package)
        report[ecosystem + "@" + package] = {
            "ecosystem": ecosystem,
            "name": package,
            "known_latest_version": "",
            "actual_latest_version": upstream_version
        }

    rows = batch_query_executor(lookup_query, query_args)
    if rows is None:
        return report

    for row in rows:
        ecosystem = get_value(row, 'ecosystem')
        package = get_value(row, 'name')
        stored_version = get_value(row, 'latest_version')
        report[ecosystem + "@" + package]['known_latest_version'] = stored_version

    return report
    def construct_graph_nodes(cls, epv):
        """Create query string and bindings to create empty EPV nodes.

        The Gremlin script refers to its values by binding name only; the
        actual values are returned separately in the bindings dict. The script
        looks up (or creates) the Package and Version nodes, bumps the
        per-ecosystem counters on the 'Count' vertex when a node is created,
        and links both nodes with a 'has_version' edge unless one exists.

        :param epv: dict with 'ecosystem', 'name' and 'version'; optional
                    'source_repo', 'license' (iterable), 'gh_link' and
                    'latest_version'
        :return: tuple (script, bindings), or (None, None) when ecosystem,
                 name or version is missing/empty
        """
        ecosystem = epv.get('ecosystem')
        pkg_name = epv.get('name')
        version = epv.get('version')
        # Validate first: building the counter names concatenates onto
        # `ecosystem`, which fails for None, and an incomplete EPV should not
        # trigger the upstream latest-version lookup either.
        if not (ecosystem and pkg_name and version):
            return None, None

        source_repo = epv.get('source_repo', '')
        licenses = epv.get('license', [])
        gh_link = epv.get('gh_link', '')
        latest_version = epv.get('latest_version', '')
        if not latest_version:
            latest_version = get_latest_versions_for_ep(ecosystem, pkg_name)
        bindings = {
            "ecosystem": ecosystem,
            "name": pkg_name,
            "version": version,
            "repo": source_repo,
            "gh_link": gh_link,
            "latest": latest_version,
            "ep_count": ecosystem + "_pkg_count",
            "epv_count": ecosystem + "_ver_count",
            "last_updated": str(time.time()),
            "vertex_p": "Package",
            "vertex_c": "Count",
            "vertex_v": "Version"
        }

        # Query to Create Package Node
        # TODO: refactor into the separate module
        pkg_str = "pkg = g.V().has('ecosystem',ecosystem).has('name', name)." \
                  "tryNext().orElseGet{g.V()." \
                  "has('vertex_label',vertex_c).choose(has(ep_count)," \
                  "sack(assign).by(ep_count).sack(sum).by(constant(" \
                  "1)).property(ep_count,sack())," \
                  "property(ep_count,1)).iterate();" \
                  "graph.addVertex('ecosystem', ecosystem, " \
                  "'name', name, 'vertex_label', vertex_p);};" \
                  "pkg.property('latest_version', latest);" \
                  "pkg.property('last_updated', last_updated);"

        # Query to Create Version Node
        # TODO: refactor into the separate module
        ver_str = "ver = g.V().has('pecosystem', ecosystem).has('pname', " \
                  "name).has('version', version).tryNext().orElseGet{" \
                  "g.V().has('vertex_label', vertex_c).choose(has(epv_count)," \
                  "sack(assign).by(epv_count).sack(sum).by(constant(" \
                  "1)).property(epv_count,sack())," \
                  "property(epv_count,1)).iterate();" \
                  "graph.addVertex('pecosystem',ecosystem, 'pname',name, " \
                  "'version', version, 'vertex_label', vertex_v);};" \
                  "ver.property('last_updated',last_updated);"
        # Add version node properties
        if source_repo:
            ver_str += "ver.property('source_repo', repo);"

        # Each declared license gets its own binding (lic1, lic2, ...).
        if licenses:
            for counter, lic in enumerate(licenses, start=1):
                binding_name = "lic" + str(counter)
                ver_str += "ver.property('declared_licenses', " + binding_name + ");"
                bindings[binding_name] = lic

        # Add package node properties
        if gh_link:
            pkg_str += "pkg.property('gh_link', gh_link);"

        # Query to create an edge between Package Node to Version Node
        # TODO: refactor into the separate module
        edge_str = "edge_c = g.V().has('pecosystem', ecosystem).has('pname'," \
                   "name).has('version', version).in(" \
                   "'has_version').tryNext()" \
                   ".orElseGet{pkg.addEdge('has_version', ver)};"

        return pkg_str + ver_str + edge_str, bindings
Beispiel #8
0
def _import_keys_from_s3_http(data_source, epv_list):
    """Import the given EPVs into the graph through the Gremlin REST endpoint.

    For every entry the latest-version node is created first, then the
    version-level and package-level data are read via `data_source`, merged
    into one object, turned into a Gremlin script and posted to
    config.GREMLIN_SERVER_URL_REST. A failure while processing one entry is
    logged and recorded in the report; the loop then continues with the next
    entry.

    :param data_source: handle passed through to _first_key_info() and
                        _other_key_info()
    :param epv_list: iterable of dicts; each value is a dict read for
                     'ecosystem', 'package', 'version', 'source_repo',
                     'pkg_list_keys', 'ver_list_keys' and 'ver_key_prefix'
    :return: report dict with 'status', 'message', 'epv',
             'count_imported_EPVs' and 'last_imported_EPV'
    """
    # TODO: reduce cyclomatic complexity
    logger.debug("Begin import...")
    report = {'status': 'Success', 'message': 'The import finished successfully!'}
    count_imported_EPVs = 0
    last_imported_EPV = None
    # Collects "ecosystem:package:version" strings of every EPV sent to Gremlin.
    epv = []
    for epv_key in epv_list:
        # `key` itself is not used; only the per-EPV contents matter.
        for key, contents in epv_key.items():
            # Nothing stored for this EPV at either level: note it and move on.
            if len(contents.get('pkg_list_keys')) == 0 and len(contents.get('ver_list_keys')) == 0:
                report['message'] = 'Nothing to be imported! No data found on S3 to be imported!'
                continue
            pkg_ecosystem = contents.get('ecosystem')
            pkg_name = contents.get('package')
            pkg_version = contents.get('version') or ''
            # The ecosystem doubles as the source repo when none is given.
            pkg_source = contents.get('source_repo', pkg_ecosystem)

            obj = {
                'ecosystem': pkg_ecosystem,
                'package': pkg_name,
                'version': pkg_version,
                'source_repo': pkg_source}

            # Create graph nodes for the package's latest version before
            # importing the requested version. This happens outside the try
            # block below, so an exception here is not caught by it.
            latest_version = get_latest_versions_for_ep(pkg_ecosystem, pkg_name)
            latest_epv_list = [{
                'ecosystem': pkg_ecosystem,
                'name': pkg_name,
                'version': latest_version
            }]
            create_graph_nodes(latest_epv_list)

            try:
                # Check other Version level information and add it to common object
                if len(contents.get('ver_list_keys')) > 0:
                    first_key = contents['ver_key_prefix'] + '.json'
                    first_obj = _first_key_info(data_source, first_key, config.AWS_EPV_BUCKET)
                    first_obj['latest_version'] = latest_version
                    obj.update(first_obj)
                    ver_obj = _other_key_info(data_source, contents.get('ver_list_keys'),
                                              config.AWS_EPV_BUCKET)
                    # Merge into existing analyses when present, otherwise
                    # take over the whole object.
                    if 'analyses' in obj:
                        obj.get('analyses', {}).update(ver_obj['analyses'])
                    else:
                        obj.update(ver_obj)

                # Check Package related information and add it to package object
                if len(contents.get('pkg_list_keys')) > 0:
                    pkg_obj = _other_key_info(data_source, contents.get('pkg_list_keys'),
                                              config.AWS_PKG_BUCKET)
                    if 'analyses' in obj:
                        obj.get('analyses', {}).update(pkg_obj['analyses'])
                    else:
                        obj.update(pkg_obj)

                # Create Gremlin Query
                str_gremlin = GraphPopulator.create_query_string(obj)

                if str_gremlin:
                    # Fire Gremlin HTTP query now
                    epv_full = pkg_ecosystem + ":" + pkg_name + ":" + pkg_version
                    logger.info("Ingestion initialized for EPV - %s" % epv_full)
                    epv.append(epv_full)
                    payload = {'gremlin': str_gremlin}
                    response = requests.post(config.GREMLIN_SERVER_URL_REST,
                                             data=json.dumps(payload), timeout=30)
                    resp = response.json()

                    # Only a status code of 200 in the response body counts
                    # as a successful import.
                    if resp['status']['code'] == 200:
                        count_imported_EPVs += 1
                        last_imported_EPV = (obj.get('ecosystem') + ":" + obj.get('package') +
                                             ":" + obj.get('version'))

                        # update first key with graph synced tag
                        logger.info("Mark as synced in RDS %s" % last_imported_EPV)
                        if not config.AWS_S3_IS_LOCAL:  # pragma: no cover
                            PostgresHandler().mark_epv_synced(
                                obj.get('ecosystem'),
                                obj.get('package'),
                                obj.get('version')
                            )

            except Exception as e:  # pragma: no cover
                # Record the failure and keep going with the remaining EPVs.
                logger.error(e)
                msg = _get_exception_msg("The import failed", e)
                report['status'] = 'Failure'
                report['message'] = msg
                # NOTE(review): this value is overwritten with epv_list right
                # after the loop, so the failing entry is not what gets returned.
                report['epv'] = epv_key

    report['epv'] = epv_list
    report['count_imported_EPVs'] = count_imported_EPVs
    if count_imported_EPVs == 0 and report['status'] == 'Success':
        report['message'] = 'Nothing to be synced to Graph!'
    report['last_imported_EPV'] = last_imported_EPV

    return report
def test_get_latest_versions_for_ep():
    """Check get_latest_versions_for_ep for known, unknown and invalid input."""
    # Existing packages: a result (anything but None) is expected.
    resolvable = (
        ("maven", "tomcat:catalina"),
        ("maven", "org.abcl:abcl"),
        ("pypi", "numpy"),
        ("npm", "array"),
        ("npm", "lerna-tt-pk2-sy"),
        ("golang", "github.com/grafana/grafana"),
    )
    for ecosystem, package in resolvable:
        assert get_latest_versions_for_ep(ecosystem, package) is not None

    # Packages that do not exist upstream: a falsy result is expected.
    unknown = (
        ("golang", "no_such_pkg_exist"),
        ("npm", "abyzdeopkl"),
        ("maven", "abyzdeopkl"),
        ("pypi", "abyzdeopkl"),
    )
    for ecosystem, package in unknown:
        assert not get_latest_versions_for_ep(ecosystem, package)

    # An unsupported ecosystem and a missing package name must be rejected.
    rejected = (
        ("cobol", "cds-parsers"),
        ("maven", None),
    )
    for ecosystem, package in rejected:
        with pytest.raises(ValueError):
            get_latest_versions_for_ep(ecosystem, package)