def test_post_gremlin_normal(_mock_post):
    """Test that post_gremlin sends query and bindings in the request payload."""
    post_gremlin(query='gremlin_query', bindings={'val': 123})
    # exactly one HTTP call should have been made
    _mock_post.assert_called_once()
    # inspect the JSON payload of that single call
    kwargs = _mock_post.call_args_list[0][1]['json']
    assert kwargs['gremlin'] == 'gremlin_query'
    assert kwargs['bindings'] == {'val': 123}
def _get_data_from_graph(self, packages, query, caller=None) -> Dict:
    """Get package data from graph along with vulnerability.

    :param packages: iterable of package descriptors used as query bindings
    :param query: Gremlin query template (leading whitespace is stripped)
    :param caller: name of the calling routine, used only for timing logs
    :return: dict of shape {"result": {"data": [...]}} accumulated over batches
    """
    logger.info('Executing _get_data_from_db.')
    time_start = time.time()
    pkgs_with_vuln = {"result": {"data": []}}
    # get rid of leading white spaces
    query = inspect.cleandoc(query)
    bindings = {
        'ecosystem': self._normalized_packages.ecosystem,
        'packages': []
    }
    # call gremlin in batches of GREMLIN_QUERY_SIZE; use a distinct loop
    # variable so the `packages` parameter is not shadowed
    for batch in _get_packages_in_batch(packages, GREMLIN_QUERY_SIZE):
        bindings['packages'] = list(batch)
        started_at = time.time()
        result = post_gremlin(query, bindings)
        logger.info('%s took %0.2f secs for post_gremlin() batch request',
                    self._request.external_request_id,
                    time.time() - started_at)
        if result:
            pkgs_with_vuln['result']['data'] += result['result']['data']
    # note the trailing space before the continuation string: without it the
    # message would render as "...secs for <caller>for total_results ..."
    logger.info('%s took %0.2f secs for %s '
                'for total_results %d', self._request.external_request_id,
                time.time() - time_start, caller,
                len(pkgs_with_vuln['result']['data']))
    return pkgs_with_vuln
def _get_recommended_package_details(
        self, insights_response) -> List[RecommendedPackageData]:
    """Build RecommendedPackageData entries for insights companion packages.

    Looks up each companion package in the graph, drops packages without a
    usable (non-vulnerable) version, and merges the graph data with the
    recommendation statistics from the insights response.

    :param insights_response: dict with "companion_packages" and "ecosystem"
    :return: list of RecommendedPackageData for packages with a valid version
    """
    companion_packages = insights_response.get("companion_packages", [])
    # map package name -> its insights stats entry (dict comprehension
    # instead of dict(map(lambda ...)))
    package_to_stats_map = {
        stats.get("package_name"): stats for stats in companion_packages
    }
    packages = list(package_to_stats_map.keys())
    ecosystem = insights_response["ecosystem"]
    query = (
        """g.V().has('ecosystem', ecosystem).has('name', within(name)).valueMap()"""
    )
    started_at = time.time()
    result = post_gremlin(query=query,
                          bindings={
                              "ecosystem": ecosystem,
                              "name": packages
                          })
    logger.info(
        "graph req.pkgs [%d] elapsed time [%0.2f] sec",
        len(packages),
        time.time() - started_at,
    )

    def extract_version(data: Dict) -> str:
        # all versions are not vulnerable if latest_non_cve_version doesn't exist.
        # all versions are vulnerable if latest_non_cve_version is empty.
        recommended_version = data.get("latest_non_cve_version",
                                       data.get("latest_version", [""]))
        # truthiness instead of len(): empty list -> ""
        return recommended_version[0] if recommended_version else ""

    INVALID_VERSIONS = ["", "-1"]

    def has_valid_version(data: Dict) -> bool:
        return str(extract_version(data)) not in INVALID_VERSIONS

    def get_recommendation_statistics(package_name: str) -> Dict[str, str]:
        # below dict has cooccurrence_probability, cooccurrence_count, topic_list
        return package_to_stats_map[package_name]

    def map_to_recommendation_package_data(data):
        name = data.get("name", [""])[0]
        version = extract_version(data)
        return RecommendedPackageData(
            name=name,
            version=version,
            github=get_github_details(data),
            licenses=data.get("declared_licenses", []),
            ecosystem=ecosystem,
            url=get_snyk_package_link(ecosystem, name),
            latest_version=data.get("latest_version", [""])[0],
            # join stats from insight
            **get_recommendation_statistics(name),
        )

    valid_packages = filter(has_valid_version, result["result"]["data"])
    return list(map(map_to_recommendation_package_data, valid_packages))
def _get_package_details_with_vulnerabilities(
        self) -> List[Dict[str, object]]:
    """Get package data from graph along with vulnerability."""
    overall_start = time.time()
    accumulated = {"result": {"data": []}}
    query = """
    epv = [];
    packages.each {
        g.V().has('pecosystem', ecosystem).
        has('pname', it.name).
        has('version', it.version).as('version', 'vuln').
        select('version').in('has_version').dedup().as('package').
        select('package', 'version', 'vuln').
        by(valueMap()).
        by(valueMap()).
        by(out('has_snyk_cve').valueMap().fold()).
        fill(epv);
    }
    epv;
    """
    # get rid of leading white spaces
    query = inspect.cleandoc(query)
    bindings = {
        'ecosystem': self._normalized_packages.ecosystem,
        'packages': []
    }
    all_deps = self._normalized_packages.all_dependencies
    # call gremlin in batches of GREMLIN_QUERY_SIZE
    for batch in _get_packages_in_batch(all_deps, GREMLIN_QUERY_SIZE):
        # convert Tuple[Package] into List[{name:.., version:..}]
        bindings['packages'] = [
            package.dict(exclude={'dependencies'}) for package in batch
        ]
        batch_start = time.time()
        response = post_gremlin(query, bindings)
        logger.info('%s took %0.2f secs for post_gremlin() batch request',
                    self._request.external_request_id,
                    time.time() - batch_start)
        if response:
            accumulated['result']['data'] += response['result']['data']
    logger.info(
        '%s took %0.2f secs for get_package_details_with_'
        'vulnerabilities() for total_results %d',
        self._request.external_request_id,
        time.time() - overall_start, len(accumulated['result']['data']))
    return accumulated['result']['data']
def get_version_information(input_list, ecosystem):
    """Fetch the version information for each of the packages.

    Also remove EPVs with CVEs and ones not present in Graph

    :param input_list: iterable of package names to look up
    :param ecosystem: ecosystem the packages belong to
    :return: list of {package, version} records, or [] when gremlin fails
    """
    # NOTE(review): package and ecosystem names are interpolated directly into
    # the Gremlin script below. Callers must pass trusted values; ideally this
    # query would use gremlin bindings instead of string formatting.
    per_package_query = (
        "pkg = g.V().has('ecosystem', '{eco}').has('name', '{pkg}'); "
        "lnv = []; pkg.clone().values('latest_non_cve_version', "
        "'latest_version').fill(lnv); pkg.clone().as('package').V()."
        "has('pecosystem', '{eco}').has('pname', '{pkg}')."
        "has('version', within(lnv)).as('version')."
        "select('package', 'version').by(valueMap()).fill(data);"
    )
    # build the script with a single join instead of quadratic += in a loop
    parts = ["data=[]; "]
    parts.extend(
        per_package_query.format(eco=ecosystem, pkg=package)
        for package in input_list)
    parts.append("data")
    str_query = "".join(parts)

    # Query Gremlin with packages list to get their version information
    gremlin_response = post_gremlin(str_query)
    if gremlin_response is None:
        return []
    return get_response_data(gremlin_response, [{0: 0}])
def test_post_gremlin_exception(_mock_post):
    """Test error response for gremlin."""
    # 'GremlinExeception' spelling matches the project-defined class name.
    with raises(GremlinExeception):
        # query value fixed to 'gremlin_query' for consistency with
        # test_post_gremlin_normal; the mock raises regardless of the payload
        post_gremlin(query='gremlin_query', bindings={'val': 123})