def filter_vulnerable_packages(cls, vulnerabilities: list, package_version_map: dict) -> dict:
    """Filter vulnerable packages based on timestamp in pseudo version.

    :param vulnerabilities: vulnerability records; each carries single-element
        lists under 'package_name' and 'vuln_commit_date_rules'.
    :param package_version_map: map of package name -> {version: ...}.
    :return: map of package name -> version -> {'cve': [matching vulns]}.
    """
    logger.debug('Executing filter_vulnerable_packages')
    filter_vulnerabilities = {}
    gh = GithubUtils()
    for vuln in vulnerabilities:
        package_name = vuln['package_name'][0]
        # .get guards against a vulnerability whose package is missing from
        # the input map (the original indexed directly and could KeyError).
        # Iterating the dict yields its version keys; the values were unused.
        for package_version in package_version_map.get(package_name, {}):
            if gh._is_commit_date_in_vuln_range(
                    gh.extract_timestamp(package_version),
                    vuln['vuln_commit_date_rules'][0]):
                versions = filter_vulnerabilities.setdefault(package_name, {})
                versions.setdefault(package_version, {'cve': []})['cve'].append(vuln)
    return filter_vulnerabilities
def test_get_verion_list():
    """Test _get_verion_list."""
    utils = GithubUtils()
    # Empty org/repo yields no version list.
    assert utils._get_verion_list("", "") is None
    # A real repository returns its known versions.
    versions = utils._get_verion_list("qor", "admin")
    assert "1.0" in versions
def ca_validate_input(input_json: Dict, ecosystem: str) -> Tuple[List[Dict], List[Package]]:
    """Validate CA Input."""
    logger.debug('Validating ca input data.')
    if not input_json:
        raise BadRequest("Expected JSON request")
    if not isinstance(input_json, dict):
        raise BadRequest("Expected list of dependencies in JSON request")
    if not check_for_accepted_ecosystem(ecosystem):
        raise BadRequest(f"Ecosystem {ecosystem} is not supported for this request")
    package_versions = input_json.get('package_versions')
    if not package_versions:
        raise BadRequest("package_versions is missing")

    gh = GithubUtils()
    packages_list = []
    normalised_input_pkgs = []
    for pkg in package_versions:
        pseudo_version = False
        package = given_package = pkg.get("package")
        clean_version = given_version = pkg.get("version")
        # Both fields must be present and truthy.
        if not (package and given_version):
            raise BadRequest("Invalid Input: Package, Version are required.")
        if not (isinstance(given_version, str) and isinstance(package, str)):
            raise BadRequest("Package version should be string format only.")
        if not validate_version(given_version):
            raise BadRequest("Package version should not have special characters.")

        # Per-ecosystem normalization of the package identifier.
        if ecosystem == 'maven':
            package = MavenCoordinates.normalize_str(package)
        elif ecosystem == 'pypi':
            package = package.lower()
        elif ecosystem == 'golang':
            _, clean_version = GolangDependencyTreeGenerator.clean_version(given_version)
            pseudo_version = gh.is_pseudo_version(clean_version)
            # Strip module appended to the package name
            package = package.split('@')[0]

        packages_list.append({
            "name": package,
            "given_name": given_package,
            "version": clean_version,
            "given_version": given_version,
            "is_pseudo_version": pseudo_version,
        })
        normalised_input_pkgs.append(normlize_packages(
            package, given_package, clean_version, given_version, pseudo_version))

    return packages_list, normalised_input_pkgs
def __init__(self, pkg):
    """Init method for GolangUtils class."""
    # Known versions of the package; filled in by __populate_data.
    self.version_list = []
    # Lookup mode: "pkg", "mod", or "Not Found" once data is resolved.
    self.mode = None
    # Sentinel "-1" until a real latest version is computed.
    self.latest_version = "-1"
    # GitHub repository link; resolved lazily.
    self.gh_link = None
    # Declared license(s); resolved lazily.
    self.license = None
    # Module path(s) the package belongs to.
    self.module = []
    self.gh = GithubUtils()
    # Kick off scraping/population for the given package immediately.
    self.__populate_data(pkg)
def test_is_commit_in_date_range():
    """Test _is_commit_in_date_range."""
    utils = GithubUtils()
    org = repo = "kubernetes"
    sha = "0d4799964558"
    # Empty arguments cannot be resolved at all.
    assert utils._is_commit_in_vuln_range("", "", "", "") is None
    combined_rules = (">#2020-09-15T13:19:13Z&<=#2020-09-16T13:19:13Z,"
                      ">=#2020-09-16T13:19:13Z&<#2020-09-17T13:19:13Z,"
                      "=#2020-09-17T13:19:13Z")
    assert utils._is_commit_in_vuln_range(org, repo, sha, combined_rules) is True
    # Wildcard matches every commit.
    assert utils._is_commit_in_vuln_range(org, repo, sha, "*") is True
    # Malformed operators are rejected.
    assert utils._is_commit_in_vuln_range(
        org, repo, sha, "$%#2020-09-17T13:19:13Z") is False
    assert utils._is_commit_in_vuln_range(
        org, repo, sha, "$#2020-09-17T13:19:13Z,%#2020-09-17T13:19:13Z") is False
    # Point the client at an unreachable API to force a lookup failure.
    utils.GITHUB_API = "http://www.gibberish_my_data.com/"
    assert utils._is_commit_in_vuln_range(org, repo, sha, "*") is None
def test_get_commit_date():
    """Test _get_commit_date."""
    utils = GithubUtils()
    # (git ref, expected commit date) pairs against kubernetes/kubernetes.
    expectations = [
        ("v1.19.1", "2020-09-09T11:17:20Z"),
        ("0d4799964558b1e96587737613d6e79e1679cb82", "2020-09-17T13:19:13Z"),
        ("95b5b7d61338aa0f4c601e820e1d8f3e45696bbc", "2020-09-09T11:17:20Z"),
    ]
    for ref, expected in expectations:
        assert utils._get_commit_date("kubernetes", "kubernetes", ref) == expected
def test_is_commit_date_in_vuln_range():
    """Test _is_commit_date_in_vuln_range."""
    utils = GithubUtils()
    # Empty inputs cannot be evaluated.
    assert utils._is_commit_date_in_vuln_range("", "") is None
    rules = (">#2020-09-15T13:19:13Z&<=#2020-09-16T13:19:13Z,"
             ">=#2020-09-16T13:19:13Z&<#2020-09-17T13:19:13Z,"
             "=#2020-09-17T13:19:13Z")
    assert utils._is_commit_date_in_vuln_range("20200916101010", rules) is True
    # A commit sha is not a timestamp, even against a wildcard rule.
    assert utils._is_commit_date_in_vuln_range("0d4799964558", "*") is None
def __init__(self, packages: List[Package], ecosystem: Ecosystem):
    """Create NormalizedPackages by removing all duplicates from packages."""
    packages, self._modules = clean_and_get_pkgs(packages)
    super().__init__(packages, ecosystem)
    github_utils = GithubUtils()
    self._version_map = {}
    self.pseudo = set()
    # Collect every dependency pinned to a Go pseudo version,
    # remembering name -> pseudo version for later lookups.
    for dep in self.all_dependencies:
        if github_utils.is_pseudo_version(dep.version):
            self.pseudo.add(dep)
            self._version_map[dep.name] = dep.version
    # Everything not pseudo-versioned forms the remaining flat set.
    self._all_except_pseudo = self._all.difference(self.pseudo)
def _set_commit_hash_rules(self, data, gh_link):
    """To set the commit hash rules.

    Translates each entry of data['hashesRange'] into commit-date rules
    (e.g. ">#<date>" or ">#<date1>&<#<date2>") by resolving each commit
    hash to its date via the GitHub API, and stores the comma-joined
    result in data['commitRules'].
    """
    hashes_range = data['hashesRange']
    # If there is no range, it means all commits are vulnerable.
    if not hashes_range:
        data['commitRules'] = '*'
        return data
    gh = GithubUtils()
    # The org and repo name are the last two path segments of the gh link.
    org, name = gh_link.split("/")[-2:]
    regex_vr = "[<>=*]+"
    regex_op = "[0-9a-zA-Z\\_\\.\\-]+"
    rules = []
    # NOTE: loop variable renamed from 'range', which shadowed the builtin.
    for hash_range in hashes_range:
        # Remove any blank spaces.
        hash_range = hash_range.replace(" ", "")
        operands = re.split(regex_vr, hash_range)
        operators = re.split(regex_op, hash_range)
        if len(operators) == 2 and len(operands) == 2:
            # It means there is only 1 condition.
            date = gh._get_commit_date(
                org, name, self.__format_golang_version(operands[1]))
            if date:
                rules.append(operators[0] + "#" + date)
            else:
                logger.error(
                    "No such data present on Github. Contact Snyk.")
        elif len(operators) == 3 and len(operands) == 3:
            # It means there is a nesting. Ex >x & <y.
            date1 = gh._get_commit_date(
                org, name, self.__format_golang_version(operands[1]))
            date2 = gh._get_commit_date(
                org, name, self.__format_golang_version(operands[2]))
            if date1 and date2:
                rules.append(operators[0] + "#" + date1 +
                             "&" + operators[1] + "#" + date2)
            else:
                logger.error(
                    "No such data present on Github. Contact Snyk.")
        else:
            logger.error("Incorrect hashesRange data. Contact Snyk.")
    # join yields the same string the original built and then trimmed
    # the trailing comma from (empty string when no rule resolved).
    data['commitRules'] = ",".join(rules)
    return data
def get_known_unknown_pkgs(
        ecosystem: str, graph_response: Dict, normalised_input_pkgs: List,
        ignore: Dict = None) -> Tuple[List[Dict], Set[Package]]:
    """Analyse Known and Unknown Packages.

    :param ecosystem: Ecosystem
    :param graph_response: Graph Response
    :param normalised_input_pkgs: Normalised Input Packages
    :param ignore: Packages to be ignored while showing vulnerabilities.
    :return: Stack Recommendations, Unknown Pkgs
    """
    # Default to a fresh dict per call; the original `ignore: Dict = {}`
    # was a mutable default argument shared across calls.
    if ignore is None:
        ignore = {}
    normalised_input_pkg_map = None
    # Mapping is required only for Golang.
    if ecosystem == 'golang':
        normalised_input_pkg_map = {
            get_package_version_key(input_pkg.package, input_pkg.version): {
                'given_name': input_pkg.given_name,
                'version': input_pkg.version,
                'given_version': input_pkg.given_version
            }
            for input_pkg in normalised_input_pkgs
        }
    stack_recommendation = []
    db_known_packages = set()
    gh = GithubUtils()
    for package in graph_response.get('result', {}).get('data'):
        pkg_name = package.get('package').get('name', [''])[0]
        clean_version = get_clean_version(
            pkg_name, package.get('version').get('version', [''])[0],
            normalised_input_pkg_map)
        # Pseudo versions only exist in the Go ecosystem.
        pseudo_version = gh.is_pseudo_version(
            clean_version) if ecosystem == 'golang' else False
        given_pkg_name, given_pkg_version = get_given_name_and_version(
            pkg_name, clean_version, normalised_input_pkg_map)
        pkg_recomendation = CABatchResponseBuilder(ecosystem). \
            generate_recommendation(package, given_pkg_name, given_pkg_version, ignore)
        stack_recommendation.append(pkg_recomendation)
        db_known_packages.add(
            normlize_packages(name=pkg_name, given_name=given_pkg_name,
                              version=clean_version,
                              given_version=given_pkg_version,
                              is_pseudo_version=pseudo_version))
    # Whatever the graph did not return is unknown.
    input_dependencies = set(normalised_input_pkgs)
    unknown_pkgs: Set = input_dependencies.difference(db_known_packages)
    return stack_recommendation, unknown_pkgs
def _filter_vulnerable_packages(self, vulnerabilities: List) -> Dict:
    """Filter out vulnerabilities whose commit sha is out of vuln_commit_rules."""
    logger.info('Executing filter_vulnerable_packages')
    matched = defaultdict(list)
    github_utils = GithubUtils()
    for vulnerability in vulnerabilities:
        name = vulnerability.get('package_name', [None])[0]
        rules = vulnerability.get('vuln_commit_date_rules', [None])[0]
        pseudo_version = self._normalized_packages.version_map.get(name)
        # Only pseudo-versioned packages carry a timestamp to check.
        if not pseudo_version:
            logger.debug("Not a Pseudo Version.")
            continue
        timestamp = github_utils.extract_timestamp(pseudo_version)
        # Evaluated eagerly (not short-circuited), as in the original.
        in_range = github_utils._is_commit_date_in_vuln_range(timestamp, rules)
        if rules and timestamp and in_range:
            matched[name].append(vulnerability)
    return matched
def clean_and_get_pkgs(packages) -> Tuple[List[Package], List[str]]:
    """Clean and get golang packages."""
    cleaned: List[Package] = []
    modules: List[str] = []
    github_utils = GithubUtils()
    for direct_pkg in packages:
        name, module = get_golang_metadata(direct_pkg)
        _, version = GolangDependencyTreeGenerator.clean_version(direct_pkg.version)
        # Track the module of every pseudo-versioned package.
        if github_utils.is_pseudo_version(version):
            modules.append(module)
        entry = Package(name=name, version=version, dependencies=[])
        for transitive in direct_pkg.dependencies or []:
            trans_name, trans_module = get_golang_metadata(transitive)
            _, trans_version = GolangDependencyTreeGenerator.clean_version(
                transitive.version)
            if github_utils.is_pseudo_version(trans_version):
                modules.append(trans_module)
            entry.dependencies.append(Package(name=trans_name, version=trans_version))
        cleaned.append(entry)
    return cleaned, modules
def test_extract_timestamp():
    """Test extract_timestamp."""
    # (version string, expected timestamp or None) pairs.
    cases = [
        ("1.3.4", None),
        ("v2.3.7", None),
        ("1.3.4-alpha", None),
        ("v.4.3.2-alpha", None),
        ("v2.5.4+incompatible", None),
        ("v0.0.0-20201010233445-abcd4321dcba", "20201010233445"),
        ("0.0.0-20201010233445-abcd4321dcba", "20201010233445"),
        ("20201010233445-abcd4321dcba", "20201010233445"),
        ("v0.0.0-20201010233445abcd4321dcba", "20201010233445"),
        ("v0.0.0-20201010233445-abcd4321", "20201010233445"),
        ("v0.0.0-202010102345-abcd4321dcba", None),
        ("v0.0.0-20201010233445-abcd4321dcba-alpha3.4", "20201010233445"),
        ("v0.0.0-20201010233445-abcd4321dcba+incompatible", "20201010233445"),
        ("v0.0.0-abcd4321dcba-20201010233445", "20201010233445"),
    ]
    utils = GithubUtils()
    for version, expected in cases:
        assert utils.extract_timestamp(version) == expected, \
            f"For {version} expected value: {expected}"
def test_is_pseudo_version():
    """Test is_pseudo_version."""
    # (version string, expected pseudo-version verdict) pairs.
    cases = [
        ("1.3.4", False),
        ("v2.3.7", False),
        ("1.3.4-alpha", False),
        ("v.4.3.2-alpha", False),
        ("v2.5.4+incompatible", False),
        ("v0.0.0-20201010233445-abcd4321dcba", True),
        ("0.0.0-20201010233445-abcd4321dcba", True),
        ("20201010233445-abcd4321dcba", False),
        ("v0.0.0-20201010233445abcd4321dcba", False),
        ("v0.0.0-20201010233445-abcd4321", False),
        ("v0.0.0-202010102345-abcd4321dcba", False),
        ("v0.0.0-20201010233445-abcd4321dcba-alpha3.4", True),
        ("v0.0.0-20201010233445-abcd4321dcba+incompatible", True),
        ("v0.0.0-abcd4321dcba-20201010233445", False),
    ]
    utils = GithubUtils()
    for version, expected in cases:
        assert utils.is_pseudo_version(version) == expected, \
            f"For {version} expected value: {expected}"
def test_get_date_from_semver():
    """Test _get_date_from_semver."""
    utils = GithubUtils()
    # None of these org/repo/tag combinations resolve, so each lookup fails.
    failing_inputs = [
        ("wiuroruw", "gshfkjlsdjkh", "v1.19.1"),
        ("", "gshfkjlsdjkh", "v1.19.1"),
        ("fdf", "", "v1.19.1"),
        ("ff", "gshfkjlsdjkh", ""),
    ]
    for org, repo, tag in failing_inputs:
        assert utils._get_date_from_semver(org, repo, tag) is None
def test_get_date_from_semver1():
    """Test _get_date_from_semver failure."""
    utils = GithubUtils()
    # A valid repo/tag still yields None through this code path.
    result = utils._get_date_from_semver("kubernetes", "kubernetes", "v1.19.1")
    assert result is None
def ingest_epv_into_graph(epv_details):
    """Handle implementation of API for triggering ingestion flow.

    :param epv_details: A dictionary object having list of packages/version as a nested object.
    Ex:
    {
          "ecosystem": "<ecosystem_name>", (*required)
          "packages": [
            {
              "package": "<package_name_1>", (*required)
              "version": "<package_version_1>" (*required)
            }, {
              "package": "<package_name_2>", (*required)
              "version": "<package_version_2>" (*required)
            }
          ],
          "force": false, (optional)
          "force_graph_sync": true, (optional)
          "recursive_limit": 0 (optional)
          "source": "<Consumer_of_API>"(optional)
    }
    :return: tuple of (response payload, HTTP status code).
    """
    logger.info('graph_ingestion_:_ingest_epv_into_graph() is called.')
    input_data = epv_details.get('body', {})
    # Check if worker flow activation is disabled.
    # NOTE(review): returns 201 (accepted) even when disabled — the caller
    # presumably treats this as a successful no-op; confirm with API consumers.
    if not _INVOKE_API_WORKERS:
        logger.debug('Worker flows are disabled.')
        input_data['message'] = 'Worker flows are disabled.'
        return input_data, 201
    # Check if API consumer is CA or SA and unknown package ingestion flag is disabled.
    if _DISABLE_UNKNOWN_PACKAGE_FLOW and input_data.get('source', '') == 'api':
        logger.debug('Unknown package ingestion is disabled.')
        input_data['message'] = 'Unknown package ingestion is disabled.'
        return input_data, 201
    gh = GithubUtils()
    ecosystem = input_data.get('ecosystem')
    package_list = input_data.get('packages')
    # Arguments shared by every flow; 'name'/'version' are filled per package.
    node_arguments = {
        "ecosystem": ecosystem,
        "force": input_data.get('force', True),
        "recursive_limit": input_data.get('recursive_limit', 0),
        "force_graph_sync": input_data.get('force_graph_sync', False)
    }
    # Iterate through packages given for current ecosystem.
    for item in package_list:
        if ecosystem == 'golang':
            _, clean_version = GolangDependencyTreeGenerator.\
                clean_version(item.get('version'))
            # Pseudo versions are skipped; the error is recorded on the item
            # and the remaining packages are still processed.
            if gh.is_pseudo_version(clean_version):
                item['error_message'] = 'Golang pseudo version is not supported.'
                continue
        flow_name = 'newPackageFlow' if ecosystem == 'golang' else 'bayesianApiFlow'
        # Caller-supplied flow name overrides the ecosystem default.
        if 'flow_name' in input_data:
            flow_name = input_data['flow_name']
        node_arguments['name'] = item.get('package')
        node_arguments['version'] = item.get('version')
        try:
            # Initiate Selinon flow for current EPV ingestion.
            dispacher_id = run_flow(flow_name, node_arguments)
            item['dispacher_id'] = dispacher_id.id
        except Exception as e:
            # Any dispatch failure aborts the whole batch with a 500.
            logger.error('Exception while initiating the worker flow %s', e)
            return {'message': 'Failed to initiate worker flow.'}, 500
        logger.info('A %s in initiated for eco: %s, pkg: %s, ver: %s',
                    flow_name, ecosystem, item['package'], item['version'])
    return input_data, 201
class GolangUtils:
    """Golang utils class.

    Scrapes pkg.go.dev (falling back to the /mod/ page) to resolve a Go
    package's versions, latest version, module path, GitHub link and license.
    """

    def __init__(self, pkg):
        """Init method for GolangUtils class."""
        self.version_list = []
        self.mode = None
        self.latest_version = "-1"
        self.gh_link = None
        self.license = None
        self.module = []
        self.gh = GithubUtils()
        self.__populate_data(pkg)

    def __fetch_all_versions(self, obj):
        """Fetch all the versions of a pkg."""
        page_exist = obj.get_sub_data(
            'div', {'data-test-id': 'UnitHeader-breadcrumb'})
        ver_obj = obj.get_sub_data('div', {'class': 'Versions'})
        ver_list = obj.get_value_from_list('a', None, None, None, None, ver_obj)
        final_list = []
        if len(ver_list) != 0:
            for ver in ver_list:
                if ver.startswith('v0.0.0-'):
                    # Pseudo versions are not real releases; skip them.
                    continue
                elif "+incompatible" in ver:
                    intermediate_value = ver.split('+incompatible')[0]
                    if "v" in intermediate_value:
                        version = intermediate_value.split('v')[1]
                    else:
                        version = intermediate_value
                    final_list.append(version)
                else:
                    if ver.startswith('v'):
                        version = ver.split('v')[1]
                    else:
                        version = ver
                    final_list.append(version)
        # The tab exist logic is added because in some cases,
        # you wont find any versions under tab.
        if ver_list or page_exist:
            link = self.get_gh_link()
            if link and "https://github.com/" in link:
                org_name = self.get_gh_link().split(
                    "https://github.com/")[1].split("/")
                all_ver = self.gh._get_verion_list(org_name[0], org_name[1])
                if all_ver:
                    if final_list:
                        all_ver.extend(final_list)
                    # Deduplicate the merged scrape + GitHub tag lists.
                    return list(set(all_ver))
            elif len(final_list) == 0:
                return ['none']
            else:
                return []
        return ver_list

    def __select_latest_version(self, versions=None):
        """Select latest version from list.

        :param versions: list of version strings; defaults to empty.
        """
        # None default instead of a shared mutable `[]` default argument.
        versions = versions or []
        if len(versions) == 0:
            return ""
        version_arr = []
        for x in versions:
            version_arr.append(ComparableVersion(x))
        version_arr.sort()
        return str(version_arr[-1])

    def __fetch_latest_version(self):
        """Fetch the latest version of a pkg."""
        all_ver = self.get_all_versions()
        if all_ver and len(all_ver) != 0 and all_ver[0] != "none":
            return self.__select_latest_version(all_ver)
        else:
            return ""

    def __fetch_license(self, obj):
        """Fetch the declared license(s) of a pkg."""
        sub_obj = obj.get_sub_data('span',
                                   {'data-test-id': 'UnitHeader-licenses'})
        lic_list = obj.get_value_from_list('a', None, None, None, None,
                                           sub_obj)
        final_lic_list = []
        for lic in lic_list or []:
            if ', ' in lic:
                final_lic_list.extend(lic.split(', '))
            elif ',' in lic:
                # BUG FIX: the original split on ', ' in this branch too,
                # so space-less comma lists were never actually split.
                final_lic_list.extend(lic.split(','))
            else:
                final_lic_list.append(lic)
        return final_lic_list

    def __fetch_gh_link(self, obj):
        """Fetch the github link of a pkg."""
        return obj.get_value(
            'a', None, 'href',
            obj.get_sub_data('div', {'class': 'UnitMeta-repo'}))

    def __fetch_module(self, obj, mod_val=None):
        """Fetch the module of a pkg."""
        module_lst = []
        if not mod_val:
            sub_obj = obj.get_sub_data(
                'div', {'data-test-id': 'UnitHeader-breadcrumb'})
            mod_list = obj.get_value_from_list('a', None, None, None, None,
                                               sub_obj)
            if len(mod_list) == 1 and mod_list[0] == 'Discover Packages':
                mod_val = obj.get_value(
                    'span', {'data-test-id': 'UnitHeader-breadcrumbCurrent'})
                self.mode = "mod"
            if len(mod_list) >= 2:
                mod_val = mod_list[1]
        if mod_val:
            module_lst.append(mod_val)
            if "github" not in mod_val:
                gh_link = self.get_gh_link()
                # get_gh_link() can return None; guard before `in` test
                # (the original raised TypeError here in that case).
                if gh_link and "https" in gh_link:
                    module_lst.append(gh_link.split('https://')[1])
        return module_lst

    def __populate_data(self, pkg):
        """Set the data for the golang pkg."""
        _logger.info("Populating the data object for {}".format(pkg))
        pkg_url = "https://pkg.go.dev/{}".format(pkg)
        mod_url = "https://pkg.go.dev/mod/{}".format(pkg)
        scraper = Scraper(pkg_url + "?tab=versions")
        self.mode = "pkg"
        self.url = pkg_url
        self.version_list = self.__fetch_all_versions(scraper)
        if len(self.version_list) == 0:
            # Package page had nothing; retry via the module page.
            _logger.info("Fetching the details from mod.")
            scraper = Scraper(mod_url + "?tab=versions")
            self.mode = "mod"
            self.url = mod_url
            self.version_list = self.__fetch_all_versions(scraper)
            if len(self.version_list) != 0:
                self.latest_version = self.__fetch_latest_version()
                self.module = self.__fetch_module(scraper, pkg)
            else:
                self.mode = "Not Found"
        else:
            _logger.info("Fetching the details from pkg.")
            self.latest_version = self.__fetch_latest_version()
            self.module = self.__fetch_module(scraper)

    def get_module(self):
        """Return module name of a pkg."""
        # NOTE(review): this compares the list self.module to a string, so it
        # can never match; the sibling getters check self.mode instead.
        # Kept as-is because changing it would alter the Not-Found return
        # value from [] to None — confirm intent before fixing.
        if self.module == "Not Found":
            return None
        return self.module

    def get_all_versions(self):
        """Return all the versions of a pkg."""
        if self.mode == "Not Found":
            return None
        return self.version_list

    def get_latest_version(self):
        """Return the latest versions of a pkg."""
        if self.mode == "Not Found":
            return None
        return self.latest_version

    def get_gh_link(self):
        """Return the gh link of a pkg."""
        if self.mode == "Not Found":
            return None
        if not self.gh_link:
            # Overview tab capitalization differs between pkg and mod pages.
            if self.mode == "pkg":
                url = self.url + "?tab=overview"
            else:
                url = self.url + "?tab=Overview"
            scraper_ov = Scraper(url)
            self.gh_link = self.__fetch_gh_link(scraper_ov)
            self.license = self.__fetch_license(scraper_ov)
        return self.gh_link

    def get_license(self):
        """Return declared license of a pkg."""
        if self.mode == "Not Found":
            return None
        if not self.license:
            # Overview tab capitalization differs between pkg and mod pages.
            if self.mode == "pkg":
                url = self.url + "?tab=overview"
            else:
                url = self.url + "?tab=Overview"
            scraper_ov = Scraper(url)
            self.gh_link = self.__fetch_gh_link(scraper_ov)
            self.license = self.__fetch_license(scraper_ov)
        return self.license