def filter_vulnerable_packages(cls, vulnerabilities: list,
                               package_version_map: dict) -> dict:
    """Filter vulnerable packages based on the timestamp in the pseudo version."""
    logger.debug('Executing filter_vulnerable_packages')

    filter_vulnerabilities = {}
    gh = GithubUtils()
    for vuln in vulnerabilities:
        package_name = vuln['package_name'][0]
        for package_version in package_version_map[package_name]:
            if gh._is_commit_date_in_vuln_range(
                    gh.extract_timestamp(package_version),
                    vuln['vuln_commit_date_rules'][0]):
                if package_name not in filter_vulnerabilities:
                    filter_vulnerabilities[package_name] = {}
                if package_version not in filter_vulnerabilities[package_name]:
                    filter_vulnerabilities[package_name][package_version] = {'cve': []}
                filter_vulnerabilities[package_name][package_version]['cve'].append(vuln)

    return filter_vulnerabilities
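# Illustrative sketch (hypothetical data, not part of the original module) of the
# nested mapping filter_vulnerable_packages returns:
# package name -> pseudo version -> {'cve': [matching vulnerability records]}.
example_result = {
    "github.com/example/pkg": {
        "v0.0.0-20201010233445-abcd4321dcba": {
            "cve": [{"package_name": ["github.com/example/pkg"],
                     "vuln_commit_date_rules": ["*"]}]
        }
    }
}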
def test_get_verion_list():
    """Test _get_verion_list."""
    gh = GithubUtils()
    ver = gh._get_verion_list("", "")
    assert ver is None

    ver = gh._get_verion_list("qor", "admin")
    assert "1.0" in ver
def ca_validate_input(input_json: Dict, ecosystem: str) -> Tuple[List[Dict], List[Package]]:
    """Validate CA Input."""
    logger.debug('Validating ca input data.')
    if not input_json:
        error_msg = "Expected JSON request"
        raise BadRequest(error_msg)

    if not isinstance(input_json, dict):
        error_msg = "Expected list of dependencies in JSON request"
        raise BadRequest(error_msg)

    if not check_for_accepted_ecosystem(ecosystem):
        error_msg: str = f"Ecosystem {ecosystem} is not supported for this request"
        raise BadRequest(error_msg)

    if not input_json.get('package_versions'):
        error_msg: str = "package_versions is missing"
        raise BadRequest(error_msg)

    gh = GithubUtils()
    packages_list = []
    normalised_input_pkgs = []
    for pkg in input_json.get('package_versions'):
        pseudo_version = False
        package = given_package = pkg.get("package")
        clean_version = given_version = pkg.get("version")
        if not all([package, given_version]):
            error_msg = "Invalid Input: Package, Version are required."
            raise BadRequest(error_msg)

        if (not isinstance(given_version, str)) or (not isinstance(package, str)):
            error_msg = "Package version should be string format only."
            raise BadRequest(error_msg)

        if not validate_version(given_version):
            error_msg = "Package version should not have special characters."
            raise BadRequest(error_msg)

        if ecosystem == 'maven':
            package = MavenCoordinates.normalize_str(package)

        if ecosystem == 'pypi':
            package = package.lower()

        if ecosystem == 'golang':
            _, clean_version = GolangDependencyTreeGenerator.clean_version(given_version)
            pseudo_version = gh.is_pseudo_version(clean_version)
            # Strip module appended to the package name
            package = package.split('@')[0]

        packages_list.append(
            {"name": package, "given_name": given_package,
             "version": clean_version, "given_version": given_version,
             "is_pseudo_version": pseudo_version})
        normalised_input_pkgs.append(normlize_packages(package, given_package, clean_version,
                                                       given_version, pseudo_version))
    return packages_list, normalised_input_pkgs
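# Hypothetical request body of the shape ca_validate_input expects; the second
# entry shows a golang pseudo version, and the package names are illustrative.
sample_input_json = {
    "package_versions": [
        {"package": "github.com/qor/admin", "version": "v1.2.0"},
        {"package": "github.com/example/mod",
         "version": "v0.0.0-20201010233445-abcd4321dcba"},
    ]
}
# ca_validate_input(sample_input_json, 'golang') would return one packages_list
# entry per package and flag the second one with is_pseudo_version=True.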
def test_is_commit_in_vuln_range():
    """Test _is_commit_in_vuln_range."""
    gh = GithubUtils()
    res = gh._is_commit_in_vuln_range("", "", "", "")
    assert res is None

    res = gh._is_commit_in_vuln_range("kubernetes", "kubernetes",
                                      "0d4799964558",
                                      ">#2020-09-15T13:19:13Z&<=#2020-09-16T13:19:13Z,"
                                      ">=#2020-09-16T13:19:13Z&<#2020-09-17T13:19:13Z,"
                                      "=#2020-09-17T13:19:13Z")
    assert res is True
    res = gh._is_commit_in_vuln_range("kubernetes", "kubernetes",
                                      "0d4799964558", "*")
    assert res is True

    res = gh._is_commit_in_vuln_range("kubernetes", "kubernetes",
                                      "0d4799964558", "$%#2020-09-17T13:19:13Z")
    assert res is False

    res = gh._is_commit_in_vuln_range("kubernetes", "kubernetes",
                                      "0d4799964558",
                                      "$#2020-09-17T13:19:13Z,%#2020-09-17T13:19:13Z")
    assert res is False

    gh.GITHUB_API = "http://www.gibberish_my_data.com/"
    res = gh._is_commit_in_vuln_range("kubernetes", "kubernetes",
                                      "0d4799964558", "*")
    assert res is None
def test_get_commit_date():
    """Test _get_commit_date."""
    gh = GithubUtils()
    dt = gh._get_commit_date("kubernetes", "kubernetes", "v1.19.1")
    assert dt == "2020-09-09T11:17:20Z"

    dt = gh._get_commit_date("kubernetes", "kubernetes", "0d4799964558b1e96587737613d6e79e1679cb82")
    assert dt == "2020-09-17T13:19:13Z"

    dt = gh._get_commit_date("kubernetes", "kubernetes", "95b5b7d61338aa0f4c601e820e1d8f3e45696bbc")
    assert dt == "2020-09-09T11:17:20Z"
def test_is_commit_date_in_vuln_range():
    """Test _is_commit_date_in_vuln_range."""
    gh = GithubUtils()
    res = gh._is_commit_date_in_vuln_range("", "")
    assert res is None

    res = gh._is_commit_date_in_vuln_range("20200916101010",
                                           ">#2020-09-15T13:19:13Z&<=#2020-09-16T13:19:13Z,"
                                           ">=#2020-09-16T13:19:13Z&<#2020-09-17T13:19:13Z,"
                                           "=#2020-09-17T13:19:13Z")
    assert res is True
    res = gh._is_commit_date_in_vuln_range("0d4799964558", "*")
    assert res is None
 def __init__(self, packages: List[Package], ecosystem: Ecosystem):
     """Create NormalizedPackages by removing all duplicates from packages."""
     packages, self._modules = clean_and_get_pkgs(packages)
     super().__init__(packages, ecosystem)
     self._version_map = {}
     gh = GithubUtils()
     self.pseudo = set()
     for package in self.all_dependencies:
         if gh.is_pseudo_version(package.version):
             self._version_map[package.name] = package.version
             self.pseudo.add(package)
     # Flat set of all packages with the pseudo-version packages removed.
     self._all_except_pseudo = self._all.difference(self.pseudo)
 def _set_commit_hash_rules(self, data, gh_link):
     """To set the commit hash rules."""
     hashes_range = data['hashesRange']
     # If there is no range, it means all commits are vulnerable.
     if not hashes_range:
         data['commitRules'] = '*'
         return data
     gh = GithubUtils()
     # This is needed to get the org and name from the gh link.
     gh_splitted = gh_link.split("/")
     length = len(gh_splitted)
     org = gh_splitted[length - 2]
     name = gh_splitted[length - 1]
     regex_vr = "[<>=*]+"
     regex_op = "[0-9a-zA-Z\\_\\.\\-]+"
     rules = ""
     for hash_range in hashes_range:
         # Remove any blank spaces.
         hash_range = hash_range.replace(" ", "")
         operands = re.split(regex_vr, hash_range)
         operators = re.split(regex_op, hash_range)
         if len(operators) == 2 and len(operands) == 2:
             # It means there is only 1 condition.
             date = gh._get_commit_date(
                 org, name, self.__format_golang_version(operands[1]))
             if date:
                 rules = rules + operators[0] + "#" + date + ","
             else:
                 logger.error(
                     "No such data present on Github. Contact Snyk.")
         elif len(operators) == 3 and len(operands) == 3:
             # It means there is a nesting. Ex >x & <y.
             date1 = gh._get_commit_date(
                 org, name, self.__format_golang_version(operands[1]))
             date2 = gh._get_commit_date(
                 org, name, self.__format_golang_version(operands[2]))
             if date1 and date2:
                 rules = rules + operators[0] + "#" + date1 +\
                         "&" + operators[1] + "#" + date2 + ","
             else:
                 logger.error(
                     "No such data present on Github. Contact Snyk.")
         else:
             logger.error("Incorrect hashesRange data. Contact Snyk.")
     # Remove the trailing ',' that gets appended.
     if rules:
         rules = rules[:-1]
     data['commitRules'] = rules
     return data
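import re

# Small illustration of how the two regexes in _set_commit_hash_rules carve a
# single-condition hashesRange entry into its operator and operand; the commit
# hash is borrowed from the tests elsewhere in this listing.
regex_vr = "[<>=*]+"
regex_op = "[0-9a-zA-Z\\_\\.\\-]+"

single_condition = "<=0d4799964558"
print(re.split(regex_vr, single_condition))  # ['', '0d4799964558']  -> operands
print(re.split(regex_op, single_condition))  # ['<=', '']            -> operators
# Both lists have length 2, so this entry is handled by the single-condition branch.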
def get_known_unknown_pkgs(
        ecosystem: str,
        graph_response: Dict,
        normalised_input_pkgs: List,
        ignore: Dict = {}) -> Tuple[List[Dict], Set[Package]]:
    """Analyse Known and Unknown Packages.

    :param ecosystem: Ecosystem
    :param graph_response: Graph Response
    :param normalised_input_pkgs: Normalised Input Packages
    :param ignore: Packages to be ignored while showing vulnerabilities.
    :return: Stack Recommendations, Unknown Pkgs
    """
    normalised_input_pkg_map = None  # Mapping is required only for Golang.
    if ecosystem == 'golang':
        normalised_input_pkg_map = {
            get_package_version_key(input_pkg.package, input_pkg.version): {
                'given_name': input_pkg.given_name,
                'version': input_pkg.version,
                'given_version': input_pkg.given_version
            }
            for input_pkg in normalised_input_pkgs
        }
    stack_recommendation = []
    db_known_packages = set()
    gh = GithubUtils()
    for package in graph_response.get('result', {}).get('data'):
        pkg_name = package.get('package').get('name', [''])[0]
        clean_version = get_clean_version(
            pkg_name,
            package.get('version').get('version', [''])[0],
            normalised_input_pkg_map)
        pseudo_version = gh.is_pseudo_version(
            clean_version) if ecosystem == 'golang' else False
        given_pkg_name, given_pkg_version = get_given_name_and_version(
            pkg_name, clean_version, normalised_input_pkg_map)
        pkg_recommendation = CABatchResponseBuilder(ecosystem). \
            generate_recommendation(package, given_pkg_name, given_pkg_version, ignore)
        stack_recommendation.append(pkg_recommendation)
        db_known_packages.add(
            normlize_packages(name=pkg_name,
                              given_name=given_pkg_name,
                              version=clean_version,
                              given_version=given_pkg_version,
                              is_pseudo_version=pseudo_version))

    input_dependencies = set(normalised_input_pkgs)
    unknown_pkgs: Set = input_dependencies.difference(db_known_packages)
    return stack_recommendation, unknown_pkgs
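from collections import namedtuple

# Toy illustration (simplified stand-in for the real Package type, hypothetical
# data) of the set difference get_known_unknown_pkgs relies on: whatever the
# graph response did not cover is reported as unknown.
Pkg = namedtuple('Pkg', 'name version')
normalised_input = {Pkg('github.com/qor/admin', '1.0'),
                    Pkg('github.com/example/new', '0.1.0')}
db_known = {Pkg('github.com/qor/admin', '1.0')}
unknown_pkgs = normalised_input.difference(db_known)
# unknown_pkgs == {Pkg(name='github.com/example/new', version='0.1.0')}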
    def _filter_vulnerable_packages(self, vulnerabilities: List) -> Dict:
        """Filter out vulnerabilities whose commit sha is out of vuln_commit_rules."""
        logger.info('Executing filter_vulnerable_packages')

        filter_vulnerabilities = defaultdict(list)
        gh = GithubUtils()
        for vuln in vulnerabilities:
            package_name = vuln.get('package_name', [None])[0]
            vuln_rules = vuln.get('vuln_commit_date_rules', [None])[0]
            pseudo_version = self._normalized_packages.version_map.get(package_name)
            if not pseudo_version:
                logger.debug("Not a Pseudo Version.")
                continue
            time_stamp = gh.extract_timestamp(pseudo_version)
            if all([vuln_rules, time_stamp,
                    gh._is_commit_date_in_vuln_range(time_stamp, vuln_rules)]):
                filter_vulnerabilities[package_name].append(vuln)
        return filter_vulnerabilities
def clean_and_get_pkgs(packages) -> Tuple[List[Package], List[str]]:
    """Clean and get golang packages."""
    all_packages: List[Package] = []
    all_modules: List[str] = []
    gh = GithubUtils()
    for direct in packages:
        pkg_name, pkg_mod = get_golang_metadata(direct)
        _, package_version = GolangDependencyTreeGenerator.clean_version(
            direct.version)
        pkg = Package(name=pkg_name, version=package_version, dependencies=[])
        if gh.is_pseudo_version(package_version):
            all_modules.append(pkg_mod)
        for trans_pkg in direct.dependencies or []:
            trans_name, trans_mod = get_golang_metadata(trans_pkg)
            _, trans_version = GolangDependencyTreeGenerator.clean_version(
                trans_pkg.version)
            trans = Package(name=trans_name, version=trans_version)
            pkg.dependencies.append(trans)
            if gh.is_pseudo_version(trans_version):
                all_modules.append(trans_mod)
        all_packages.append(pkg)
    return all_packages, all_modules
def test_extract_timestamp():
    """Test extract_timestamp."""
    test_data = {
        "1.3.4": None,
        "v2.3.7": None,
        "1.3.4-alpha": None,
        "v.4.3.2-alpha": None,
        "v2.5.4+incompatible": None,
        "v0.0.0-20201010233445-abcd4321dcba": "20201010233445",
        "0.0.0-20201010233445-abcd4321dcba": "20201010233445",
        "20201010233445-abcd4321dcba": "20201010233445",
        "v0.0.0-20201010233445abcd4321dcba": "20201010233445",
        "v0.0.0-20201010233445-abcd4321": "20201010233445",
        "v0.0.0-202010102345-abcd4321dcba": None,
        "v0.0.0-20201010233445-abcd4321dcba-alpha3.4": "20201010233445",
        "v0.0.0-20201010233445-abcd4321dcba+incompatible": "20201010233445",
        "v0.0.0-abcd4321dcba-20201010233445": "20201010233445",
    }

    gh = GithubUtils()
    for version, expected_value in test_data.items():
        res = gh.extract_timestamp(version)
        assert res == expected_value, f"For {version} expected value: {expected_value}"
def test_is_pseudo_version():
    """Test is_pseudo_version."""
    test_data = {
        "1.3.4": False,
        "v2.3.7": False,
        "1.3.4-alpha": False,
        "v.4.3.2-alpha": False,
        "v2.5.4+incompatible": False,
        "v0.0.0-20201010233445-abcd4321dcba": True,
        "0.0.0-20201010233445-abcd4321dcba": True,
        "20201010233445-abcd4321dcba": False,
        "v0.0.0-20201010233445abcd4321dcba": False,
        "v0.0.0-20201010233445-abcd4321": False,
        "v0.0.0-202010102345-abcd4321dcba": False,
        "v0.0.0-20201010233445-abcd4321dcba-alpha3.4": True,
        "v0.0.0-20201010233445-abcd4321dcba+incompatible": True,
        "v0.0.0-abcd4321dcba-20201010233445": False
    }

    gh = GithubUtils()
    for version, expected_value in test_data.items():
        res = gh.is_pseudo_version(version)
        assert res == expected_value, f"For {version} expected value: {expected_value}"
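import re

# A minimal sketch (an assumption, not the project's implementation) of checks
# consistent with the two tests above: a pseudo version is a base semver
# followed by a 14-digit timestamp and a 12-character commit hash, and the
# timestamp is any standalone run of exactly 14 digits.
PSEUDO_RE = re.compile(r'^v?\d+\.\d+\.\d+-\d{14}-[0-9a-f]{12}')
TIMESTAMP_RE = re.compile(r'(?<!\d)(\d{14})(?!\d)')


def is_pseudo_version_sketch(version):
    """Return True when the version string looks like a Go pseudo version."""
    return bool(PSEUDO_RE.match(version))


def extract_timestamp_sketch(version):
    """Return the 14-digit timestamp embedded in the version, if any."""
    match = TIMESTAMP_RE.search(version)
    return match.group(1) if match else None


assert is_pseudo_version_sketch("v0.0.0-20201010233445-abcd4321dcba") is True
assert is_pseudo_version_sketch("v0.0.0-20201010233445-abcd4321") is False
assert extract_timestamp_sketch("20201010233445-abcd4321dcba") == "20201010233445"
assert extract_timestamp_sketch("v0.0.0-202010102345-abcd4321dcba") is None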
def test_get_date_from_semver():
    """Test _get_date_from_semver."""
    gh = GithubUtils()
    dt = gh._get_date_from_semver("wiuroruw", "gshfkjlsdjkh", "v1.19.1")
    assert dt is None
    sv = gh._get_date_from_semver("", "gshfkjlsdjkh", "v1.19.1")
    assert sv is None
    sv = gh._get_date_from_semver("fdf", "", "v1.19.1")
    assert sv is None
    sv = gh._get_date_from_semver("ff", "gshfkjlsdjkh", "")
    assert sv is None
def test_get_date_from_semver1():
    """Test _get_date_from_semver failure."""
    gh = GithubUtils()
    dt = gh._get_date_from_semver("kubernetes", "kubernetes", "v1.19.1")
    assert dt is None
def ingest_epv_into_graph(epv_details):
    """Handle implementation of API for triggering ingestion flow.

    :param epv_details: A dictionary object having list of packages/version as a nested object.
    Ex:
    {
          "ecosystem": "<ecosystem_name>",     (*required)
          "packages": [
            {
              "package": "<package_name_1>",   (*required)
              "version": "<package_version_1>" (*required)
            }, {
              "package": "<package_name_2>",   (*required)
              "version": "<package_version_2>" (*required)
            }
          ],
          "force": false,              (optional)
          "force_graph_sync": true,    (optional)
          "recursive_limit": 0         (optional)
          "source": "<Consumer_of_API>"(optional)
        }
    """
    logger.info('graph_ingestion_:_ingest_epv_into_graph() is called.')
    input_data = epv_details.get('body', {})

    # Check if worker flow activation is disabled.
    if not _INVOKE_API_WORKERS:
        logger.debug('Worker flows are disabled.')
        input_data['message'] = 'Worker flows are disabled.'
        return input_data, 201

    # Check if API consumer is CA or SA and unknown package ingestion flag is disabled.
    if _DISABLE_UNKNOWN_PACKAGE_FLOW and input_data.get('source', '') == 'api':
        logger.debug('Unknown package ingestion is disabled.')
        input_data['message'] = 'Unknown package ingestion is disabled.'
        return input_data, 201

    gh = GithubUtils()
    ecosystem = input_data.get('ecosystem')
    package_list = input_data.get('packages')

    node_arguments = {
        "ecosystem": ecosystem,
        "force": input_data.get('force', True),
        "recursive_limit": input_data.get('recursive_limit', 0),
        "force_graph_sync": input_data.get('force_graph_sync', False)
    }

    # Iterate through packages given for current ecosystem.
    for item in package_list:
        if ecosystem == 'golang':
            _, clean_version = GolangDependencyTreeGenerator.\
                clean_version(item.get('version'))
            if gh.is_pseudo_version(clean_version):
                item['error_message'] = 'Golang pseudo version is not supported.'
                continue

        flow_name = 'newPackageFlow' if ecosystem == 'golang' else 'bayesianApiFlow'

        if 'flow_name' in input_data:
            flow_name = input_data['flow_name']

        node_arguments['name'] = item.get('package')
        node_arguments['version'] = item.get('version')

        try:
            # Initiate Selinon flow for current EPV ingestion.
            dispacher_id = run_flow(flow_name, node_arguments)
            item['dispacher_id'] = dispacher_id.id
        except Exception as e:
            logger.error('Exception while initiating the worker flow %s', e)
            return {'message': 'Failed to initiate worker flow.'}, 500

        logger.info('A %s is initiated for eco: %s, pkg: %s, ver: %s',
                    flow_name, ecosystem, item['package'], item['version'])

    return input_data, 201
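# Hypothetical payload for ingest_epv_into_graph, following the schema in the
# docstring above; the ecosystem and package values are illustrative.
epv_details = {
    "body": {
        "ecosystem": "golang",
        "packages": [{"package": "github.com/qor/admin", "version": "v1.2.0"}],
        "force": False,
        "source": "api",
    }
}
# ingest_epv_into_graph(epv_details) reads the request from the 'body' key and,
# with _DISABLE_UNKNOWN_PACKAGE_FLOW set, would return early because the
# source is 'api'.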
class GolangUtils:
    """Golang utils class."""
    def __init__(self, pkg):
        """Init method for GolangUtils class."""
        self.version_list = []
        self.mode = None
        self.latest_version = "-1"
        self.gh_link = None
        self.license = None
        self.module = []
        self.gh = GithubUtils()
        self.__populate_data(pkg)

    def __fetch_all_versions(self, obj):
        """Fetch all the versions of a pkg."""
        page_exist = obj.get_sub_data(
            'div', {'data-test-id': 'UnitHeader-breadcrumb'})
        ver_obj = obj.get_sub_data('div', {'class': 'Versions'})
        ver_list = obj.get_value_from_list('a', None, None, None, None,
                                           ver_obj)
        final_list = []
        if len(ver_list) != 0:
            for ver in ver_list:
                if ver.startswith('v0.0.0-'):
                    continue
                elif "+incompatible" in ver:
                    intermediate_value = ver.split('+incompatible')[0]
                    if "v" in intermediate_value:
                        version = intermediate_value.split('v')[1]
                    else:
                        version = intermediate_value
                    final_list.append(version)
                else:
                    if ver.startswith('v'):
                        version = ver.split('v')[1]
                    else:
                        version = ver
                    final_list.append(version)
        # The tab-exists logic is needed because in some cases you won't find any versions under the tab.
        if ver_list or page_exist:
            link = self.get_gh_link()
            if link and "https://github.com/" in link:
                org_name = self.get_gh_link().split(
                    "https://github.com/")[1].split("/")
                all_ver = self.gh._get_verion_list(org_name[0], org_name[1])
                if all_ver:
                    if final_list:
                        all_ver.extend(final_list)
                    return list(set(all_ver))
                elif len(final_list) == 0:
                    return ['none']
            else:
                return []
        return ver_list

    def __select_latest_version(self, versions=[]):
        """Select latest version from list."""
        if len(versions) == 0:
            return ""
        version_arr = []
        for x in versions:
            version_arr.append(ComparableVersion(x))
        version_arr.sort()
        return str(version_arr[-1])

    def __fetch_latest_version(self):
        """Fetch the latest version of a pkg."""
        all_ver = self.get_all_versions()
        if all_ver and len(all_ver) != 0 and all_ver[0] != "none":
            return self.__select_latest_version(all_ver)
        else:
            return ""

    def __fetch_license(self, obj):
        """Fetch the github link of a pkg."""
        sub_obj = obj.get_sub_data('span',
                                   {'data-test-id': 'UnitHeader-licenses'})
        lic_list = obj.get_value_from_list('a', None, None, None, None,
                                           sub_obj)
        final_lic_list = []
        for lic in lic_list or []:
            if ', ' in lic:
                lics = lic.split(', ')
                final_lic_list.extend(lics)
            elif ',' in lic:
                lics = lic.split(',')
                final_lic_list.extend(lics)
            else:
                final_lic_list.append(lic)
        return final_lic_list

    def __fetch_gh_link(self, obj):
        """Fetch the github link of a pkg."""
        return obj.get_value(
            'a', None, 'href',
            obj.get_sub_data('div', {'class': 'UnitMeta-repo'}))

    def __fetch_module(self, obj, mod_val=None):
        """Fetch the module of a pkg."""
        module_lst = []
        if not mod_val:
            # mod_val = obj.get_value('a', {'data-test-id': 'DetailsHeader-infoLabelModule'})
            sub_obj = obj.get_sub_data(
                'div', {'data-test-id': 'UnitHeader-breadcrumb'})
            mod_list = obj.get_value_from_list('a', None, None, None, None,
                                               sub_obj)
            if len(mod_list) == 1 and mod_list[0] == 'Discover Packages':
                mod_val = obj.get_value(
                    'span', {'data-test-id': 'UnitHeader-breadcrumbCurrent'})
                self.mode = "mod"
            if len(mod_list) >= 2:
                mod_val = mod_list[1]
        if mod_val:
            module_lst.append(mod_val)
            if "github" not in mod_val:
                gh_link = self.get_gh_link()
                if "https" in gh_link:
                    module_lst.append(gh_link.split('https://')[1])
        return module_lst

    def __populate_data(self, pkg):
        """Set the data for the golang pkg."""
        _logger.info("Populating the data object for {}".format(pkg))
        pkg_url = "https://pkg.go.dev/{}".format(pkg)
        mod_url = "https://pkg.go.dev/mod/{}".format(pkg)
        scraper = Scraper(pkg_url + "?tab=versions")
        self.mode = "pkg"
        self.url = pkg_url
        self.version_list = self.__fetch_all_versions(scraper)
        if len(self.version_list) == 0:
            _logger.info("Fetching the details from mod.")
            scraper = Scraper(mod_url + "?tab=versions")
            self.mode = "mod"
            self.url = mod_url
            self.version_list = self.__fetch_all_versions(scraper)
            if len(self.version_list) != 0:
                self.latest_version = self.__fetch_latest_version()
                self.module = self.__fetch_module(scraper, pkg)
            else:
                self.mode = "Not Found"
        else:
            _logger.info("Fetching the details from pkg.")
            self.latest_version = self.__fetch_latest_version()
            self.module = self.__fetch_module(scraper)

    def get_module(self):
        """Return module name of a pkg."""
        if self.mode == "Not Found":
            return None
        return self.module

    def get_all_versions(self):
        """Return all the versions of a pkg."""
        if self.mode == "Not Found":
            return None
        return self.version_list

    def get_latest_version(self):
        """Return the latest versions of a pkg."""
        if self.mode == "Not Found":
            return None
        return self.latest_version

    def get_gh_link(self):
        """Return the gh link of a pkg."""
        if self.mode == "Not Found":
            return None
        if not self.gh_link:
            if self.mode == "pkg":
                url = self.url + "?tab=overview"
            else:
                url = self.url + "?tab=Overview"
            scraper_ov = Scraper(url)
            self.gh_link = self.__fetch_gh_link(scraper_ov)
            self.license = self.__fetch_license(scraper_ov)
        return self.gh_link

    def get_license(self):
        """Return declared license of a pkg."""
        if self.mode == "Not Found":
            return None
        if not self.license:
            if self.mode == "pkg":
                url = self.url + "?tab=overview"
            else:
                url = self.url + "?tab=Overview"
            scraper_ov = Scraper(url)
            self.gh_link = self.__fetch_gh_link(scraper_ov)
            self.license = self.__fetch_license(scraper_ov)
        return self.license
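# Hypothetical usage sketch: GolangUtils scrapes pkg.go.dev live, so this needs
# network access; the package path is borrowed from the tests in this listing.
go_utils = GolangUtils("github.com/qor/admin")
if go_utils.get_latest_version() is not None:
    print(go_utils.get_latest_version())
    print(go_utils.get_gh_link())
    print(go_utils.get_license())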