def execute(self, arguments):
        """Validate the requested ecosystem and reject private packages.

        :param arguments: dictionary with task arguments
        :return: {}, results
        :raises FatalTaskError: when the ecosystem is not supported
        :raises NotABugFatalTaskError: when the package is private
        """
        self._strict_assert(isinstance(arguments.get('ecosystem'), str))
        self._strict_assert(isinstance(arguments.get('name'), str))

        ecosystem_name = arguments['ecosystem']
        package_name = arguments['name']

        if ecosystem_name not in _SUPPORTED_ECOSYSTEMS:
            raise FatalTaskError('Unknown ecosystem: %r' % ecosystem_name)

        # Private packages must never be ingested.
        if not is_pkg_public(ecosystem_name, package_name):
            logger.info("Private package ingestion ignored %s %s",
                        ecosystem_name, package_name)
            raise NotABugFatalTaskError("Private package alert {} {}".format(
                ecosystem_name, package_name))

        return arguments
def test_is_pkg_public():
    """Test is_pkg_public function."""
    # Well-known public packages in each supported ecosystem.
    public_packages = [
        ("npm", "lodash"),
        ("maven", "io.vertx:vertx-web"),
        ("pypi", "scipy"),
    ]
    # Names that do not exist publicly and therefore look private.
    private_packages = [
        ("npm", "lodashssss"),
        ("maven", "io.vertx:vertx-webssss"),
        ("pypi", "scipyssss"),
    ]

    for ecosystem, package in public_packages:
        assert is_pkg_public(ecosystem, package) is True

    for ecosystem, package in private_packages:
        assert is_pkg_public(ecosystem, package) is False
# Example no. 3 (scraper artifact: "Esempio n. 3" / "0")
    def execute(self, arguments):
        """Task code.

        Schedules a package-level analysis: resolves the ecosystem, rejects
        private packages, ensures the Package row and its upstream URL exist,
        and records a new PackageAnalysis unless a recent one exists.

        :param arguments: dictionary with task arguments
        :return: {}, results
        :raises FatalTaskError: when the ecosystem is unknown
        :raises NotABugFatalTaskError: when the package is private
        """
        self._strict_assert(isinstance(arguments.get('ecosystem'), str))
        self._strict_assert(isinstance(arguments.get('name'), str))

        # get rid of version if scheduled from the core analyses
        arguments.pop('version', None)
        arguments.pop('document_id', None)

        db = self.storage.session
        try:
            ecosystem = Ecosystem.by_name(db, arguments['ecosystem'])
        except NoResultFound:
            raise FatalTaskError('Unknown ecosystem: %r' %
                                 arguments['ecosystem'])

        # Dont try ingestion for private packages
        if is_pkg_public(arguments['ecosystem'], arguments['name']):
            self.log.info("Package analysis flow for {} {}".format(
                arguments['ecosystem'], arguments['name']))
        else:
            self.log.info("Private package ignored "
                          "{} {} in init_package_flow".format(
                              arguments['ecosystem'], arguments['name']))
            raise NotABugFatalTaskError("Private package alert "
                                        "{} {} in init_package_flow".format(
                                            arguments['ecosystem'],
                                            arguments['name']))

        # Ensure the package row exists, then resolve its upstream entry,
        # creating/updating one when none is found.
        package = Package.get_or_create(db,
                                        ecosystem_id=ecosystem.id,
                                        name=arguments['name'])
        url = self.get_upstream_url(arguments)
        upstream = self.get_upstream_entry(package, url)
        if upstream is None:
            upstream = self.add_or_update_upstream(package, url)
        arguments['url'] = upstream.url

        if not arguments.get('force'):
            # can potentially schedule two flows of a same type at the same
            # time as there is no lock, but let's say it's OK
            if upstream.updated_at is not None \
                    and datetime.datetime.utcnow() - upstream.updated_at < self._UPDATE_INTERVAL:
                self.log.info(
                    'Skipping upstream package check as data are considered as recent - '
                    'last update %s.', upstream.updated_at)
                # keep track of start, but do not schedule nothing more
                # discard changes like updates
                db.rollback()
                return arguments

        # if this fails, it's actually OK, as there could be concurrency
        package_analysis = PackageAnalysis(
            package_id=package.id,
            started_at=datetime.datetime.utcnow(),
            finished_at=None)
        db.add(package_analysis)

        # keep track of updates
        upstream.updated_at = datetime.datetime.utcnow()

        db.commit()
        # Downstream tasks pick the analysis record up via 'document_id'.
        arguments['document_id'] = package_analysis.id
        return arguments
# Example no. 4 (scraper artifact: "Esempio n. 4" / "0")
def ingest_epv_into_graph(epv_details):
    """Handle implementation of API for triggering ingestion flow.

    :param epv_details: A dictionary object having list of packages/version as a nested object.
    Ex:
    {
          "ecosystem": "<ecosystem_name>",     (*required)
          "packages": [
            {
              "package": "<package_name_1>",   (*required)
              "version": "<package_version_1>" (*required)
            }, {
              "package": "<package_name_2>",   (*required)
              "version": "<package_version_2>" (*required)
            }
          ],
          "force": false,              (optional)
          "force_graph_sync": true,    (optional)
          "recursive_limit": 0         (optional)
          "source": "<Consumer_of_API>"(optional)
        }
    """
    logger.info('graph_ingestion_:_ingest_epv_into_graph() is called.')
    input_data = epv_details.get('body', {})

    # Bail out early when worker flow activation is disabled.
    if not _INVOKE_API_WORKERS:
        logger.debug('Worker flows are disabled.')
        input_data['message'] = 'Worker flows are disabled.'
        return input_data, 201

    source = input_data.get('source', '')
    # CA/SA consumers are rejected while unknown-package ingestion is off.
    if _DISABLE_UNKNOWN_PACKAGE_FLOW and source == 'api':
        logger.debug('Unknown package ingestion is disabled.')
        input_data['message'] = 'Unknown package ingestion is disabled.'
        return input_data, 201

    github_utils = GithubUtils()
    ecosystem = input_data.get('ecosystem')
    package_list = input_data.get('packages')

    node_arguments = {
        "ecosystem": ecosystem,
        "force": input_data.get('force', True),
        "recursive_limit": input_data.get('recursive_limit', 0),
        "force_graph_sync": input_data.get('force_graph_sync', False)
    }

    # Flow choice is loop-invariant: an explicit 'flow_name' wins, otherwise
    # golang uses 'newPackageFlow' and everything else 'bayesianApiFlow'.
    default_flow = 'newPackageFlow' if ecosystem == 'golang' else 'bayesianApiFlow'
    flow_name = input_data.get('flow_name', default_flow)

    # Iterate through packages given for current ecosystem.
    for pkg in package_list:
        # Dont try ingestion for private packages
        if not is_pkg_public(ecosystem, pkg.get('package')):
            logger.info("Private package ingestion is ignored {} {}".format(
                ecosystem, pkg.get('package')))
            pkg['error_message'] = 'Private package ingestion is ignored.'
            continue

        if ecosystem == 'golang':
            # Pseudo versions (commit-based) cannot be ingested.
            _, clean_version = GolangDependencyTreeGenerator.clean_version(
                pkg.get('version'))
            if github_utils.is_pseudo_version(clean_version):
                pkg['error_message'] = 'Golang pseudo version is not supported.'
                continue

        node_arguments['name'] = pkg.get('package')
        node_arguments['version'] = pkg.get('version')

        try:
            # Initiate Selinon flow for current EPV ingestion.
            dispatcher = run_flow(flow_name, node_arguments)
            pkg['dispacher_id'] = dispatcher.id
        except Exception as e:
            logger.error('Exception while initiating the worker flow %s', e)
            return {'message': 'Failed to initiate worker flow.'}, 500

        logger.info('Source %s initiated a %s for eco: %s, pkg: %s, ver: %s',
                    source, flow_name, ecosystem, pkg['package'],
                    pkg['version'])

    return input_data, 201
    def execute(self, arguments):
        """Task code.

        Initializes a version-level analysis: validates the EPV, rejects
        private packages and versions matching the ignore pattern, pre-fetches
        source artifacts into the object cache, and records a new Analysis row
        whose id is exported as 'document_id' for downstream tasks.

        :param arguments: dictionary with task arguments
        :return: {}, results
        :raises FatalTaskError: when the ecosystem is unknown
        :raises NotABugFatalTaskError: for bad versions or private packages
        """
        self.log.debug("Input Arguments: {}".format(arguments))
        self._strict_assert(isinstance(arguments.get('ecosystem'), str))
        self._strict_assert(isinstance(arguments.get('name'), str))
        self._strict_assert(isinstance(arguments.get('version'), str))

        db = self.storage.session
        try:
            ecosystem = Ecosystem.by_name(db, arguments['ecosystem'])
        except NoResultFound:
            raise FatalTaskError('Unknown ecosystem: %r' %
                                 arguments['ecosystem'])

        # make sure we store package name in its normalized form
        arguments['name'] = normalize_package_name(ecosystem.backend.name,
                                                   arguments['name'])

        # Reject versions that match the module-level ignore regex.
        if len(pattern_ignore.findall(arguments['version'])) > 0:
            self.log.info("Incorrect version alert {} {}".format(
                arguments['name'], arguments['version']))
            raise NotABugFatalTaskError("Incorrect version alert {} {}".format(
                arguments['name'], arguments['version']))

        # Dont try ingestion for private packages
        if is_pkg_public(arguments['ecosystem'], arguments['name']):
            self.log.info("Ingestion flow for {} {}".format(
                arguments['ecosystem'], arguments['name']))
        else:
            self.log.info("Private package ingestion ignored {} {}".format(
                arguments['ecosystem'], arguments['name']))
            raise NotABugFatalTaskError("Private package alert {} {}".format(
                arguments['ecosystem'], arguments['name']))

        # Ensure package and version rows exist.
        p = Package.get_or_create(db,
                                  ecosystem_id=ecosystem.id,
                                  name=arguments['name'])
        v = Version.get_or_create(db,
                                  package_id=p.id,
                                  identifier=arguments['version'])

        # Without 'force', short-circuit when this version was analysed before.
        if not arguments.get('force'):
            if db.query(Analysis).filter(
                    Analysis.version_id == v.id).count() > 0:
                arguments['analysis_already_exists'] = True
                self.log.debug(
                    "Arguments returned by initAnalysisFlow without force: {}".
                    format(arguments))
                return arguments

        # Pre-fetch source artifacts into the EPV object cache via a temp dir.
        cache_path = mkdtemp(dir=self.configuration.WORKER_DATA_DIR)
        epv_cache = ObjectCache.get_from_dict(arguments)
        npm_dir = self.configuration.NPM_DATA_DIR

        try:
            if not epv_cache.\
                    has_source_tarball():
                _, source_tarball_path = IndianaJones.fetch_artifact(
                    ecosystem=ecosystem,
                    artifact=arguments['name'],
                    version=arguments['version'],
                    target_dir=cache_path)
                epv_cache.put_source_tarball(source_tarball_path)

            if ecosystem.is_backed_by(EcosystemBackend.maven):
                # Source jar is best-effort for maven: failures are only logged.
                if not epv_cache.has_source_jar():
                    try:
                        source_jar_path = self._download_source_jar(
                            cache_path, ecosystem, arguments)
                        epv_cache.put_source_jar(source_jar_path)
                    except Exception as e:
                        self.log.info(
                            'Failed to fetch source jar for maven artifact "{n}/{v}": {err}'
                            .format(n=arguments.get('name'),
                                    v=arguments.get('version'),
                                    err=str(e)))

                if not epv_cache.has_pom_xml():
                    pom_xml_path = self._download_pom_xml(
                        cache_path, ecosystem, arguments)
                    epv_cache.put_pom_xml(pom_xml_path)
        finally:
            # always clean up cache
            shutil.rmtree(cache_path)
            if arguments['ecosystem'] == "npm":
                # second arg True == ignore_errors: the dir may not exist
                shutil.rmtree(npm_dir, True)

        a = Analysis(version=v,
                     access_count=1,
                     started_at=datetime.datetime.utcnow())
        db.add(a)
        db.commit()

        arguments['document_id'] = a.id

        # export ecosystem backend so we can use it to easily control flow later
        arguments['ecosystem_backend'] = ecosystem.backend.name

        self.log.debug(
            "Arguments returned by InitAnalysisFlow are: {}".format(arguments))
        return arguments