def execute(self, arguments):
    """Validate task arguments and reject unsupported or private packages.

    :param arguments: dictionary with task arguments; must contain string
        'ecosystem' and 'name' entries
    :return: the validated arguments dictionary, unchanged
    :raises FatalTaskError: when the ecosystem is not supported
    :raises NotABugFatalTaskError: when the package is private
    """
    self._strict_assert(isinstance(arguments.get('ecosystem'), str))
    self._strict_assert(isinstance(arguments.get('name'), str))

    eco = arguments['ecosystem']
    pkg = arguments['name']

    if eco not in _SUPPORTED_ECOSYSTEMS:
        raise FatalTaskError('Unknown ecosystem: %r' % eco)

    # Private packages must never be ingested.
    if is_pkg_public(eco, pkg):
        return arguments

    logger.info("Private package ingestion ignored %s %s", eco, pkg)
    raise NotABugFatalTaskError("Private package alert {} {}".format(eco, pkg))
def test_is_pkg_public():
    """Test is_pkg_public function."""
    # Known public packages per ecosystem.
    public_cases = [
        ("npm", "lodash"),
        ("maven", "io.vertx:vertx-web"),
        ("pypi", "scipy"),
    ]
    # Deliberately misspelled names that should not exist upstream.
    private_cases = [
        ("npm", "lodashssss"),
        ("maven", "io.vertx:vertx-webssss"),
        ("pypi", "scipyssss"),
    ]
    for ecosystem, package in public_cases:
        assert is_pkg_public(ecosystem, package) is True
    for ecosystem, package in private_cases:
        assert is_pkg_public(ecosystem, package) is False
def execute(self, arguments):
    """Start a package-level analysis flow for a single package.

    :param arguments: dictionary with task arguments; must contain string
        'ecosystem' and 'name' entries
    :return: the arguments dictionary, extended with 'url' and (unless the
        upstream data is recent) 'document_id'
    :raises FatalTaskError: when the ecosystem is unknown
    :raises NotABugFatalTaskError: when the package is private
    """
    self._strict_assert(isinstance(arguments.get('ecosystem'), str))
    self._strict_assert(isinstance(arguments.get('name'), str))

    # Version-level keys are irrelevant for a package-level flow; drop
    # them if this was scheduled from the core analyses.
    for stale_key in ('version', 'document_id'):
        arguments.pop(stale_key, None)

    db = self.storage.session
    try:
        ecosystem = Ecosystem.by_name(db, arguments['ecosystem'])
    except NoResultFound:
        raise FatalTaskError('Unknown ecosystem: %r' % arguments['ecosystem'])

    # Private packages must never be ingested.
    if not is_pkg_public(arguments['ecosystem'], arguments['name']):
        self.log.info("Private package ignored "
                      "{} {} in init_package_flow".format(
                          arguments['ecosystem'], arguments['name']))
        raise NotABugFatalTaskError("Private package alert "
                                    "{} {} in init_package_flow".format(
                                        arguments['ecosystem'],
                                        arguments['name']))
    self.log.info("Package analysis flow for {} {}".format(
        arguments['ecosystem'], arguments['name']))

    package = Package.get_or_create(db, ecosystem_id=ecosystem.id,
                                    name=arguments['name'])
    url = self.get_upstream_url(arguments)
    upstream = self.get_upstream_entry(package, url)
    if upstream is None:
        upstream = self.add_or_update_upstream(package, url)
    arguments['url'] = upstream.url

    if not arguments.get('force'):
        # Two flows of the same type can potentially be scheduled at the
        # same time as there is no lock, but let's say that's OK.
        data_is_recent = (
            upstream.updated_at is not None
            and datetime.datetime.utcnow() - upstream.updated_at
            < self._UPDATE_INTERVAL)
        if data_is_recent:
            self.log.info(
                'Skipping upstream package check as data are considered as recent - '
                'last update %s.', upstream.updated_at)
            # Keep track of start, but do not schedule anything more;
            # discard pending changes such as updates.
            db.rollback()
            return arguments

    # If this insert fails it is actually OK, as there could be concurrency.
    package_analysis = PackageAnalysis(
        package_id=package.id,
        started_at=datetime.datetime.utcnow(),
        finished_at=None)
    db.add(package_analysis)

    # Keep track of updates.
    upstream.updated_at = datetime.datetime.utcnow()
    db.commit()

    arguments['document_id'] = package_analysis.id
    return arguments
def ingest_epv_into_graph(epv_details):
    """Handle implementation of API for triggering ingestion flow.

    :param epv_details: A dictionary object having list of packages/version
        as a nested object. Ex:
        {
            "ecosystem": "<ecosystem_name>",            (*required)
            "packages": [
                {
                    "package": "<package_name_1>",      (*required)
                    "version": "<package_version_1>"    (*required)
                },
                {
                    "package": "<package_name_2>",      (*required)
                    "version": "<package_version_2>"    (*required)
                }
            ],
            "force": false,                             (optional)
            "force_graph_sync": true,                   (optional)
            "recursive_limit": 0,                       (optional)
            "source": "<Consumer_of_API>"               (optional)
        }
    :return: tuple of (response payload, HTTP status code)
    """
    logger.info('graph_ingestion_:_ingest_epv_into_graph() is called.')
    input_data = epv_details.get('body', {})

    # Check if worker flow activation is disabled.
    if not _INVOKE_API_WORKERS:
        logger.debug('Worker flows are disabled.')
        input_data['message'] = 'Worker flows are disabled.'
        return input_data, 201

    source = input_data.get('source', '')
    # Check if API consumer is CA or SA and unknown package ingestion flag
    # is disabled.
    if _DISABLE_UNKNOWN_PACKAGE_FLOW and source == 'api':
        logger.debug('Unknown package ingestion is disabled.')
        input_data['message'] = 'Unknown package ingestion is disabled.'
        return input_data, 201

    github_utils = GithubUtils()
    ecosystem = input_data.get('ecosystem')
    package_list = input_data.get('packages')

    node_arguments = {
        "ecosystem": ecosystem,
        "force": input_data.get('force', True),
        "recursive_limit": input_data.get('recursive_limit', 0),
        "force_graph_sync": input_data.get('force_graph_sync', False),
    }

    # Iterate through packages given for the current ecosystem.
    for pkg_entry in package_list:
        # Don't try ingestion for private packages.
        if not is_pkg_public(ecosystem, pkg_entry.get('package')):
            logger.info("Private package ingestion is ignored {} {}".format(
                ecosystem, pkg_entry.get('package')))
            pkg_entry['error_message'] = 'Private package ingestion is ignored.'
            continue

        if ecosystem == 'golang':
            _, clean_version = GolangDependencyTreeGenerator.\
                clean_version(pkg_entry.get('version'))
            # Pseudo versions carry no release metadata and are rejected.
            if github_utils.is_pseudo_version(clean_version):
                pkg_entry['error_message'] = \
                    'Golang pseudo version is not supported.'
                continue

        # Golang gets its dedicated flow; callers may override explicitly.
        default_flow = ('newPackageFlow' if ecosystem == 'golang'
                        else 'bayesianApiFlow')
        flow_name = (input_data['flow_name']
                     if 'flow_name' in input_data else default_flow)

        node_arguments['name'] = pkg_entry.get('package')
        node_arguments['version'] = pkg_entry.get('version')

        try:
            # Initiate Selinon flow for current EPV ingestion.
            dispacher_id = run_flow(flow_name, node_arguments)
            pkg_entry['dispacher_id'] = dispacher_id.id
        except Exception as e:
            logger.error('Exception while initiating the worker flow %s', e)
            return {'message': 'Failed to initiate worker flow.'}, 500

        logger.info('Source %s initiated a %s for eco: %s, pkg: %s, ver: %s',
                    source, flow_name, ecosystem,
                    pkg_entry['package'], pkg_entry['version'])

    return input_data, 201
def execute(self, arguments):
    """Prepare caches and DB records, then start a version-level analysis.

    :param arguments: dictionary with task arguments; must contain string
        'ecosystem', 'name' and 'version' entries
    :return: the arguments dictionary, extended with 'document_id' and
        'ecosystem_backend' (or 'analysis_already_exists' on early return)
    :raises FatalTaskError: when the ecosystem is unknown
    :raises NotABugFatalTaskError: on malformed versions or private packages
    """
    self.log.debug("Input Arguments: {}".format(arguments))
    self._strict_assert(isinstance(arguments.get('ecosystem'), str))
    self._strict_assert(isinstance(arguments.get('name'), str))
    self._strict_assert(isinstance(arguments.get('version'), str))

    db = self.storage.session
    try:
        ecosystem = Ecosystem.by_name(db, arguments['ecosystem'])
    except NoResultFound:
        raise FatalTaskError('Unknown ecosystem: %r' % arguments['ecosystem'])

    # Make sure we store the package name in its normalized form.
    arguments['name'] = normalize_package_name(ecosystem.backend.name,
                                               arguments['name'])

    # Reject versions matching the ignore pattern up front.
    if len(pattern_ignore.findall(arguments['version'])) > 0:
        self.log.info("Incorrect version alert {} {}".format(
            arguments['name'], arguments['version']))
        raise NotABugFatalTaskError("Incorrect version alert {} {}".format(
            arguments['name'], arguments['version']))

    # Don't try ingestion for private packages.
    if not is_pkg_public(arguments['ecosystem'], arguments['name']):
        self.log.info("Private package ingestion ignored {} {}".format(
            arguments['ecosystem'], arguments['name']))
        raise NotABugFatalTaskError("Private package alert {} {}".format(
            arguments['ecosystem'], arguments['name']))
    self.log.info("Ingestion flow for {} {}".format(
        arguments['ecosystem'], arguments['name']))

    package = Package.get_or_create(db, ecosystem_id=ecosystem.id,
                                    name=arguments['name'])
    version = Version.get_or_create(db, package_id=package.id,
                                    identifier=arguments['version'])

    if not arguments.get('force'):
        # Skip re-analysis when any analysis already exists for this version.
        if db.query(Analysis).filter(
                Analysis.version_id == version.id).count() > 0:
            arguments['analysis_already_exists'] = True
            self.log.debug(
                "Arguments returned by initAnalysisFlow without force: {}".
                format(arguments))
            return arguments

    cache_path = mkdtemp(dir=self.configuration.WORKER_DATA_DIR)
    epv_cache = ObjectCache.get_from_dict(arguments)
    npm_dir = self.configuration.NPM_DATA_DIR

    try:
        if not epv_cache.has_source_tarball():
            _, source_tarball_path = IndianaJones.fetch_artifact(
                ecosystem=ecosystem,
                artifact=arguments['name'],
                version=arguments['version'],
                target_dir=cache_path)
            epv_cache.put_source_tarball(source_tarball_path)

        if ecosystem.is_backed_by(EcosystemBackend.maven):
            if not epv_cache.has_source_jar():
                try:
                    source_jar_path = self._download_source_jar(
                        cache_path, ecosystem, arguments)
                    epv_cache.put_source_jar(source_jar_path)
                except Exception as e:
                    # Source jars are best-effort for maven artifacts; a
                    # fetch failure is logged and otherwise ignored.
                    self.log.info(
                        'Failed to fetch source jar for maven artifact "{n}/{v}": {err}'
                        .format(n=arguments.get('name'),
                                v=arguments.get('version'),
                                err=str(e)))

            if not epv_cache.has_pom_xml():
                pom_xml_path = self._download_pom_xml(
                    cache_path, ecosystem, arguments)
                epv_cache.put_pom_xml(pom_xml_path)
    finally:
        # Always clean up the cache directory.
        shutil.rmtree(cache_path)
        if arguments['ecosystem'] == "npm":
            shutil.rmtree(npm_dir, True)

    analysis = Analysis(version=version, access_count=1,
                        started_at=datetime.datetime.utcnow())
    db.add(analysis)
    db.commit()

    arguments['document_id'] = analysis.id
    # Export the ecosystem backend so we can use it to easily control
    # the flow later.
    arguments['ecosystem_backend'] = ecosystem.backend.name

    self.log.debug(
        "Arguments returned by InitAnalysisFlow are: {}".format(arguments))
    return arguments