def get(self, ecosystem):
    query = request.args.get('q')
    eco = Ecosystem.by_name(rdb.session, ecosystem)
    fetcher = CucosReleasesFetcher(eco, rdb.session)
    now = datetime.datetime.now()

    # Instantiate two different solvers, one using a custom fetcher to fetch
    # matching releases from the Bayesian DB and the other one fetching from
    # upstream repositories.
    # The data from these two solvers then tell us:
    # 1) which packages in the range we have already analysed and have
    #    information about,
    # 2) which other packages from upstream repositories match the version
    #    specification.
    cucos_solver, solver = get_ecosystem_solver(eco, with_fetcher=fetcher), \
        get_ecosystem_solver(eco)

    ours = cucos_solver.solve([query], all_versions=True)
    upstream = solver.solve([query], all_versions=True)

    ours_nums = set() if not ours else set(next(iter(ours.values())))
    upstreams_nums = set() if not upstream else set(next(iter(upstream.values())))

    return {
        'query': query,
        'detail': {
            'analysed': ours,
            'upstream': upstream,
            'difference': list(upstreams_nums - ours_nums)
        },
        'resolved_at': str(now)
    }
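# A minimal, self-contained sketch of the difference computation above, using
# plain sets (the version strings are illustrative only): the endpoint reports
# the upstream releases that match the query but have not been analysed yet.
analysed_versions = {'1.0.0', '1.1.0'}
upstream_versions = {'1.0.0', '1.1.0', '1.2.0'}
difference = list(upstream_versions - analysed_versions)
print(difference)  # ['1.2.0']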
def get_sources(self):
    """
    :return: path to source files
    """
    if not self._eco_obj:
        self._eco_obj = Ecosystem.by_name(self._postgres.session, self.ecosystem)

    if self._eco_obj.is_backed_by(EcosystemBackend.maven):
        return self.get_extracted_source_jar()
    else:
        return self.get_extracted_source_tarball()
def has_sources(self):
    """
    :return: True if the given EPV has available sources
    """
    if not self._eco_obj:
        self._eco_obj = Ecosystem.by_name(self._postgres.session, self.ecosystem)

    if self._eco_obj.is_backed_by(EcosystemBackend.maven):
        return self._s3.object_exists(self._source_jar_object_key)
    else:
        self._construct_source_tarball_names()
        return self._s3.object_exists(self._source_tarball_object_key)
def execute(self, arguments):
    self._strict_assert(arguments.get('name'))
    self._strict_assert(arguments.get('ecosystem'))

    # get rid of version if scheduled from the core analyses
    arguments.pop('version', None)

    db = self.storage.session
    ecosystem = Ecosystem.by_name(db, arguments['ecosystem'])
    package = Package.get_or_create(db, ecosystem_id=ecosystem.id, name=arguments['name'])
    upstream = self.get_upstream_entry(db, package, self.get_upstream_url(arguments))
    arguments['url'] = upstream.url

    if not arguments.get('force'):
        # we can potentially schedule two flows of the same type at the same time as there is
        # no lock, but let's say that is OK
        if upstream.updated_at is not None \
                and datetime.datetime.now() - upstream.updated_at < self._UPDATE_INTERVAL:
            self.log.info(
                'Skipping upstream package check as data are considered recent - last update %s.',
                upstream.updated_at)
            # keep track of the start, but do not schedule anything more;
            # discard changes such as updates
            db.rollback()
            return arguments

    # if this fails, it's actually OK, as there could be concurrency
    package_analysis = PackageAnalysis(package_id=package.id,
                                       started_at=datetime.datetime.now(),
                                       finished_at=None)
    db.add(package_analysis)

    # keep track of updates
    upstream.updated_at = datetime.datetime.now()

    db.commit()
    arguments['document_id'] = package_analysis.id
    return arguments
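import datetime

# A minimal sketch of the recency check above (the interval and timestamps are
# illustrative, not the task's actual configuration): the package-level flow is
# skipped when the last recorded upstream update happened less than
# _UPDATE_INTERVAL ago.
_UPDATE_INTERVAL = datetime.timedelta(days=1)
last_update = datetime.datetime.now() - datetime.timedelta(hours=3)
if datetime.datetime.now() - last_update < _UPDATE_INTERVAL:
    print('skipping upstream package check - data are recent')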
def execute(self, arguments):
    self._strict_assert(arguments.get('name'))
    self._strict_assert(arguments.get('version'))
    self._strict_assert(arguments.get('ecosystem'))

    db = self.storage.session
    e = Ecosystem.by_name(db, arguments['ecosystem'])
    p = Package.get_or_create(db, ecosystem_id=e.id, name=arguments['name'])
    v = Version.get_or_create(db, package_id=p.id, identifier=arguments['version'])

    if not arguments.get('force'):
        # TODO: this is OK for now, but if we scale up and 2+ workers run this task,
        # they can potentially schedule two flows of the same type at the same time
        if db.query(Analysis).filter(Analysis.version_id == v.id).count() > 0:
            # we need to propagate flags that were passed to the flow, but not E/P/V - this way
            # we are sure that, for example, graph import is scheduled
            # (arguments['force_graph_sync'] == True)
            arguments.pop('name')
            arguments.pop('version')
            arguments.pop('ecosystem')
            return arguments

    cache_path = mkdtemp(dir=self.configuration.worker_data_dir)
    epv_cache = ObjectCache.get_from_dict(arguments)
    ecosystem = Ecosystem.by_name(db, arguments['ecosystem'])

    try:
        if not epv_cache.has_source_tarball():
            _, source_tarball_path = IndianaJones.fetch_artifact(
                ecosystem=ecosystem,
                artifact=arguments['name'],
                version=arguments['version'],
                target_dir=cache_path
            )
            epv_cache.put_source_tarball(source_tarball_path)

        if ecosystem.is_backed_by(EcosystemBackend.maven):
            if not epv_cache.has_source_jar():
                try:
                    source_jar_path = self._download_source_jar(cache_path, ecosystem, arguments)
                    epv_cache.put_source_jar(source_jar_path)
                except Exception as exc:  # distinct name so the ecosystem variable 'e' is not shadowed
                    self.log.info(
                        'Failed to fetch source jar for maven artifact "{e}/{p}/{v}": {err}'.format(
                            e=arguments.get('ecosystem'),
                            p=arguments.get('name'),
                            v=arguments.get('version'),
                            err=str(exc)))

            if not epv_cache.has_pom_xml():
                pom_xml_path = self._download_pom_xml(cache_path, ecosystem, arguments)
                epv_cache.put_pom_xml(pom_xml_path)
    finally:
        # always clean up the cache
        shutil.rmtree(cache_path)

    a = Analysis(version=v, access_count=1, started_at=datetime.datetime.now())
    db.add(a)
    db.commit()
    arguments['document_id'] = a.id
    return arguments
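# A minimal sketch of the argument-stripping branch above (the dictionary
# contents are illustrative only): when an analysis already exists and 'force'
# is not set, the ecosystem/package/version keys are dropped so the rest of the
# flow skips re-analysis, while flags such as 'force_graph_sync' are kept and
# still propagated.
args = {'ecosystem': 'npm', 'name': 'serve-static', 'version': '1.7.1',
        'force_graph_sync': True}
for key in ('name', 'version', 'ecosystem'):
    args.pop(key, None)
print(args)  # {'force_graph_sync': True}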
def execute(self, arguments):
    self._strict_assert(arguments.get('ecosystem'))
    self._strict_assert(arguments.get('name'))
    self._strict_assert(arguments.get('version'))

    eco = arguments['ecosystem']
    pkg = arguments['name']
    tool_responses = {}
    result_summary = {
        'package_names': [],
        'registered_srpms': [],
        'all_rhn_channels': [],
        'all_rhsm_content_sets': [],
        'all_rhsm_product_names': []
    }
    result_data = {
        'status': 'error',
        'summary': result_summary,
        'details': tool_responses
    }

    # bail out early; unless we have access to internal services or the package is from the
    # Maven ecosystem, we can't comment on downstream usage
    is_maven = Ecosystem.by_name(self.storage.session, eco).is_backed_by(EcosystemBackend.maven)
    if not self._is_inside_rh() and not is_maven:
        return result_data

    self.log.debug('Fetching {e}/{p} from Anitya'.format(e=eco, p=pkg))
    res = self._fetch_anitya_project(eco, pkg)
    anitya_rpm_names = []
    anitya_mvn_names = []
    if res is None:
        result_data['status'] = 'error'
    elif res.status_code == 200:
        self.log.debug('Retrieved {e}/{p} from Anitya'.format(e=eco, p=pkg))
        anitya_response = res.json()
        tool_responses['redhat_anitya'] = anitya_response
        # for now, we assume all downstreams are ones we care about
        for entry in anitya_response['packages']:
            if entry['distro'] == RH_RPM_DISTRO_NAME:
                anitya_rpm_names.append(entry['package_name'])
            elif entry['distro'] == RH_MVN_DISTRO_NAME:
                anitya_mvn_names.append(entry['package_name'])
            else:
                self.log.warning(
                    'Unknown distro {d} for downstream package {o} (package {p}) in Anitya'.format(
                        d=entry['distro'], o=entry['package_name'], p=pkg))
        self.log.debug('Candidate RPM names from Anitya: {}'.format(anitya_rpm_names))
        self.log.debug('Candidate MVN names from Anitya: {}'.format(anitya_mvn_names))
        # TODO: report 'partial' here and switch to 'success' at the end
        result_data['status'] = 'success'
    else:
        msg = 'Failed to find Anitya project {e}/{p}. Anitya response: {r}'
        self.log.error(msg.format(e=eco, p=pkg, r=res.text))
        result_data['status'] = 'error'

    if self._is_inside_rh():
        # we have candidate downstream name mappings, check them against Brew
        seed_names = anitya_rpm_names or [self._prefix_package_name(pkg, eco)]
        self.log.debug('Checking candidate names in Brew: {}'.format(seed_names))

        args = ['brew-utils-cli', '--version', arguments['version']]
        artifact_hash = self._get_artifact_hash(algorithm='sha256')
        if artifact_hash:
            args += ['--digest', artifact_hash]
        args += seed_names

        self.log.debug("Executing command, timeout={timeout}: {cmd}".format(
            timeout=self._BREWUTILS_CLI_TIMEOUT, cmd=args))
        tc = TimedCommand(args)
        status, output, error = tc.run(timeout=self._BREWUTILS_CLI_TIMEOUT)
        self.log.debug("status = %s, error = %s", status, error)
        output = ''.join(output)
        self.log.debug("output = %s", output)
        if not output:
            raise TaskError("Error running command %s" % args)
        brew = json.loads(output)

        result_summary['package_names'] = brew['packages']
        result_summary['registered_srpms'] = brew['response']['registered_srpms']
        tool_responses['brew'] = brew['response']['brew']

        # we have SRPM details, fetch details on where the RPMs are shipped
        tool_responses['pulp_cdn'] = pulp_responses = []
        rhn_channels = set()
        rhsm_content_sets = set()
        rhsm_product_names = set()
        for srpm_summary in result_summary['registered_srpms']:
            srpm_filename = "{n}-{v}-{r}.src.rpm".format(n=srpm_summary['package_name'],
                                                         v=srpm_summary['version'],
                                                         r=srpm_summary['release'])
            cdn_metadata = self._get_cdn_metadata(srpm_filename)
            if cdn_metadata is None:
                msg = 'Error getting shipping data for {e}/{p} SRPM: {srpm}'
                self.log.error(msg.format(e=eco, p=pkg, srpm=srpm_filename))
                continue
            pulp_responses.append(cdn_metadata)
            srpm_summary['published_in'] = cdn_metadata['rhsm_product_names']
            rhn_channels.update(cdn_metadata['rhn_channels'])
            rhsm_content_sets.update(cdn_metadata['rhsm_content_sets'])
            rhsm_product_names.update(cdn_metadata['rhsm_product_names'])

        result_summary['all_rhn_channels'] = sorted(rhn_channels)
        result_summary['all_rhsm_content_sets'] = sorted(rhsm_content_sets)
        result_summary['all_rhsm_product_names'] = sorted(rhsm_product_names)

    self._add_mvn_results(result_summary, anitya_mvn_names, arguments['version'])

    return result_data
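# A self-contained sketch of the SRPM file name construction used above (the
# field values are illustrative only): Pulp/CDN shipping metadata is looked up
# by the canonical "<name>-<version>-<release>.src.rpm" file name.
srpm_summary = {'package_name': 'nodejs-serve-static', 'version': '1.7.1', 'release': '1.el7'}
srpm_filename = "{n}-{v}-{r}.src.rpm".format(n=srpm_summary['package_name'],
                                             v=srpm_summary['version'],
                                             r=srpm_summary['release'])
print(srpm_filename)  # nodejs-serve-static-1.7.1-1.el7.src.rpm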
def get_ecosystem(self, name):
    if not self.is_connected():
        self.connect()
    return Ecosystem.by_name(PostgresBase.session, name)