Example #1
0
    def get(self, ecosystem):
        query = request.args.get('q')
        eco = Ecosystem.by_name(rdb.session, ecosystem)
        fetcher = CucosReleasesFetcher(eco, rdb.session)
        now = datetime.datetime.now()

        # Instantiate two different solvers, one using a custom fetcher to fetch
        # matching releases from Bayesian DB and the other one fetching from
        # upstream repositories.
        # The data from these two solvers then provide information as to:
        #   1) Which packages in the range we have already analysed and have information
        #        about
        #   2) Other packages from upstream repositories which match the version specification
        cucos_solver, solver = get_ecosystem_solver(eco, with_fetcher=fetcher),\
                               get_ecosystem_solver(eco)

        ours = cucos_solver.solve([query], all_versions=True)
        upstream = solver.solve([query], all_versions=True)

        ours_nums = set() if not ours else set(next(iter(ours.values())))
        upstreams_nums = set() if not upstream else set(
            next(iter(upstream.values())))

        return {
            'query': query,
            'detail': {
                'analysed': ours,
                'upstream': upstream,
                'difference': list(upstreams_nums - ours_nums)
            },
            'resolved_at': str(now)
        }
Example #2
0
    def get_sources(self):
        """
        :return: path to source files
        """
        if not self._eco_obj:
            self._eco_obj = Ecosystem.by_name(self._postgres.session,
                                              self.ecosystem)

        if self._eco_obj.is_backed_by(EcosystemBackend.maven):
            return self.get_extracted_source_jar()
        else:
            return self.get_extracted_source_tarball()
Example #3
0
    def has_sources(self):
        """
        :return: true if the given EPV has available sources
        """
        if not self._eco_obj:
            self._eco_obj = Ecosystem.by_name(self._postgres.session,
                                              self.ecosystem)

        if self._eco_obj.is_backed_by(EcosystemBackend.maven):
            return self._s3.object_exists(self._source_jar_object_key)
        else:
            self._construct_source_tarball_names()
            return self._s3.object_exists(self._source_tarball_object_key)
    def execute(self, arguments):
        self._strict_assert(arguments.get('name'))
        self._strict_assert(arguments.get('ecosystem'))

        # get rid of version if scheduled from the core analyses
        arguments.pop('version', None)

        db = self.storage.session
        ecosystem = Ecosystem.by_name(db, arguments['ecosystem'])
        package = Package.get_or_create(db,
                                        ecosystem_id=ecosystem.id,
                                        name=arguments['name'])
        upstream = self.get_upstream_entry(db, package,
                                           self.get_upstream_url(arguments))
        arguments['url'] = upstream.url

        if not arguments.get('force'):
            # can potentially schedule two flows of a same type at the same time as there is no lock,
            # but let's say it's OK
            if upstream.updated_at is not None \
                    and upstream.updated_at - datetime.datetime.now() < self._UPDATE_INTERVAL:
                self.log.info(
                    'Skipping upstream package check as data are considered as recent - last update %s.',
                    upstream.updated_at)
                # keep track of start, but do not schedule nothing more
                # discard changes like updates
                db.rollback()
                return arguments

        # if this fails, it's actually OK, as there could be concurrency
        package_analysis = PackageAnalysis(package_id=package.id,
                                           started_at=datetime.datetime.now(),
                                           finished_at=None)
        db.add(package_analysis)

        # keep track of updates
        upstream.updated_at = datetime.datetime.now()

        db.commit()
        arguments['document_id'] = package_analysis.id
        return arguments
    def execute(self, arguments):
        self._strict_assert(arguments.get('name'))
        self._strict_assert(arguments.get('version'))
        self._strict_assert(arguments.get('ecosystem'))

        db = self.storage.session
        e = Ecosystem.by_name(db, arguments['ecosystem'])
        p = Package.get_or_create(db,
                                  ecosystem_id=e.id,
                                  name=arguments['name'])
        v = Version.get_or_create(db,
                                  package_id=p.id,
                                  identifier=arguments['version'])

        if not arguments.get('force'):
            # TODO: this is OK for now, but if we will scale and there will be 2+ workers running this task
            # they can potentially schedule two flows of a same type at the same time
            if db.query(Analysis).filter(
                    Analysis.version_id == v.id).count() > 0:
                # we need to propagate flags that were passed to flow, but not E/P/V - this way we are sure that for
                # example graph import is scheduled (arguments['force_graph_sync'] == True)
                arguments.pop('name')
                arguments.pop('version')
                arguments.pop('ecosystem')
                return arguments

        cache_path = mkdtemp(dir=self.configuration.worker_data_dir)
        epv_cache = ObjectCache.get_from_dict(arguments)
        ecosystem = Ecosystem.by_name(db, arguments['ecosystem'])

        try:
            if not epv_cache.has_source_tarball():
                _, source_tarball_path = IndianaJones.fetch_artifact(
                    ecosystem=ecosystem,
                    artifact=arguments['name'],
                    version=arguments['version'],
                    target_dir=cache_path)
                epv_cache.put_source_tarball(source_tarball_path)

            if ecosystem.is_backed_by(EcosystemBackend.maven):
                if not epv_cache.has_source_jar():
                    try:
                        source_jar_path = self._download_source_jar(
                            cache_path, ecosystem, arguments)
                        epv_cache.put_source_jar(source_jar_path)
                    except Exception as e:
                        self.log.info(
                            'Failed to fetch source jar for maven artifact "{e}/{p}/{v}": {err}'
                            .format(e=arguments.get('ecosystem'),
                                    p=arguments.get('name'),
                                    v=arguments.get('version'),
                                    err=str(e)))

                if not epv_cache.has_pom_xml():
                    pom_xml_path = self._download_pom_xml(
                        cache_path, ecosystem, arguments)
                    epv_cache.put_pom_xml(pom_xml_path)
        finally:
            # always clean up cache
            shutil.rmtree(cache_path)

        a = Analysis(version=v,
                     access_count=1,
                     started_at=datetime.datetime.now())
        db.add(a)
        db.commit()

        arguments['document_id'] = a.id
        return arguments
    def execute(self, arguments):
        self._strict_assert(arguments.get('ecosystem'))
        self._strict_assert(arguments.get('name'))
        self._strict_assert(arguments.get('version'))

        eco = arguments['ecosystem']
        pkg = arguments['name']
        tool_responses = {}
        result_summary = {
            'package_names': [],
            'registered_srpms': [],
            'all_rhn_channels': [],
            'all_rhsm_content_sets': [],
            'all_rhsm_product_names': []
        }
        result_data = {'status': 'error',
                       'summary': result_summary,
                       'details': tool_responses
                       }

        # bail out early; we need access to internal services or the package is from Maven ecosystem,
        # otherwise we can't comment on downstream usage
        is_maven = Ecosystem.by_name(self.storage.session, eco).is_backed_by(EcosystemBackend.maven)
        if not self._is_inside_rh() and not is_maven:
            return result_data

        self.log.debug('Fetching {e}/{p} from Anitya'.format(e=eco, p=pkg))
        res = self._fetch_anitya_project(eco, pkg)
        anitya_rpm_names = []
        anitya_mvn_names = []
        if res is None:
            result_data['status'] = 'error'
        elif res.status_code == 200:
            self.log.debug('Retrieved {e}/{p} from Anitya'.format(e=eco, p=pkg))
            anitya_response = res.json()
            tool_responses['redhat_anitya'] = anitya_response
            # For now, we assume all downstreams are ones we care about
            for entry in anitya_response['packages']:
                if entry['distro'] == RH_RPM_DISTRO_NAME:
                    anitya_rpm_names.append(entry['package_name'])
                elif entry['distro'] == RH_MVN_DISTRO_NAME:
                    anitya_mvn_names.append(entry['package_name'])
                else:
                    self.log.warning(
                        'Unknown distro {d} for downstream package {o} (package {p}) in Anitya'.
                                     format(d=entry['distro'], o=entry['package_name'], p=pkg)
                    )
            self.log.debug('Candidate RPM names from Anitya: {}'.format(anitya_rpm_names))
            self.log.debug('Candidate MVN names from Anitya: {}'.format(anitya_mvn_names))
            # TODO: Report 'partial' here and switch to 'success' at the end
            result_data['status'] = 'success'
        else:
            msg = 'Failed to find Anitya project {e}/{p}. Anitya response: {r}'
            self.log.error(msg.format(e=eco, p=pkg, r=res.text))
            result_data['status'] = 'error'

        if self._is_inside_rh():
            # we have candidate downstream name mappings, check them against Brew
            seed_names = anitya_rpm_names or [self._prefix_package_name(pkg, eco)]
            self.log.debug('Checking candidate names in Brew: {}'.format(seed_names))

            args = ['brew-utils-cli', '--version', arguments['version']]
            artifact_hash = self._get_artifact_hash(algorithm='sha256')
            if artifact_hash:
                args += ['--digest', artifact_hash]
            args += seed_names

            self.log.debug("Executing command, timeout={timeout}: {cmd}".format(timeout=self._BREWUTILS_CLI_TIMEOUT,
                                                                                cmd=args))
            tc = TimedCommand(args)
            status, output, error = tc.run(timeout=self._BREWUTILS_CLI_TIMEOUT)
            self.log.debug("status = %s, error = %s", status, error)
            output = ''.join(output)
            self.log.debug("output = %s", output)
            if not output:
                raise TaskError("Error running command %s" % args)
            brew = json.loads(output)

            result_summary['package_names'] = brew['packages']
            result_summary['registered_srpms'] = brew['response']['registered_srpms']
            tool_responses['brew'] = brew['response']['brew']

            # we have SRPM details, fetch details on where the RPMs are shipped
            tool_responses['pulp_cdn'] = pulp_responses = []
            rhn_channels = set()
            rhsm_content_sets = set()
            rhsm_product_names = set()
            for srpm_summary in result_summary['registered_srpms']:
                srpm_filename = "{n}-{v}-{r}.src.rpm".format(n=srpm_summary['package_name'],
                                                             v=srpm_summary['version'],
                                                             r=srpm_summary['release'])
                cdn_metadata = self._get_cdn_metadata(srpm_filename)
                if cdn_metadata is None:
                    msg = 'Error getting shipping data for {e}/{p} SRPM: {srpm}'
                    self.log.error(msg.format(e=eco, p=pkg, srpm=srpm_filename))
                    continue
                pulp_responses.append(cdn_metadata)
                srpm_summary['published_in'] = cdn_metadata['rhsm_product_names']
                rhn_channels.update(cdn_metadata['rhn_channels'])
                rhsm_content_sets.update(cdn_metadata['rhsm_content_sets'])
                rhsm_product_names.update(cdn_metadata['rhsm_product_names'])
            result_summary['all_rhn_channels'] = sorted(rhn_channels)
            result_summary['all_rhsm_content_sets'] = sorted(rhsm_content_sets)
            result_summary['all_rhsm_product_names'] = sorted(rhsm_product_names)

        self._add_mvn_results(result_summary, anitya_mvn_names, arguments['version'])

        return result_data
    def get_ecosystem(self, name):
        if not self.is_connected():
            self.connect()

        return Ecosystem.by_name(PostgresBase.session, name)