def execute(self, repositories, ecosystem, bucket_name, object_key):
        """Aggregate package names from GitHub manifests.

        For every repository the stored ``dependency_snapshot.json`` is read and the
        names/versions of its runtime dependencies are collected. Repositories whose
        configuration is invalid or whose snapshot cannot be processed are logged and
        skipped. Two objects are then stored in ``bucket_name``: the aggregated
        package list under ``object_key`` and the tagger list under
        ``"tagger_list" + object_key``.

        :param repositories: a list of repositories; every entry has to provide the
            'ecosystem' and 'repo_name' keys
        :param ecosystem: ecosystem, will appear in the resulting JSON file
        :param bucket_name: name of the bucket where to put the resulting JSON file
        :param object_key: object key of the resulting JSON file
        """

        s3 = StoragePool.get_connected_storage('S3GitHubManifestMetadata')

        package_list = []
        tagger_list = []
        for repo in repositories:

            try:
                repo_ecosystem = repo['ecosystem']
                repo_name = repo['repo_name']
            except (KeyError, TypeError, ValueError):
                # A missing key raises KeyError and an entry that is not a mapping
                # raises TypeError; ValueError alone would let both escape.
                self.log.error('Invalid configuration, skipping: {config}'.format(
                    config=str(repo)))
                continue

            try:
                obj = '{e}/{repo_name}/dependency_snapshot.json'.format(
                    e=repo_ecosystem, repo_name=repo_name.replace('/', ':'))
                dependency_snapshot = s3.retrieve_dict(obj)

                dependencies = dependency_snapshot.get('details', {}).get('runtime', [])

                # The set removes duplicate names within a single repository.
                packages = list({x.get('name') for x in dependencies})
                if packages:
                    package_list.append(packages)

                packages_version = {x.get("name"): x.get("version") for x in dependencies}
                if packages_version:
                    tagger_list.extend(self._create_tagger_list(ecosystem, packages_version))

            except Exception as e:
                # Best effort: one broken repository must not abort the aggregation.
                self.log.error('Unable to collect dependencies for {repo_name}: {reason}'.format(
                    repo_name=repo_name, reason=str(e)))
                continue

        results = {
            'ecosystem': ecosystem,
            'package_list': package_list
        }

        self.log.info("Storing aggregated list of packages in S3")

        s3_dest = AmazonS3(bucket_name=bucket_name)
        s3_dest.connect()
        s3_dest.store_dict(results, object_key)
        s3_dest.store_dict(tagger_list, "tagger_list" + object_key)
Exemple #2
0
    def store_results(cls, results, table_name):
        """Store results from BigQuery in our DB and, when S3 is enabled, in S3 too.

        ``prepare_csv_file`` supplies the CSV file and its header; the file is
        removed again once storing has finished, whether it succeeded or not.

        :param results: BigQuery results handed over to ``prepare_csv_file``
        :param table_name: target table name, also used as the name of the S3 object
        """
        # TODO: implement store() method in S3BigQuery for this and assign
        # S3BigQuery to this task in nodes.yml
        csv_path = None
        try:
            csv_path, header = cls.prepare_csv_file(results)
            cls.dump_to_rdb(csv_path, header, table_name)

            if AmazonS3.is_enabled():
                bigquery_storage = StoragePool.get_connected_storage('S3BigQuery')
                bigquery_storage.store_file(csv_path, table_name)
        finally:
            # csv_path is still None when preparing the file itself failed.
            if csv_path:
                os.unlink(csv_path)
Exemple #3
0
    def post():
        """Store the user feedback sent in the request body in the analytics bucket.

        The request body has to be a JSON object containing the 'request_id' and
        'feedback' keys. The whole payload is stored in S3 under a key equal to the
        request id.

        :return: ``{'status': 'success'}`` once the payload has been stored
        :raises HTTPError: with status 400 when the payload is not a JSON object or
            one of the required keys is missing
        """
        input_json = request.get_json()

        # Only a JSON object can carry the required keys. A list or a string could
        # pass the membership tests below and then fail on the item lookup.
        if not isinstance(input_json, dict) or 'request_id' not in input_json:
            raise HTTPError(400, error="Expected JSON request")

        if 'feedback' not in input_json:
            raise HTTPError(400, error="Expected feedback")

        s3 = AmazonS3(bucket_name=UserFeedback._ANALYTICS_BUCKET_NAME)
        s3.connect()
        # Store data
        key = "{}".format(input_json["request_id"])
        s3.store_dict(input_json, key)

        return {'status': 'success'}
Exemple #4
0
    def post(self):
        """Store the recommendation feedback sent in the request body in S3.

        The request body has to be a JSON object containing 'request_id' and a
        'recommendation' object with a 'name' key. The whole payload is stored in
        the analytics bucket under the key ``<request_id>-<recommendation name>``.

        :return: ``{'status': 'success'}`` once the payload has been stored
        :raises HTTPError: with status 400 when the payload is not a JSON object or
            a required field is missing
        """
        input_json = request.get_json()

        # Only a JSON object can carry the required keys. A list or a string could
        # pass the membership test and then fail on the item lookup.
        if not isinstance(input_json, dict) or 'request_id' not in input_json:
            raise HTTPError(400, error="Expected JSON request")

        # 'recommendation' must be an object as well: `in` on a string is a substring
        # test and on null it raises TypeError.
        recommendation = input_json.get('recommendation')
        if not isinstance(recommendation, dict) or 'name' not in recommendation:
            raise HTTPError(400, error="Expected field name in recommendation")

        s3 = AmazonS3(bucket_name=self._ANALYTICS_BUCKET_NAME)
        s3.connect()
        # Store data
        key = "{}-{}".format(input_json["request_id"], recommendation["name"])
        s3.store_dict(input_json, key)

        return {'status': 'success'}
Exemple #5
0
    def execute(self, arguments, db, manifests, source=None):
        """Run mercator on every manifest and collect the extracted metadata.

        Each manifest is written into a temporary directory and processed by
        ``MercatorTask.run_mercator``. The first entry of the returned 'details' is
        annotated with the manifest file name, ecosystem and file path. When
        'external_request_id' is present in ``arguments``, the dependencies are also
        converted to concrete package/version pairs stored under '_resolved'.

        :param arguments: task arguments; 'ecosystem' is overwritten with the
            ecosystem of the manifest currently being processed
        :param db: database handle passed to ``Ecosystem.by_name``
        :param manifests: iterable of dicts with 'filename', 'content' and
            'ecosystem' keys and an optional 'filepath' key
        :param source: when equal to 'osio', the manifest content is replaced by the
            blob stored in S3 under the hash of the received content
        :return: ``{'result': [...]}`` with one mercator output per manifest
        :raises FatalTaskError: when mercator finds no metadata, or reports success
            without any dependencies
        :raises ClientError: re-raised when the manifest cannot be retrieved from S3
        """
        # TODO: reduce cyclomatic complexity

        def _flatten(deps, collect):
            """Append every node of a nested dependency tree to ``collect``."""
            for dep in deps:
                collect.append({'package': dep['name'], 'version': dep['version']})
                # Nodes without (or with a null) 'dependencies' entry end the recursion.
                _flatten(dep.get('dependencies') or [], collect)

        result = []
        for manifest in manifests:
            # If we receive a manifest file we need to save it first
            if source == 'osio':
                content_hash = generate_content_hash(manifest['content'])
                current_app.logger.info("{} file digest is {}".format(manifest['filename'],
                                                                      content_hash))

                s3 = AmazonS3(bucket_name='boosters-manifest')
                try:
                    s3.connect()
                    manifest['content'] = s3.retrieve_blob(content_hash).decode('utf-8')
                except ClientError as e:
                    current_app.logger.error("Unexpected error while retrieving S3 data: %s", e)
                    raise

            filename = manifest['filename']

            with TemporaryDirectory() as temp_path:
                with open(os.path.join(temp_path, filename), 'a+') as fd:
                    fd.write(manifest['content'])

                # mercator-go does not work if there is no package.json
                if 'shrinkwrap' in filename.lower():
                    with open(os.path.join(temp_path, 'package.json'), 'w') as f:
                        f.write(json.dumps({}))

                # Create instance manually since stack analysis is not handled by dispatcher
                subtask = MercatorTask.create_test_instance(task_name='metadata')
                arguments['ecosystem'] = manifest['ecosystem']
                out = subtask.run_mercator(arguments, temp_path, resolve_poms=False)

            if not out["details"]:
                raise FatalTaskError("No metadata found processing manifest file '{}'"
                                     .format(filename))

            details = out["details"][0]

            if 'dependencies' not in details and out.get('status', None) == 'success':
                raise FatalTaskError("Dependencies could not be resolved from manifest file '{}'"
                                     .format(filename))

            details['manifest_file'] = filename
            details['ecosystem'] = manifest['ecosystem']
            details['manifest_file_path'] = manifest.get('filepath', 'File path not available')

            # If we're handling an external request we need to convert dependency specifications to
            # concrete versions that we can query later on in the `AggregatorTask`
            manifest_descriptor = get_manifest_descriptor_by_filename(filename)
            if 'external_request_id' in arguments:
                if manifest_descriptor.has_resolved_deps:  # npm-shrinkwrap.json, pom.xml
                    if "_dependency_tree_lock" in details:  # npm-shrinkwrap.json
                        dependency_holder = details["_dependency_tree_lock"]
                    else:  # pom.xml
                        dependency_holder = details
                    if 'dependencies' in dependency_holder:
                        manifest_dependencies = dependency_holder['dependencies']
                    else:
                        manifest_dependencies = []

                    resolved_deps = []
                    if manifest_descriptor.has_recursive_deps:  # npm-shrinkwrap.json
                        _flatten(manifest_dependencies, resolved_deps)
                    else:  # pom.xml
                        for spec in manifest_dependencies:
                            # Each entry has the form '<package> <version>'.
                            parts = spec.split(' ')
                            resolved_deps.append({'package': parts[0], 'version': parts[1]})
                else:  # package.json, requirements.txt
                    resolved_deps = self._handle_external_deps(
                        Ecosystem.by_name(db, arguments['ecosystem']),
                        details["dependencies"])

                details['_resolved'] = resolved_deps
            result.append(out)

        return {'result': result}