def execute(self, repositories, ecosystem, bucket_name, object_key):
    """Aggregate package names from GitHub manifests.

    :param repositories: a list of repositories
    :param ecosystem: ecosystem, will appear in the resulting JSON file
    :param bucket_name: name of the bucket where to put the resulting JSON file
    :param object_key: object key of the resulting JSON file
    """
    s3 = StoragePool.get_connected_storage('S3GitHubManifestMetadata')

    package_list = []
    tagger_list = []
    for repo in repositories:
        try:
            repo_ecosystem = repo['ecosystem']
            repo_name = repo['repo_name']
        except KeyError:
            # missing dict keys raise KeyError, not ValueError
            self.log.error('Invalid configuration, skipping: {config}'.format(
                config=str(repo)))
            continue

        try:
            obj = '{e}/{repo_name}/dependency_snapshot.json'.format(
                e=repo_ecosystem, repo_name=repo_name.replace('/', ':'))
            dependency_snapshot = s3.retrieve_dict(obj)

            dependencies = dependency_snapshot.get('details', {}).get('runtime', [])

            # unique package names from the runtime dependencies
            packages = list({x.get('name') for x in dependencies})
            if packages:
                package_list.append(packages)

            # name -> version mapping handed to the tagger
            packages_version = {x.get('name'): x.get('version') for x in dependencies}
            if packages_version:
                tagger_list.extend(self._create_tagger_list(ecosystem, packages_version))
        except Exception as e:
            self.log.error('Unable to collect dependencies for {repo_name}: {reason}'.format(
                repo_name=repo_name, reason=str(e)))
            continue

    results = {
        'ecosystem': ecosystem,
        'package_list': package_list
    }
    self.log.info("Storing aggregated list of packages in S3")

    s3_dest = AmazonS3(bucket_name=bucket_name)
    s3_dest.connect()
    s3_dest.store_dict(results, object_key)
    s3_dest.store_dict(tagger_list, "tagger_list" + object_key)
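
# Illustrative, standalone sketch of the per-repo extraction step above:
# given a dependency snapshot shaped like the JSON this task pulls from S3
# (sample data, package names hypothetical), derive the unique package
# names and the name -> version mapping handed to _create_tagger_list().
def _demo_extract_packages():
    dependency_snapshot = {
        'details': {
            'runtime': [
                {'name': 'flask', 'version': '1.0.2'},
                {'name': 'requests', 'version': '2.18.4'},
            ]
        }
    }
    dependencies = dependency_snapshot.get('details', {}).get('runtime', [])
    packages = list({x.get('name') for x in dependencies})
    packages_version = {x.get('name'): x.get('version') for x in dependencies}
    # packages: ['flask', 'requests'] (set-derived, order not guaranteed)
    # packages_version: {'flask': '1.0.2', 'requests': '2.18.4'}
    return packages, packages_version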
@classmethod
def store_results(cls, results, table_name):
    """Store results from BigQuery in our DB."""
    # TODO: implement store() method in S3BigQuery for this and assign
    #       S3BigQuery to this task in nodes.yml
    csv_file = None
    try:
        csv_file, csv_header = cls.prepare_csv_file(results)
        cls.dump_to_rdb(csv_file, csv_header, table_name)
        if AmazonS3.is_enabled():
            s3 = StoragePool.get_connected_storage('S3BigQuery')
            s3.store_file(csv_file, table_name)
    finally:
        # the CSV is a temporary file; remove it even if the dump fails
        if csv_file:
            os.unlink(csv_file)
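
# Minimal, standalone sketch of the create/consume/unlink pattern that
# store_results() relies on, using only the standard library; the CSV
# content and the consume callable are stand-ins, not the task's real helpers.
import os
import tempfile

def _demo_csv_cleanup(consume):
    csv_file = None
    try:
        fd, csv_file = tempfile.mkstemp(suffix='.csv')
        with os.fdopen(fd, 'w') as f:
            f.write('name,version\nflask,1.0.2\n')
        consume(csv_file)  # e.g. dump to the DB, then mirror to S3
    finally:
        # remove the temporary file even if the consumer raises
        if csv_file:
            os.unlink(csv_file)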
@staticmethod
def post():
    input_json = request.get_json()

    if not input_json or 'request_id' not in input_json:
        raise HTTPError(400, error="Expected JSON request")

    if 'feedback' not in input_json:
        raise HTTPError(400, error="Expected feedback")

    s3 = AmazonS3(bucket_name=UserFeedback._ANALYTICS_BUCKET_NAME)
    s3.connect()

    # store the feedback under the request id it belongs to
    key = "{}".format(input_json["request_id"])
    s3.store_dict(input_json, key)

    return {'status': 'success'}
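
# Hypothetical client-side call for the endpoint above; the URL is an
# assumption (the route is not shown here). Only 'request_id' and
# 'feedback' are required by the handler.
import requests

def _demo_user_feedback_call():
    payload = {
        'request_id': 'dummy-request-id',  # required
        'feedback': {'useful': True},      # required; inner shape is free-form
    }
    resp = requests.post('http://localhost:5000/api/v1/user-feedback', json=payload)
    return resp.json()  # expected: {'status': 'success'}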
def post(self):
    input_json = request.get_json()

    if not input_json or 'request_id' not in input_json:
        raise HTTPError(400, error="Expected JSON request")

    if 'recommendation' not in input_json or 'name' not in input_json['recommendation']:
        raise HTTPError(400, error="Expected field name in recommendation")

    s3 = AmazonS3(bucket_name=self._ANALYTICS_BUCKET_NAME)
    s3.connect()

    # one object per (request id, recommended package) pair
    key = "{}-{}".format(input_json["request_id"], input_json["recommendation"]["name"])
    s3.store_dict(input_json, key)

    return {'status': 'success'}
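
# Hypothetical client-side call for the recommendation-feedback variant;
# again the URL is an assumption. Note the resulting S3 key is
# '<request_id>-<recommendation name>', i.e. one object per recommended
# package rather than one per request.
import requests

def _demo_recommendation_feedback_call():
    payload = {
        'request_id': 'dummy-request-id',
        'recommendation': {'name': 'flask', 'useful': False},
    }
    resp = requests.post('http://localhost:5000/api/v1/recommendation-feedback',
                         json=payload)
    return resp.json()  # expected: {'status': 'success'}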
def execute(self, arguments, db, manifests, source=None):
    """Dependency finder logic."""
    # TODO: reduce cyclomatic complexity
    # If we receive a manifest file we need to save it first
    result = []
    for manifest in manifests:
        # OSIO manifests are stored in S3 under their content hash,
        # so fetch the real content before processing
        if source == 'osio':
            content_hash = generate_content_hash(manifest['content'])
            current_app.logger.info("{} file digest is {}".format(
                manifest['filename'], content_hash))

            s3 = AmazonS3(bucket_name='boosters-manifest')
            try:
                s3.connect()
                manifest['content'] = s3.retrieve_blob(content_hash).decode('utf-8')
            except ClientError as e:
                current_app.logger.error("Unexpected error while retrieving S3 data: %s" % e)
                raise

        with TemporaryDirectory() as temp_path:
            with open(os.path.join(temp_path, manifest['filename']), 'a+') as fd:
                fd.write(manifest['content'])

            # mercator-go does not work if there is no package.json
            if 'shrinkwrap' in manifest['filename'].lower():
                with open(os.path.join(temp_path, 'package.json'), 'w') as f:
                    f.write(json.dumps({}))

            # Create instance manually since stack analysis is not handled by dispatcher
            subtask = MercatorTask.create_test_instance(task_name='metadata')
            arguments['ecosystem'] = manifest['ecosystem']
            out = subtask.run_mercator(arguments, temp_path, resolve_poms=False)

        if not out["details"]:
            raise FatalTaskError("No metadata found processing manifest file '{}'"
                                 .format(manifest['filename']))

        if 'dependencies' not in out['details'][0] and out.get('status') == 'success':
            raise FatalTaskError("Dependencies could not be resolved from manifest file '{}'"
                                 .format(manifest['filename']))

        out["details"][0]['manifest_file'] = manifest['filename']
        out["details"][0]['ecosystem'] = manifest['ecosystem']
        out["details"][0]['manifest_file_path'] = manifest.get('filepath',
                                                               'File path not available')

        # If we're handling an external request we need to convert dependency
        # specifications to concrete versions that we can query later on in
        # the `AggregatorTask`
        manifest_descriptor = get_manifest_descriptor_by_filename(manifest['filename'])
        if 'external_request_id' in arguments:
            manifest_dependencies = []
            if manifest_descriptor.has_resolved_deps:  # npm-shrinkwrap.json, pom.xml
                if "_dependency_tree_lock" in out["details"][0]:  # npm-shrinkwrap.json
                    manifest_dependencies = out["details"][0]["_dependency_tree_lock"].get(
                        "dependencies", [])
                else:  # pom.xml
                    manifest_dependencies = out["details"][0].get("dependencies", [])

                if manifest_descriptor.has_recursive_deps:  # npm-shrinkwrap.json
                    def _flatten(deps, collect):
                        for dep in deps:
                            collect.append({'package': dep['name'],
                                            'version': dep['version']})
                            # leaf entries may lack a 'dependencies' key
                            _flatten(dep.get('dependencies', []), collect)

                    resolved_deps = []
                    _flatten(manifest_dependencies, resolved_deps)
                else:  # pom.xml
                    resolved_deps = [{'package': x.split(' ')[0],
                                      'version': x.split(' ')[1]}
                                     for x in manifest_dependencies]
            else:  # package.json, requirements.txt
                resolved_deps = self._handle_external_deps(
                    Ecosystem.by_name(db, arguments['ecosystem']),
                    out["details"][0]["dependencies"])

            out["details"][0]['_resolved'] = resolved_deps

        result.append(out)

    return {'result': result}
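
# Standalone sketch of the _flatten() step above on an npm-shrinkwrap-style
# tree (sample data, package names hypothetical): nested 'dependencies'
# lists collapse into one flat list of pinned packages.
def _demo_flatten():
    tree = [
        {'name': 'a', 'version': '1.0.0', 'dependencies': [
            {'name': 'b', 'version': '2.0.0', 'dependencies': []},
        ]},
        {'name': 'c', 'version': '3.1.4', 'dependencies': []},
    ]

    def _flatten(deps, collect):
        for dep in deps:
            collect.append({'package': dep['name'], 'version': dep['version']})
            _flatten(dep.get('dependencies', []), collect)

    resolved_deps = []
    _flatten(tree, resolved_deps)
    return resolved_deps
    # [{'package': 'a', 'version': '1.0.0'},
    #  {'package': 'b', 'version': '2.0.0'},
    #  {'package': 'c', 'version': '3.1.4'}]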