def execute(self, arguments, db, manifests, source=None):
    """Find and resolve dependencies for each supplied manifest file.

    :param arguments: task arguments; 'ecosystem' is set per manifest, and the
        presence of 'external_request_id' triggers dependency resolution
    :param db: database session used to look up the ecosystem
    :param manifests: list of manifest dicts with 'filename', 'content',
        'ecosystem' and optionally 'filepath'
    :param source: when 'osio' the manifest content is fetched from S3,
        keyed by the digest of the submitted content
    :return: {'result': [...]} with one mercator output dict per manifest
    """
    # TODO: reduce cyclomatic complexity
    result = []
    for manifest in manifests:
        if source == 'osio':
            # For OSIO the real manifest body lives in S3, keyed by content digest.
            content_hash = generate_content_hash(manifest['content'])
            current_app.logger.info("{} file digest is {}".format(
                manifest['filename'], content_hash))
            s3 = AmazonS3(bucket_name='boosters-manifest')
            try:
                s3.connect()
                manifest['content'] = s3.retrieve_blob(content_hash).decode('utf-8')
            except ClientError as e:
                current_app.logger.error(
                    "Unexpected error while retrieving S3 data: %s" % e)
                raise

        # Write the manifest to disk so mercator can inspect it.
        with TemporaryDirectory() as temp_path:
            with open(os.path.join(temp_path, manifest['filename']), 'a+') as fd:
                fd.write(manifest['content'])

            # mercator-go does not work if there is no package.json
            if 'shrinkwrap' in manifest['filename'].lower():
                with open(os.path.join(temp_path, 'package.json'), 'w') as f:
                    f.write(json.dumps({}))

            # Create instance manually since stack analysis is not handled by dispatcher
            subtask = MercatorTask.create_test_instance(task_name='metadata')
            arguments['ecosystem'] = manifest['ecosystem']
            out = subtask.run_mercator(arguments, temp_path, resolve_poms=False)

        if not out["details"]:
            raise FatalTaskError("No metadata found processing manifest file '{}'"
                                 .format(manifest['filename']))

        if 'dependencies' not in out['details'][0] and out.get('status', None) == 'success':
            raise FatalTaskError("Dependencies could not be resolved from manifest file '{}'"
                                 .format(manifest['filename']))

        out["details"][0]['manifest_file'] = manifest['filename']
        out["details"][0]['ecosystem'] = manifest['ecosystem']
        out["details"][0]['manifest_file_path'] = manifest.get(
            'filepath', 'File path not available')

        # If we're handling an external request we need to convert dependency
        # specifications to concrete versions that we can query later on in the
        # `AggregatorTask`.
        manifest_descriptor = get_manifest_descriptor_by_filename(manifest['filename'])
        if 'external_request_id' in arguments:
            manifest_dependencies = []
            if manifest_descriptor.has_resolved_deps:  # npm-shrinkwrap.json, pom.xml
                # NOTE: .get() with a default already copes with a missing key,
                # so the previous 'dependencies' membership checks were redundant.
                if "_dependency_tree_lock" in out["details"][0]:  # npm-shrinkwrap.json
                    manifest_dependencies = out["details"][0][
                        "_dependency_tree_lock"].get("dependencies", [])
                else:  # pom.xml
                    manifest_dependencies = out["details"][0].get("dependencies", [])
                if manifest_descriptor.has_recursive_deps:  # npm-shrinkwrap.json
                    def _flatten(deps, collect):
                        # Depth-first walk of the dependency tree collecting
                        # package/version pairs.
                        for dep in deps:
                            collect.append({'package': dep['name'],
                                            'version': dep['version']})
                            _flatten(dep['dependencies'], collect)
                    resolved_deps = []
                    _flatten(manifest_dependencies, resolved_deps)
                else:  # pom.xml; entries assumed to be "name version" strings
                    resolved_deps = \
                        [{'package': x.split(' ')[0], 'version': x.split(' ')[1]}
                         for x in manifest_dependencies]
            else:  # package.json, requirements.txt
                # The no-op try/except that merely re-raised has been removed.
                resolved_deps = self._handle_external_deps(
                    Ecosystem.by_name(db, arguments['ecosystem']),
                    out["details"][0]["dependencies"])
            out["details"][0]['_resolved'] = resolved_deps
        result.append(out)

    return {'result': result}
def post():
    """Handle the POST REST API call.

    Accepts manifest files (uploaded directly or fetched from a GitHub
    release), resolves their dependencies synchronously, forwards the result
    to the backbone stack-aggregator and recommender services, and upserts
    the request record into the database.
    """
    # TODO: reduce cyclomatic complexity
    decoded = decode_token()
    github_token = get_access_token('github')
    sid = request.args.get('sid')
    license_files = list()
    check_license = request.args.get('check_license', 'false') == 'true'
    github_url = request.form.get("github_url")
    ref = request.form.get('github_ref')
    user_email = request.headers.get('UserEmail')
    scan_repo_url = request.headers.get('ScanRepoUrl')
    headers = 'HEADER INFO: %r' % request.headers
    if not user_email:
        user_email = decoded.get('email', '*****@*****.**')

    if scan_repo_url:
        # Forward the repository scan request to the Gemini service
        # (best-effort; any failure maps to a 500).
        try:
            api_url = GEMINI_SERVER_URL
            dependency_files = request.files.getlist('dependencyFile[]')
            current_app.logger.info('%r' % dependency_files)
            data = {'git-url': scan_repo_url, 'email-ids': [user_email]}
            # The Authorization assignment was duplicated in both branches;
            # hoisted here once.
            _session.headers['Authorization'] = request.headers.get('Authorization')
            if dependency_files:
                files = list()
                for dependency_file in dependency_files:
                    # http://docs.python-requests.org/en/master/user/advanced/#post-multiple-multipart-encoded-files
                    files.append((dependency_file.name,
                                  (dependency_file.filename,
                                   dependency_file.read(), 'text/plain')))
                _session.post(
                    '{}/api/v1/user-repo/scan/experimental'.format(api_url),
                    data=data, files=files)
            else:
                _session.post('{}/api/v1/user-repo/scan'.format(api_url), json=data)
        except Exception as exc:
            raise HTTPError(500, "Could not process the scan endpoint call") from exc

    source = request.form.get('source')
    if github_url is not None:
        files = fetch_file_from_github_release(url=github_url,
                                               filename='pom.xml',
                                               token=github_token.get('access_token'),
                                               ref=ref)
    else:
        files = request.files.getlist('manifest[]')
    filepaths = request.values.getlist('filePath[]')
    license_files = request.files.getlist('license[]')

    current_app.logger.info('%r' % files)
    current_app.logger.info('%r' % filepaths)

    # At least one manifest file path should be present to analyse a stack
    if not filepaths:
        raise HTTPError(400, error="Error processing request. "
                                   "Please send a valid manifest file path.")
    if len(files) != len(filepaths):
        raise HTTPError(400, error="Error processing request. "
                                   "Number of manifests and filePaths must be the same.")
    # At least one manifest file should be present to analyse a stack
    if not files:
        raise HTTPError(400, error="Error processing request. "
                                   "Please upload a valid manifest files.")

    dt = datetime.datetime.now()
    if sid:
        # Re-submission of an existing request keeps its id.
        request_id = sid
        is_modified_flag = {'is_modified': True}
    else:
        request_id = uuid.uuid4().hex
        is_modified_flag = {'is_modified': False}

    manifests = []
    ecosystem = None
    for index, manifest_file_raw in enumerate(files):
        if github_url is not None:
            # Files fetched from GitHub arrive as dicts.
            filename = manifest_file_raw.get('filename', None)
            filepath = manifest_file_raw.get('filepath', None)
            content = manifest_file_raw.get('content')
        else:
            # Direct uploads arrive as file-storage objects.
            filename = manifest_file_raw.filename
            filepath = filepaths[index]
            content = manifest_file_raw.read().decode('utf-8')

        # check if manifest files with given name are supported
        manifest_descriptor = get_manifest_descriptor_by_filename(filename)
        if manifest_descriptor is None:
            # BUGFIX: the format string lacked the {filename} placeholder,
            # so .format() had no effect on the message.
            raise HTTPError(400, error="Manifest file '{filename}' is not supported"
                            .format(filename=filename))

        # Check if the manifest is valid
        # (the unused StringIO copy and unused 'iso' timestamp were removed)
        if not manifest_descriptor.validate(content):
            raise HTTPError(400, error="Error processing request. Please upload a valid "
                                       "manifest file '{filename}'".format(filename=filename))

        # Record the response details for this manifest file
        manifests.append({'filename': filename,
                          'content': content,
                          'ecosystem': manifest_descriptor.ecosystem,
                          'filepath': filepath})

    data = {'api_name': 'stack_analyses',
            'user_email': user_email,
            'user_profile': decoded}
    args = {'external_request_id': request_id, 'ecosystem': ecosystem, 'data': data}

    try:
        api_url = current_app.config['F8_API_BACKBONE_HOST']
        # Resolve dependencies synchronously, then fan the result out to the
        # backbone aggregator and recommender services.
        d = DependencyFinder()
        deps = d.execute(args, rdb.session, manifests, source)
        deps['external_request_id'] = request_id
        deps['current_stack_license'] = extract_licenses(license_files)
        deps.update(is_modified_flag)
        _session.post('{}/api/v1/stack_aggregator'.format(api_url), json=deps,
                      params={'check_license': str(check_license).lower()})
        _session.post('{}/api/v1/recommender'.format(api_url), json=deps,
                      params={'check_license': str(check_license).lower()})
    except Exception as exc:
        raise HTTPError(500, ("Could not process {t}.".format(t=request_id))) from exc

    try:
        # Upsert so a re-submitted sid updates its snapshot in place.
        insert_stmt = insert(StackAnalysisRequest).values(
            id=request_id,
            submitTime=str(dt),
            requestJson={'manifest': manifests, 'header_info': headers},
            dep_snapshot=deps)
        do_update_stmt = insert_stmt.on_conflict_do_update(
            index_elements=['id'], set_=dict(dep_snapshot=deps))
        rdb.session.execute(do_update_stmt)
        rdb.session.commit()
        return {"status": "success",
                "submitted_at": str(dt),
                "id": str(request_id)}
    except SQLAlchemyError as e:
        # BUGFIX: was "sid" (None on fresh submissions); use the actual id.
        raise HTTPError(500, "Error updating log for request {t}".format(
            t=request_id)) from e
def post():
    """Handle the POST REST API call.

    Accepts manifest files (uploaded directly or fetched from a GitHub URL),
    optionally queries the analytics /appstack endpoint for package.json
    manifests, stores the request and schedules the analysis flow.
    """
    decoded = decode_token()
    github_url = request.form.get("github_url")
    if github_url is not None:
        files = GithubRead().get_files_github_url(github_url)
    else:
        files = request.files.getlist('manifest[]')
    filepaths = request.values.getlist('filePath[]')

    # At least one manifest file path should be present to analyse a stack
    if not filepaths:
        raise HTTPError(400, error="Error processing request. "
                                   "Please send a valid manifest file path.")
    if len(files) != len(filepaths):
        raise HTTPError(400, error="Error processing request. "
                                   "Number of manifests and filePaths must be the same.")
    # At least one manifest file should be present to analyse a stack
    if not files:
        raise HTTPError(400, error="Error processing request. "
                                   "Please upload a valid manifest files.")

    dt = datetime.datetime.now()
    origin = request.form.get('origin')
    request_id = uuid.uuid4().hex
    manifests = []
    ecosystem = None
    for index, manifest_file_raw in enumerate(files):
        if github_url is not None:
            # Files fetched from GitHub arrive as dicts.
            filename = manifest_file_raw.get('filename', None)
            filepath = manifest_file_raw.get('filepath', None)
            content = manifest_file_raw.get('content')
        else:
            # Direct uploads arrive as file-storage objects.
            filename = manifest_file_raw.filename
            filepath = filepaths[index]
            content = manifest_file_raw.read().decode('utf-8')

        # check if manifest files with given name are supported
        manifest_descriptor = get_manifest_descriptor_by_filename(filename)
        if manifest_descriptor is None:
            # BUGFIX: the format string lacked the {filename} placeholder,
            # so .format() had no effect on the message.
            raise HTTPError(400, error="Manifest file '{filename}' is not supported"
                            .format(filename=filename))

        # In memory file to be passed as an API parameter to /appstack
        manifest_file = StringIO(content)

        # Check if the manifest is valid
        if not manifest_descriptor.validate(content):
            raise HTTPError(400, error="Error processing request. Please upload a valid "
                                       "manifest file '{filename}'".format(filename=filename))

        # appstack API call
        # Limitation: Currently, appstack can support only package.json
        # The following condition is to be reworked
        appstack_id = ''
        if 'package.json' in filename:
            appstack_files = {'packagejson': manifest_file}
            url = current_app.config["BAYESIAN_ANALYTICS_URL"]
            endpoint = "{analytics_baseurl}/api/{version}/appstack".format(
                analytics_baseurl=url, version=ANALYTICS_API_VERSION)
            try:
                response = requests.post(endpoint, files=appstack_files)
            except Exception as exc:
                # Best-effort: analytics failure must not fail the request.
                # (logger.warn is a deprecated alias of logger.warning)
                current_app.logger.warning("Analytics query: {}".format(exc))
            else:
                if response.status_code == 200:
                    resp = response.json()
                    appstack_id = resp.get('appstack_id', '')
                else:
                    current_app.logger.warning("{status}: {error}".format(
                        status=response.status_code, error=response.content))

        # Record the response details for this manifest file
        manifest = {'filename': filename,
                    'content': content,
                    'ecosystem': manifest_descriptor.ecosystem,
                    'filepath': filepath}
        if appstack_id != '':
            manifest['appstack_id'] = appstack_id
        manifests.append(manifest)

    # Insert in a single commit. Gains - a) performance, b) avoid insert
    # inconsistencies for a single request
    try:
        req = StackAnalysisRequest(
            id=request_id,
            submitTime=str(dt),
            requestJson={'manifest': manifests},
            origin=origin
        )
        rdb.session.add(req)
        rdb.session.commit()
    except SQLAlchemyError as e:
        current_app.logger.exception('Failed to create new analysis request')
        raise HTTPError(500, "Error inserting log for request {t}".format(
            t=request_id)) from e

    try:
        data = {'api_name': 'stack_analyses',
                'user_email': decoded.get('email', '*****@*****.**'),
                'user_profile': decoded}
        args = {'external_request_id': request_id, 'ecosystem': ecosystem, 'data': data}
        server_run_flow('stackApiGraphV2Flow', args)
    except Exception as exc:
        # Just log the exception here for now
        current_app.logger.exception('Failed to schedule AggregatingMercatorTask for id {id}'
                                     .format(id=request_id))
        raise HTTPError(500, ("Error processing request {t}. manifest files "
                              "could not be processed"
                              .format(t=request_id))) from exc

    return {"status": "success", "submitted_at": str(dt), "id": str(request_id)}
def execute(self, arguments):
    """Task code.

    Loads the stack analysis request identified by 'external_request_id',
    runs mercator over each stored manifest and resolves dependencies.

    :param arguments: dictionary with task arguments
    :return: {}, results
    """
    self._strict_assert(arguments.get('data'))
    self._strict_assert(arguments.get('external_request_id'))

    db = self.storage.session
    try:
        results = db.query(StackAnalysisRequest)\
            .filter(StackAnalysisRequest.id == arguments.get('external_request_id'))\
            .first()
    except SQLAlchemyError:
        # Keep the session usable after a failed query.
        db.rollback()
        raise

    manifests = []
    if results is not None:
        row = results.to_dict()
        request_json = row.get("requestJson", {})
        manifests = request_json.get('manifest', [])

    # If we receive a manifest file we need to save it first
    result = []
    for manifest in manifests:
        # Write the manifest to disk so mercator can inspect it.
        with TemporaryDirectory() as temp_path:
            with open(os.path.join(temp_path, manifest['filename']), 'a+') as fd:
                fd.write(manifest['content'])

            # mercator-go does not work if there is no package.json
            if 'shrinkwrap' in manifest['filename'].lower():
                with open(os.path.join(temp_path, 'package.json'), 'w') as f:
                    f.write(json.dumps({}))

            # Create instance manually since stack analysis is not handled by dispatcher
            subtask = MercatorTask.create_test_instance(task_name=self.task_name)
            arguments['ecosystem'] = manifest['ecosystem']
            out = subtask.run_mercator(arguments, temp_path)

        if not out["details"]:
            raise FatalTaskError(
                "No metadata found processing manifest file '{}'".format(
                    manifest['filename']))

        if 'dependencies' not in out['details'][0] and out.get('status', None) == 'success':
            raise FatalTaskError(
                "Dependencies could not be resolved from manifest file '{}'"
                .format(manifest['filename']))

        out["details"][0]['manifest_file'] = manifest['filename']
        out["details"][0]['ecosystem'] = manifest['ecosystem']
        out["details"][0]['manifest_file_path'] = manifest.get(
            'filepath', 'File path not available')

        # If we're handling an external request we need to convert dependency
        # specifications to concrete versions that we can query later on in the
        # `AggregatorTask`.
        manifest_descriptor = get_manifest_descriptor_by_filename(manifest['filename'])
        if 'external_request_id' in arguments:
            manifest_dependencies = []
            if manifest_descriptor.has_resolved_deps:  # npm-shrinkwrap.json, pom.xml
                # NOTE: .get() with a default already copes with a missing key,
                # so the previous 'dependencies' membership checks were redundant.
                if "_dependency_tree_lock" in out["details"][0]:  # npm-shrinkwrap.json
                    manifest_dependencies = out["details"][0][
                        "_dependency_tree_lock"].get("dependencies", [])
                else:  # pom.xml
                    manifest_dependencies = out["details"][0].get("dependencies", [])
                if manifest_descriptor.has_recursive_deps:  # npm-shrinkwrap.json
                    def _flatten(deps, collect):
                        # Depth-first walk of the dependency tree collecting
                        # package/version pairs.
                        for dep in deps:
                            collect.append({'package': dep['name'],
                                            'version': dep['version']})
                            _flatten(dep['dependencies'], collect)
                    resolved_deps = []
                    _flatten(manifest_dependencies, resolved_deps)
                else:  # pom.xml; entries assumed to be "name version" strings
                    resolved_deps = \
                        [{'package': x.split(' ')[0], 'version': x.split(' ')[1]}
                         for x in manifest_dependencies]
            else:  # package.json, requirements.txt
                resolved_deps = self._handle_external_deps(
                    self.storage.get_ecosystem(arguments['ecosystem']),
                    out["details"][0]["dependencies"])
            out["details"][0]['_resolved'] = resolved_deps
        result.append(out)

    return {'result': result}
def post():
    """Handle the POST REST API call.

    Accepts manifest files (uploaded directly or fetched from a GitHub URL),
    stores the request, resolves dependencies synchronously and forwards the
    result to the backbone stack-aggregator and recommender services.
    """
    decoded = decode_token()
    github_url = request.form.get("github_url")
    if github_url is not None:
        files = GithubRead().get_files_github_url(github_url)
    else:
        files = request.files.getlist('manifest[]')
    filepaths = request.values.getlist('filePath[]')
    current_app.logger.info('%r' % files)
    current_app.logger.info('%r' % filepaths)

    # At least one manifest file path should be present to analyse a stack
    if not filepaths:
        raise HTTPError(400, error="Error processing request. "
                                   "Please send a valid manifest file path.")
    if len(files) != len(filepaths):
        raise HTTPError(400, error="Error processing request. "
                                   "Number of manifests and filePaths must be the same.")
    # At least one manifest file should be present to analyse a stack
    if not files:
        raise HTTPError(400, error="Error processing request. "
                                   "Please upload a valid manifest files.")

    dt = datetime.datetime.now()
    request_id = uuid.uuid4().hex
    manifests = []
    ecosystem = None
    for index, manifest_file_raw in enumerate(files):
        if github_url is not None:
            # Files fetched from GitHub arrive as dicts.
            filename = manifest_file_raw.get('filename', None)
            filepath = manifest_file_raw.get('filepath', None)
            content = manifest_file_raw.get('content')
        else:
            # Direct uploads arrive as file-storage objects.
            filename = manifest_file_raw.filename
            filepath = filepaths[index]
            content = manifest_file_raw.read().decode('utf-8')

        # check if manifest files with given name are supported
        manifest_descriptor = get_manifest_descriptor_by_filename(filename)
        if manifest_descriptor is None:
            # BUGFIX: the format string lacked the {filename} placeholder,
            # so .format() had no effect on the message.
            raise HTTPError(400, error="Manifest file '{filename}' is not supported"
                            .format(filename=filename))

        # Check if the manifest is valid
        # (the unused StringIO copy and unused 'iso' timestamp were removed)
        if not manifest_descriptor.validate(content):
            raise HTTPError(400, error="Error processing request. Please upload a valid "
                                       "manifest file '{filename}'".format(filename=filename))

        # Record the response details for this manifest file
        manifests.append({'filename': filename,
                          'content': content,
                          'ecosystem': manifest_descriptor.ecosystem,
                          'filepath': filepath})

    try:
        req = StackAnalysisRequest(
            id=request_id,
            submitTime=str(dt),
            requestJson={'manifest': manifests},
        )
        rdb.session.add(req)
        rdb.session.commit()
    except SQLAlchemyError as e:
        raise HTTPError(500, "Error inserting log for request {t}".format(
            t=request_id)) from e

    data = {'api_name': 'stack_analyses',
            'user_email': decoded.get('email', '*****@*****.**'),
            'user_profile': decoded}
    args = {'external_request_id': request_id, 'ecosystem': ecosystem, 'data': data}

    try:
        api_url = current_app.config['F8_API_BACKBONE_HOST']
        # Resolve dependencies synchronously, then fan the result out to the
        # backbone aggregator and recommender services.
        d = DependencyFinder()
        deps = d.execute(args, rdb.session)
        deps['external_request_id'] = request_id
        _session.post('{}/api/v1/stack_aggregator'.format(api_url), json=deps)
        _session.post('{}/api/v1/recommender'.format(api_url), json=deps)
    except Exception as exc:
        raise HTTPError(500, ("Could not process {t}.".format(t=request_id))) from exc

    return {"status": "success", "submitted_at": str(dt), "id": str(request_id)}