def execute(self, arguments=None, persist=True, check_license=False):
    """Execute the recommendation (v1) task.

    :param arguments: task payload; expects 'result' (list of resolved
        dependency results, one entry per manifest file) and
        'external_request_id'
    :param persist: when True, the task result is written to RDS
    :param check_license: when True, license analysis filters the alternate
        and companion candidates before they are returned
    :return: dict with 'recommendation' status ('success' or 'pgm_error'),
        'external_request_id' and, on success, the aggregated 'result'
    """
    started_at = datetime.datetime.utcnow().strftime(
        "%Y-%m-%dT%H:%M:%S.%f")
    results = arguments.get('result')
    external_request_id = arguments.get('external_request_id')
    recommendations = []
    input_stack = {}
    transitive_stack = set()

    # Build the direct-dependency map (package -> version) and the flat
    # set of transitive package names, aggregated across all manifests.
    for result in results:
        temp_input_stack = {
            d["package"]: d["version"]
            for d in result.get("details", [])[0].get("_resolved")
        }
        for tdeps in result.get("details", [])[0].get("_resolved"):
            transitive_stack.update(
                d['package'] for d in tdeps.get('deps', []))
        input_stack.update(temp_input_stack)

    for result in results:
        details = result['details'][0]
        resolved = details['_resolved']
        manifest_file_path = details['manifest_file_path']

        recommendation = {
            'companion': [],
            'alternate': [],
            'usage_outliers': [],
            'manifest_file_path': manifest_file_path
        }
        new_arr = [r['package'] for r in resolved]

        # If new_arr is empty, nothing was resolved for this manifest:
        # record the empty recommendation and move on.
        if not new_arr:
            recommendations.append(recommendation)
            continue

        insights_payload = {
            'ecosystem': details['ecosystem'],
            'transitive_stack': list(transitive_stack),
            'unknown_packages_ratio_threshold':
                float(os.environ.get('UNKNOWN_PACKAGES_THRESHOLD', 0.3)),
            'package_list': new_arr,
            'comp_package_count_threshold': int(
                os.environ.get('MAX_COMPANION_PACKAGES', 5))
        }
        # Kronos-backed ecosystems additionally get alternate-package and
        # outlier thresholds in the payload.
        if details['ecosystem'] in self.kronos_ecosystems:
            insights_payload.update({
                'alt_package_count_threshold': int(
                    os.environ.get('MAX_ALTERNATE_PACKAGES', 2)),
                'outlier_probability_threshold': float(
                    os.environ.get('OUTLIER_THRESHOLD', 0.6)),
                'user_persona': "1",  # TODO - remove janus hardcoded value
            })
        input_task_for_insights_recommender = [insights_payload]

        # Call PGM and get the response
        start = datetime.datetime.utcnow()
        insights_response = self.call_insights_recommender(
            input_task_for_insights_recommender)
        elapsed_seconds = (datetime.datetime.utcnow() -
                           start).total_seconds()
        # Lazy %-args logging; the original concatenated
        # "...response" + "for..." without a separating space.
        logger.info(
            "It took %s seconds to get insight's response for "
            "external request %s.", elapsed_seconds, external_request_id)

        # From PGM response process companion and alternate packages and
        # then get Data from Graph
        # TODO - implement multiple manifest file support for below loop
        if insights_response is not None:
            for insights_result in insights_response:
                companion_packages = []
                ecosystem = insights_result['ecosystem']

                # Get usage based outliers
                recommendation['usage_outliers'] = \
                    insights_result.get('outlier_package_list', [])
                # Append Topics for User Stack
                recommendation['input_stack_topics'] = insights_result.get(
                    'package_to_topic_dict', {})
                # Add missing packages unknown to PGM
                recommendation['missing_packages_pgm'] = \
                    insights_result.get('missing_packages', [])

                for pkg in insights_result['companion_packages']:
                    companion_packages.append(pkg['package_name'])

                # Get Companion Packages from Graph
                comp_packages_graph = GraphDB().get_version_information(
                    companion_packages, ecosystem)

                # Apply Version Filters
                filtered_comp_packages_graph, filtered_list = GraphDB(
                ).filter_versions(comp_packages_graph, input_stack,
                                  external_request_id, rec_type="COMPANION")

                filtered_companion_packages = \
                    set(companion_packages).difference(set(filtered_list))
                logger.info(
                    "Companion Packages Filtered for external_request_id"
                    " %s %s", external_request_id,
                    filtered_companion_packages)

                # Get the topmost alternate package for each input package
                alternate_packages, final_dict = \
                    GraphDB.get_topmost_alternate(
                        insights_result=insights_result,
                        input_stack=input_stack)

                alt_packages_graph = []
                if alternate_packages:
                    alt_packages_graph = GraphDB().get_version_information(
                        alternate_packages, ecosystem)

                # Apply Version Filters
                filtered_alt_packages_graph, filtered_list = GraphDB(
                ).filter_versions(alt_packages_graph, input_stack,
                                  external_request_id, rec_type="ALTERNATE")

                filtered_alternate_packages = \
                    set(alternate_packages).difference(set(filtered_list))
                logger.info(
                    "Alternate Packages Filtered for external_request_id"
                    " %s %s", external_request_id,
                    filtered_alternate_packages)

                if check_license:
                    # Apply License Filters
                    lic_filtered_alt_graph, lic_filtered_comp_graph = \
                        License.perform_license_analysis(
                            resolved=resolved, ecosystem=ecosystem,
                            filtered_alt_packages_graph=filtered_alt_packages_graph,
                            filtered_comp_packages_graph=filtered_comp_packages_graph,
                            filtered_alternate_packages=filtered_alternate_packages,
                            filtered_companion_packages=filtered_companion_packages,
                            external_request_id=external_request_id
                        )
                else:
                    lic_filtered_alt_graph = filtered_alt_packages_graph
                    lic_filtered_comp_graph = filtered_comp_packages_graph

                # Get Topics Added to Filtered Packages
                topics_comp_packages_graph = GraphDB(). \
                    get_topics_for_comp(
                        lic_filtered_comp_graph,
                        insights_result.get('companion_packages', []))

                # Create Companion Block
                comp_packages = create_package_dict(
                    topics_comp_packages_graph)
                final_comp_packages = \
                    set_valid_cooccurrence_probability(comp_packages)
                recommendation['companion'] = final_comp_packages

                # Get Topics Added to Filtered Packages
                topics_comp_packages_graph = GraphDB(). \
                    get_topics_for_alt(
                        lic_filtered_alt_graph,
                        insights_result.get('alternate_packages', {}))

                # Create Alternate Dict
                alt_packages = create_package_dict(
                    topics_comp_packages_graph, final_dict)
                recommendation['alternate'] = alt_packages

            # One aggregated recommendation per manifest result.
            recommendations.append(recommendation)
        else:
            # PGM/insights service failed; abort the whole task.
            return {
                'recommendation': 'pgm_error',
                'external_request_id': external_request_id,
                'message': 'PGM Fetching error'
            }

    ended_at = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.%f")
    audit = {
        'started_at': started_at,
        'ended_at': ended_at,
        'version': 'v1'
    }
    task_result = {
        'recommendations': recommendations,
        '_audit': audit,
        '_release': 'None:None:None'
    }

    if persist:
        logger.info(
            "Recommendation process completed for %s."
            " Writing to RDS.", external_request_id)
        persist_data_in_db(external_request_id=external_request_id,
                           task_result=task_result,
                           worker='recommendation_v2',
                           started_at=started_at, ended_at=ended_at)
    return {
        'recommendation': 'success',
        'external_request_id': external_request_id,
        'result': task_result
    }
def test_create_package_dict(_mock_count):
    """Test the function create_package_dict."""
    # Fixture: companion-package graph response captured from GraphDB.
    with open('tests/data/companion_pkg_graph.json', 'r') as f:
        resp = json.load(f)
    out = create_package_dict(resp)
    assert len(out) > 1
def execute(self, arguments=None, persist=True, check_license=False):
    """Execute the recommendation (v2) task.

    :param arguments: task payload, unpacked into a ``RecommenderRequest``
        (must carry ``external_request_id``, ``packages``, ``ecosystem``)
    :param persist: when True, the result is written to RDS
    :param check_license: when True, license analysis filters the
        companion candidates before they are returned
    :return: dict with 'recommendation' status ('success' or 'pgm_error'),
        'external_request_id' and, on success, the 'result'
    """
    started_at = datetime.datetime.utcnow().strftime(
        "%Y-%m-%dT%H:%M:%S.%f")
    request = RecommenderRequest(**arguments)
    external_request_id = request.external_request_id
    normalized_packages = NormalizedPackages(request.packages,
                                             request.ecosystem)

    recommendation = {
        'companion': [],
        'usage_outliers': [],
    }
    package_list = [
        epv.name for epv in normalized_packages.direct_dependencies
    ]
    # Direct-dependency map (package -> version); loop-invariant, so it is
    # built once here instead of once per insights result.
    input_stack = {
        epv.name: epv.version
        for epv in normalized_packages.direct_dependencies
    }
    if package_list:
        insights_payload = {
            'ecosystem': request.ecosystem,
            'transitive_stack': [
                epv.name
                for epv in normalized_packages.transitive_dependencies
            ],
            'unknown_packages_ratio_threshold':
                float(os.environ.get('UNKNOWN_PACKAGES_THRESHOLD', 0.3)),
            'package_list': package_list,
            'comp_package_count_threshold': int(
                os.environ.get('MAX_COMPANION_PACKAGES', 5))
        }
        # Kronos-backed ecosystems additionally get the outlier threshold.
        if request.ecosystem in self.kronos_ecosystems:
            insights_payload.update({
                'outlier_probability_threshold': float(
                    os.environ.get('OUTLIER_THRESHOLD', 0.6)),
                'user_persona': "1",  # TODO - remove janus hardcoded value
            })
        input_task_for_insights_recommender = [insights_payload]

        # Call PGM and get the response
        insights_response = []
        start = time.time()
        if request.ecosystem != 'golang':  # No Companion Rec. for Golang.
            insights_response = self.call_insights_recommender(
                input_task_for_insights_recommender)
        logger.info('%s took %0.2f secs for call_insights_recommender()',
                    external_request_id, time.time() - start)

        # From PGM response process companion and alternate packages and
        # then get Data from Graph
        # TODO - implement multiple manifest file support for below loop
        if insights_response is None:
            # PGM/insights service failed; abort the whole task.
            return {
                'recommendation': 'pgm_error',
                'external_request_id': external_request_id,
                'message': 'PGM Fetching error'
            }
        for insights_result in insights_response:
            ecosystem = insights_result['ecosystem']

            # Get usage based outliers
            recommendation['usage_outliers'] = \
                insights_result.get('outlier_package_list', [])
            # Append Topics for User Stack
            recommendation['input_stack_topics'] = insights_result.get(
                'package_to_topic_dict', {})
            # Add missing packages unknown to PGM
            recommendation['missing_packages_pgm'] = insights_result.get(
                'missing_packages', [])
            companion_packages = [
                pkg['package_name']
                for pkg in insights_result['companion_packages']
            ]

            # Get Companion Packages from Graph
            graph_request_started_at = time.time()
            comp_packages_graph = GraphDB().get_version_information(
                companion_packages, ecosystem)
            logger.info(
                '%s took %0.2f secs for GraphDB().get_version_information()',
                external_request_id,
                time.time() - graph_request_started_at)

            # Apply Version Filters
            filtered_comp_packages_graph, filtered_list = GraphDB(
            ).filter_versions(comp_packages_graph, input_stack,
                              external_request_id, rec_type="COMPANION")

            filtered_companion_packages = \
                set(companion_packages).difference(set(filtered_list))
            logger.info('%s Filtered companion packages %s',
                        external_request_id, filtered_companion_packages)

            if check_license:
                # Apply License Filters
                license_request_started_at = time.time()
                lic_filtered_comp_graph = \
                    License.perform_license_analysis(
                        packages=normalized_packages,
                        filtered_comp_packages_graph=filtered_comp_packages_graph,
                        filtered_companion_packages=filtered_companion_packages,
                        external_request_id=external_request_id
                    )
                logger.info(
                    '%s took %0.2f secs for '
                    'License.perform_license_analysis()',
                    external_request_id,
                    time.time() - license_request_started_at)
            else:
                lic_filtered_comp_graph = filtered_comp_packages_graph

            # Get Topics Added to Filtered Packages
            topics_comp_packages_graph = GraphDB(). \
                get_topics_for_comp(
                    lic_filtered_comp_graph,
                    insights_result.get('companion_packages', []))

            # Create Companion Block
            comp_packages = create_package_dict(topics_comp_packages_graph)
            final_comp_packages = \
                set_valid_cooccurrence_probability(comp_packages)
            recommendation['companion'] = final_comp_packages

    ended_at = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.%f")
    audit = {
        'started_at': started_at,
        'ended_at': ended_at,
        'version': 'v2'
    }
    recommendation = StackRecommendationResult(
        **recommendation, **request.dict()).dict()
    recommendation['_audit'] = audit

    if persist:
        persist_data_in_db(external_request_id=external_request_id,
                           task_result=recommendation,
                           worker='recommendation_v2',
                           started_at=started_at, ended_at=ended_at)
        logger.info(
            '%s Recommendation process completed, result persisted into '
            'RDS.', external_request_id)
    return {
        'recommendation': 'success',
        'external_request_id': external_request_id,
        'result': recommendation
    }