def execute(self, arguments=None, persist=True, check_license=False):
    """Execute task."""
    started_at = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.%f")
    results = arguments.get('result', None)
    external_request_id = arguments.get('external_request_id', None)
    recommendations = []
    input_stack = {}
    for result in results:
        temp_input_stack = {d["package"]: d["version"]
                            for d in result.get("details", [])[0].get("_resolved")}
        input_stack.update(temp_input_stack)

    for result in results:
        details = result['details'][0]
        resolved = details['_resolved']
        manifest_file_path = details['manifest_file_path']

        recommendation = {
            'companion': [],
            'alternate': [],
            'usage_outliers': [],
            'manifest_file_path': manifest_file_path
        }
        new_arr = [r['package'] for r in resolved]

        # If new_arr is an empty list
        if not new_arr:
            recommendations.append(recommendation)
            continue

        insights_payload = {
            'ecosystem': details['ecosystem'],
            'unknown_packages_ratio_threshold':
                float(os.environ.get('UNKNOWN_PACKAGES_THRESHOLD', 0.3)),
            'package_list': new_arr,
            'comp_package_count_threshold': int(os.environ.get(
                'MAX_COMPANION_PACKAGES', 5))
        }
        if details['ecosystem'] in self.kronos_ecosystems:
            insights_payload.update({
                'alt_package_count_threshold': int(os.environ.get(
                    'MAX_ALTERNATE_PACKAGES', 2)),
                'outlier_probability_threshold': float(os.environ.get(
                    'OUTLIER_THRESHOLD', 0.6)),
                'user_persona': "1",  # TODO - remove janus hardcoded value
            })
        input_task_for_insights_recommender = [insights_payload]

        # Call PGM and get the response
        start = datetime.datetime.utcnow()
        insights_response = self.call_insights_recommender(
            input_task_for_insights_recommender)
        elapsed_seconds = (datetime.datetime.utcnow() - start).total_seconds()
        msg = "It took {t} seconds to get insight's response " \
              "for external request {e}.".format(t=elapsed_seconds,
                                                 e=external_request_id)
        logger.info(msg)

        # From PGM response process companion and alternate packages and
        # then get Data from Graph
        # TODO - implement multiple manifest file support for below loop
        if insights_response is not None:
            for insights_result in insights_response:
                companion_packages = []
                ecosystem = insights_result['ecosystem']

                # Get usage based outliers
                recommendation['usage_outliers'] = \
                    insights_result.get('outlier_package_list', [])

                # Append Topics for User Stack
                recommendation['input_stack_topics'] = insights_result.get(
                    'package_to_topic_dict', {})
                # Add missing packages unknown to PGM
                recommendation['missing_packages_pgm'] = insights_result.get(
                    'missing_packages', [])

                for pkg in insights_result['companion_packages']:
                    companion_packages.append(pkg['package_name'])

                # Get Companion Packages from Graph
                comp_packages_graph = GraphDB().get_version_information(
                    companion_packages, ecosystem)

                # Apply Version Filters
                filtered_comp_packages_graph, filtered_list = GraphDB().filter_versions(
                    comp_packages_graph, input_stack, external_request_id,
                    rec_type="COMPANION")

                filtered_companion_packages = \
                    set(companion_packages).difference(set(filtered_list))
                logger.info(
                    "Companion Packages Filtered for external_request_id {} {}"
                    .format(external_request_id, filtered_companion_packages)
                )

                # Get the topmost alternate package for each input package
                alternate_packages, final_dict = GraphDB.get_topmost_alternate(
                    insights_result=insights_result,
                    input_stack=input_stack
                )

                alt_packages_graph = []
                if alternate_packages:
                    alt_packages_graph = GraphDB().get_version_information(
                        alternate_packages, ecosystem)

                # Apply Version Filters
                filtered_alt_packages_graph, filtered_list = GraphDB().filter_versions(
                    alt_packages_graph, input_stack, external_request_id,
                    rec_type="ALTERNATE")

                filtered_alternate_packages = \
                    set(alternate_packages).difference(set(filtered_list))
                logger.info(
                    "Alternate Packages Filtered for external_request_id {} {}"
                    .format(external_request_id, filtered_alternate_packages)
                )

                if check_license:
                    # Apply License Filters
                    lic_filtered_alt_graph, lic_filtered_comp_graph = \
                        License.perform_license_analysis(
                            resolved=resolved, ecosystem=ecosystem,
                            filtered_alt_packages_graph=filtered_alt_packages_graph,
                            filtered_comp_packages_graph=filtered_comp_packages_graph,
                            filtered_alternate_packages=filtered_alternate_packages,
                            filtered_companion_packages=filtered_companion_packages,
                            external_request_id=external_request_id
                        )
                else:
                    lic_filtered_alt_graph = filtered_alt_packages_graph
                    lic_filtered_comp_graph = filtered_comp_packages_graph

                # Get Topics Added to Filtered Packages
                topics_comp_packages_graph = GraphDB(). \
                    get_topics_for_comp(lic_filtered_comp_graph,
                                        insights_result.get('companion_packages', []))

                # Create Companion Block
                comp_packages = create_package_dict(topics_comp_packages_graph)
                final_comp_packages = \
                    set_valid_cooccurrence_probability(comp_packages)

                recommendation['companion'] = final_comp_packages

                # Get Topics Added to Filtered Packages
                topics_comp_packages_graph = GraphDB(). \
                    get_topics_for_alt(lic_filtered_alt_graph,
                                       insights_result.get('alternate_packages', {}))

                # Create Alternate Dict
                alt_packages = create_package_dict(topics_comp_packages_graph,
                                                   final_dict)

                recommendation['alternate'] = alt_packages

            recommendations.append(recommendation)
        else:
            return {
                'recommendation': 'pgm_error',
                'external_request_id': external_request_id,
                'message': 'PGM Fetching error'
            }

    ended_at = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.%f")
    audit = {'started_at': started_at, 'ended_at': ended_at, 'version': 'v1'}

    task_result = {
        'recommendations': recommendations,
        '_audit': audit,
        '_release': 'None:None:None'
    }

    if persist:
        return persist_data_in_db(external_request_id=external_request_id,
                                  task_result=task_result)
    else:
        return {'recommendation': 'success',
                'external_request_id': external_request_id,
                'result': task_result}
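# Hedged usage sketch (not part of the original module): it illustrates the
# 'arguments' payload shape that execute() above reads via arguments.get(...).
# The task class name RecommendationTask, the request id, and the single
# resolved package below are assumptions made purely for illustration.
sample_arguments = {
    'external_request_id': 'req-0001',        # hypothetical request id
    'result': [{
        'details': [{
            'ecosystem': 'maven',              # assumed ecosystem
            'manifest_file_path': 'pom.xml',
            '_resolved': [
                {'package': 'io.vertx:vertx-core', 'version': '3.5.4'}
            ]
        }]
    }]
}
# task = RecommendationTask()                                  # assumed class
# response = task.execute(arguments=sample_arguments, persist=False)
# print(response['recommendation'])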
def execute(self, arguments=None, persist=True, check_license=False):
    """Execute task."""
    # TODO: reduce cyclomatic complexity
    started_at = datetime.datetime.utcnow().strftime(
        "%Y-%m-%dT%H:%M:%S.%f")
    results = arguments.get('result', None)
    external_request_id = arguments.get('external_request_id', None)
    input_task_for_insights_recommender = []
    recommendations = []

    # TODO: refactoring
    input_stack = {}
    for result in results:
        temp_input_stack = {
            d["package"]: d["version"]
            for d in result.get("details", [])[0].get("_resolved")
        }
        input_stack.update(temp_input_stack)

    for result in results:
        details = result['details'][0]
        resolved = details['_resolved']
        manifest_file_path = details['manifest_file_path']

        recommendation = {
            'companion': [],
            'alternate': [],
            'usage_outliers': [],
            'manifest_file_path': manifest_file_path
        }
        new_arr = [r['package'] for r in resolved]

        # If new_arr is an empty list
        if not new_arr:
            recommendations.append(recommendation)
            continue

        json_object = {
            'ecosystem': details['ecosystem'],
            'unknown_packages_ratio_threshold':
                float(os.environ.get('UNKNOWN_PACKAGES_THRESHOLD', 0.3)),
            'package_list': new_arr,
            'comp_package_count_threshold':
                int(os.environ.get('MAX_COMPANION_PACKAGES', 5))
        }

        if details['ecosystem'] in self.kronos_ecosystems:
            json_object.update({
                'alt_package_count_threshold':
                    int(os.environ.get('MAX_ALTERNATE_PACKAGES', 2)),
                'outlier_probability_threshold':
                    float(os.environ.get('OUTLIER_THRESHOLD', 0.6)),
                'user_persona': "1",  # TODO - remove janus hardcoded value
            })
        input_task_for_insights_recommender.append(json_object)

        # Call PGM and get the response
        start = datetime.datetime.utcnow()
        insights_response = self.call_insights_recommender(
            input_task_for_insights_recommender)
        elapsed_seconds = (datetime.datetime.utcnow() - start).total_seconds()
        msg = "It took {t} seconds to get insight's response " \
              "for external request {e}.".format(t=elapsed_seconds,
                                                 e=external_request_id)
        logger.info(msg)

        # From PGM response process companion and alternate packages and
        # then get Data from Graph
        # TODO - implement multiple manifest file support for below loop
        if insights_response is not None:
            for insights_result in insights_response:
                companion_packages = []
                ecosystem = insights_result['ecosystem']

                # Get usage based outliers
                recommendation['usage_outliers'] = \
                    insights_result.get('outlier_package_list', [])

                # Append Topics for User Stack
                recommendation['input_stack_topics'] = insights_result.get(
                    'package_to_topic_dict', {})
                # Add missing packages unknown to PGM
                recommendation['missing_packages_pgm'] = insights_result.get(
                    'missing_packages', [])

                for pkg in insights_result['companion_packages']:
                    companion_packages.append(pkg['package_name'])

                # Get Companion Packages from Graph
                comp_packages_graph = GraphDB().get_version_information(
                    companion_packages, ecosystem)

                # Apply Version Filters
                filtered_comp_packages_graph, filtered_list = GraphDB(
                ).filter_versions(comp_packages_graph, input_stack,
                                  external_request_id, rec_type="COMPANION")

                filtered_companion_packages = \
                    set(companion_packages).difference(set(filtered_list))
                logger.info(
                    "Companion Packages Filtered for external_request_id {} {}"
                    .format(external_request_id, filtered_companion_packages))

                # Get the topmost alternate package for each input package
                # Create intermediate dict to Only Get Top 1 companion
                # packages for the time being.
                temp_dict = {}
                for pkg_name, contents in insights_result.get(
                        'alternate_packages', {}).items():
                    pkg = {}
                    for ind in contents:
                        pkg[ind['package_name']] = ind['similarity_score']
                    temp_dict[pkg_name] = pkg

                final_dict = {}
                alternate_packages = []
                for pkg_name, contents in temp_dict.items():
                    # For each input package
                    # Get only the topmost alternate package from a set of
                    # packages based on similarity score
                    top_dict = dict(Counter(contents).most_common(1))
                    for alt_pkg, sim_score in top_dict.items():
                        final_dict[alt_pkg] = {
                            'version': input_stack[pkg_name],
                            'replaces': pkg_name,
                            'similarity_score': sim_score
                        }
                        alternate_packages.append(alt_pkg)

                # if alternate_packages:
                # Get Alternate Packages from Graph
                alt_packages_graph = GraphDB().get_version_information(
                    alternate_packages, ecosystem)

                # Apply Version Filters
                filtered_alt_packages_graph, filtered_list = GraphDB(
                ).filter_versions(alt_packages_graph, input_stack,
                                  external_request_id, rec_type="ALTERNATE")

                filtered_alternate_packages = \
                    set(alternate_packages).difference(set(filtered_list))
                logger.info(
                    "Alternate Packages Filtered for external_request_id {} {}"
                    .format(external_request_id, filtered_alternate_packages))

                if check_license:
                    # apply license based filters
                    list_user_stack_comp = extract_user_stack_package_licenses(
                        resolved, ecosystem)
                    license_filter_output = apply_license_filter(
                        list_user_stack_comp,
                        filtered_alt_packages_graph,
                        filtered_comp_packages_graph)

                    lic_filtered_alt_graph = license_filter_output[
                        'filtered_alt_packages_graph']
                    lic_filtered_comp_graph = license_filter_output[
                        'filtered_comp_packages_graph']
                    lic_filtered_list_alt = license_filter_output[
                        'filtered_list_pkg_names_alt']
                    lic_filtered_list_com = license_filter_output[
                        'filtered_list_pkg_names_com']
                else:
                    lic_filtered_alt_graph = filtered_alt_packages_graph
                    lic_filtered_comp_graph = filtered_comp_packages_graph
                    lic_filtered_list_alt = lic_filtered_list_com = list()

                if len(lic_filtered_list_alt) > 0:
                    s = set(filtered_alternate_packages).difference(
                        set(lic_filtered_list_alt))
                    msg = \
                        "Alternate Packages filtered (licenses) for external_request_id {} {}" \
                        .format(external_request_id, s)
                    logger.info(msg)

                if len(lic_filtered_list_com) > 0:
                    s = set(filtered_companion_packages).difference(
                        set(lic_filtered_list_com))
                    msg = "Companion Packages filtered (licenses) for external_request_id {} " \
                          "{}".format(external_request_id, s)
                    logger.info(msg)

                # Get Topics Added to Filtered Packages
                topics_comp_packages_graph = GraphDB(). \
                    get_topics_for_comp(lic_filtered_comp_graph,
                                        insights_result.get('companion_packages', []))

                # Create Companion Block
                comp_packages = create_package_dict(topics_comp_packages_graph)
                recommendation['companion'] = comp_packages

                # Get Topics Added to Filtered Packages
                topics_comp_packages_graph = GraphDB(). \
                    get_topics_for_alt(lic_filtered_alt_graph,
                                       insights_result.get('alternate_packages', {}))

                # Create Alternate Dict
                alt_packages = create_package_dict(
                    topics_comp_packages_graph, final_dict)
                recommendation['alternate'] = alt_packages

            recommendations.append(recommendation)
        else:
            return {
                'recommendation': 'pgm_error',
                'external_request_id': external_request_id,
                'message': 'PGM Fetching error'
            }

    ended_at = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.%f")
    audit = {
        'started_at': started_at,
        'ended_at': ended_at,
        'version': 'v1'
    }

    task_result = {
        'recommendations': recommendations,
        '_audit': audit,
        '_release': 'None:None:None'
    }

    if persist:
        # Store the result in RDS
        try:
            insert_stmt = insert(WorkerResult).values(
                worker='recommendation_v2',
                worker_id=None,
                external_request_id=external_request_id,
                analysis_id=None,
                task_result=task_result,
                error=False)
            do_update_stmt = insert_stmt.on_conflict_do_update(
                index_elements=['id'],
                set_=dict(task_result=task_result))
            session.execute(do_update_stmt)
            session.commit()
            return {
                'recommendation': 'success',
                'external_request_id': external_request_id,
                'result': task_result
            }
        except SQLAlchemyError as e:
            session.rollback()
            return {
                'recommendation': 'database error',
                'external_request_id': external_request_id,
                'message': '%s' % e
            }
    else:
        return {
            'recommendation': 'success',
            'external_request_id': external_request_id,
            'result': task_result
        }
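# Hedged illustration (not part of the original module): the execute()
# variants above pick the single best alternate per input package by feeding
# the {package_name: similarity_score} mapping to Counter and taking
# most_common(1). The package names and scores below are made up.
from collections import Counter

similarity_scores = {
    'commons-lang:commons-lang': 0.42,
    'org.apache.commons:commons-lang3': 0.91,
}
top_alternate, top_score = Counter(similarity_scores).most_common(1)[0]
# top_alternate == 'org.apache.commons:commons-lang3', top_score == 0.91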
def execute(self, arguments=None):
    """Execute task."""
    started_at = datetime.datetime.utcnow().strftime(
        "%Y-%m-%dT%H:%M:%S.%f")
    results = arguments.get('result', None)
    external_request_id = arguments.get('external_request_id', None)
    input_task_for_pgm = []
    recommendations = []
    input_stack = {}

    for result in results:
        temp_input_stack = {
            d["package"]: d["version"]
            for d in result.get("details", [])[0].get("_resolved")
        }
        input_stack.update(temp_input_stack)

    for result in results:
        details = result['details'][0]
        resolved = details['_resolved']
        manifest_file_path = details['manifest_file_path']

        recommendation = {
            'companion': [],
            'alternate': [],
            'usage_outliers': [],
            'manifest_file_path': manifest_file_path
        }
        new_arr = [r['package'] for r in resolved]

        json_object = {
            'ecosystem': details['ecosystem'],
            'comp_package_count_threshold':
                int(os.environ.get('MAX_COMPANION_PACKAGES', 5)),
            'alt_package_count_threshold':
                int(os.environ.get('MAX_ALTERNATE_PACKAGES', 2)),
            'outlier_probability_threshold':
                float(os.environ.get('OUTLIER_THRESHOLD', 0.6)),
            'unknown_packages_ratio_threshold':
                float(os.environ.get('UNKNOWN_PACKAGES_THRESHOLD', 0.3)),
            # TODO - remove janus hardcoded value completely
            # and assign a category here
            'user_persona': "1",
            'package_list': new_arr
        }
        input_task_for_pgm.append(json_object)

        # Call PGM and get the response
        start = datetime.datetime.utcnow()
        pgm_response = self.call_pgm(input_task_for_pgm)
        elapsed_seconds = (datetime.datetime.utcnow() - start).total_seconds()
        msg = 'It took {t} seconds to get response from PGM ' \
              'for external request {e}.'.format(t=elapsed_seconds,
                                                 e=external_request_id)
        print(msg)

        # From PGM response process companion and alternate packages and
        # then get Data from Graph
        # TODO - implement multiple manifest file support for below loop
        if pgm_response is not None:
            for pgm_result in pgm_response:
                companion_packages = []
                ecosystem = pgm_result['ecosystem']

                # Get usage based outliers
                recommendation['usage_outliers'] = \
                    pgm_result['outlier_package_list']

                # Append Topics for User Stack
                recommendation['input_stack_topics'] = pgm_result.get(
                    'package_to_topic_dict', {})

                for pkg in pgm_result['companion_packages']:
                    companion_packages.append(pkg['package_name'])

                # Get Companion Packages from Graph
                comp_packages_graph = GraphDB().get_version_information(
                    companion_packages, ecosystem)

                # Apply Version Filters
                filtered_comp_packages_graph, filtered_list = GraphDB(
                ).filter_versions(comp_packages_graph, input_stack)

                filtered_companion_packages = \
                    set(companion_packages).difference(set(filtered_list))
                _logger.info(
                    "Companion Packages Filtered for external_request_id {} {}"
                    .format(external_request_id, filtered_companion_packages))

                # Get the topmost alternate package for each input package
                # Create intermediate dict to Only Get Top 1 companion
                # packages for the time being.
                temp_dict = {}
                for pkg_name, contents in pgm_result[
                        'alternate_packages'].items():
                    pkg = {}
                    for ind in contents:
                        pkg[ind['package_name']] = ind['similarity_score']
                    temp_dict[pkg_name] = pkg

                final_dict = {}
                alternate_packages = []
                filtered_alt_packages_graph = []
                for pkg_name, contents in temp_dict.items():
                    # For each input package
                    # Get only the topmost alternate package from a set of
                    # packages based on similarity score
                    top_dict = dict(Counter(contents).most_common(1))
                    for alt_pkg, sim_score in top_dict.items():
                        final_dict[alt_pkg] = {
                            'version': input_stack[pkg_name],
                            'replaces': pkg_name,
                            'similarity_score': sim_score
                        }
                        alternate_packages.append(alt_pkg)

                # if alternate_packages:
                # Get Alternate Packages from Graph
                alt_packages_graph = GraphDB().get_version_information(
                    alternate_packages, ecosystem)

                # Apply Version Filters
                filtered_alt_packages_graph, filtered_list = GraphDB(
                ).filter_versions(alt_packages_graph, input_stack)

                filtered_alternate_packages = \
                    set(alternate_packages).difference(set(filtered_list))
                _logger.info(
                    "Alternate Packages Filtered for external_request_id {} {}"
                    .format(external_request_id, filtered_alternate_packages))

                # apply license based filters
                list_user_stack_comp = extract_user_stack_package_licenses(
                    resolved, ecosystem)
                license_filter_output = apply_license_filter(
                    list_user_stack_comp,
                    filtered_alt_packages_graph,
                    filtered_comp_packages_graph)

                lic_filtered_alt_graph = license_filter_output[
                    'filtered_alt_packages_graph']
                lic_filtered_comp_graph = license_filter_output[
                    'filtered_comp_packages_graph']
                lic_filtered_list_alt = license_filter_output[
                    'filtered_list_pkg_names_alt']
                lic_filtered_list_com = license_filter_output[
                    'filtered_list_pkg_names_com']

                if len(lic_filtered_list_alt) > 0:
                    s = set(filtered_alternate_packages).difference(
                        set(lic_filtered_list_alt))
                    msg = \
                        "Alternate Packages filtered (licenses) for external_request_id {} {}" \
                        .format(external_request_id, s)
                    _logger.info(msg)

                if len(lic_filtered_list_com) > 0:
                    s = set(filtered_companion_packages).difference(
                        set(lic_filtered_list_com))
                    msg = \
                        "Companion Packages filtered (licenses) for external_request_id {} {}" \
                        .format(external_request_id, s)
                    _logger.info(msg)

                # Get Topics Added to Filtered Packages
                topics_comp_packages_graph = GraphDB(). \
                    get_topics_for_comp(lic_filtered_comp_graph,
                                        pgm_result['companion_packages'])

                # Create Companion Block
                comp_packages = create_package_dict(
                    topics_comp_packages_graph)
                recommendation['companion'] = comp_packages

                # Get Topics Added to Filtered Packages
                topics_comp_packages_graph = GraphDB(). \
                    get_topics_for_alt(lic_filtered_alt_graph,
                                       pgm_result['alternate_packages'])

                # Create Alternate Dict
                alt_packages = create_package_dict(
                    topics_comp_packages_graph, final_dict)
                recommendation['alternate'] = alt_packages

            recommendations.append(recommendation)

    ended_at = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.%f")
    audit = {
        'started_at': started_at,
        'ended_at': ended_at,
        'version': 'v1'
    }

    task_result = {
        'recommendations': recommendations,
        '_audit': audit,
        '_release': 'None:None:None'
    }

    wr = WorkerResult(worker='recommendation_v2',
                      worker_id=None,
                      external_request_id=os.getenv(
                          'TEST_REQUEST_ID', external_request_id),
                      analysis_id=None,
                      task_result=task_result,
                      error=False)

    # Store the result in RDS
    try:
        session.add(wr)
        session.commit()
    except SQLAlchemyError as e:
        session.rollback()
        return {
            'recommendation': 'database error',
            'external_request_id': external_request_id,
            'message': '%s' % e
        }

    return {
        'recommendation': 'success',
        'external_request_id': external_request_id
    }