Example #1
    def normalize_ingestion_data(self,
                                 start_date,
                                 end_date,
                                 ingestion_data,
                                 frequency='daily'):
        """Normalize worker data for reporting."""
        logger.info("Normalize Ingestion Data started")
        report_type = 'ingestion-data'
        if frequency == 'monthly':
            report_name = dt.strptime(end_date, '%Y-%m-%d').strftime('%Y-%m')
        else:
            report_name = dt.strptime(end_date,
                                      '%Y-%m-%d').strftime('%Y-%m-%d')

        template = {
            'report': {
                'from': start_date,
                'to': end_date,
                'generated_on': dt.now().isoformat('T')
            },
            'ingestion_summary': {},
            'ingestion_details': {}
        }

        epv_data = ingestion_data['EPV_DATA']
        epv_data = json.loads(epv_data)

        # Populate the default template with EPV info
        template, epvs = self.populate_default_information(epv_data, template)

        logger.info("Fetching details of the latest version for the epvs")
        today = dt.today()
        pkg_output = generate_report_for_latest_version(epvs, today)
        logger.info("Fetching details of the unknown packages for the epvs")
        ver_output = generate_report_for_unknown_epvs(epvs)

        # Add the package information to the template
        template, latest_epvs = self.generate_results(epvs, template,
                                                      pkg_output, ver_output)

        # Check whether the latest node is available in the graph
        logger.info("Checking if latest node exists in graph")
        template = self.check_latest_node(latest_epvs, template)

        # Save the final report in the relevant S3 bucket
        try:
            obj_key = '{type}/epv/{report_name}.json'.format(
                type=report_type, report_name=report_name)
            self.s3.store_json_content(content=template,
                                       obj_key=obj_key,
                                       bucket_name=self.s3.report_bucket_name)
        except Exception as e:
            logger.exception('Unable to store the report on S3. Reason: %r', e)
        return template
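
The report name depends only on end_date and frequency; a minimal sketch of that naming logic, with an illustrative date, follows (the key layout is taken from the code above).

from datetime import datetime as dt

# Illustrative date; monthly reports are keyed by year-month, daily reports
# by the full end date (see the strftime calls above).
end_date = '2020-01-31'
monthly_name = dt.strptime(end_date, '%Y-%m-%d').strftime('%Y-%m')     # '2020-01'
daily_name = dt.strptime(end_date, '%Y-%m-%d').strftime('%Y-%m-%d')    # '2020-01-31'

# The report itself is then stored under
# 'ingestion-data/epv/<report_name>.json' in the report bucket.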
Example #2
    def check_latest_node(self, latest_epvs, template):
        """Get if latest node is present in graph."""
        graph_output = generate_report_for_unknown_epvs(latest_epvs)
        missing_latest = {}
        for epv in latest_epvs:
            eco = epv['ecosystem']
            pkg = epv['name']
            ver = epv['version']
            output = graph_output[eco + "@DELIM@" + pkg + "@DELIM@" + ver]
            template['ingestion_details'][eco][pkg][
                'latest_node_in_graph'] = output

            # If the EPV is missing in the graph, add it to the summary
            if output == "false":
                if eco not in missing_latest:
                    missing_latest[eco] = []
                tmp = {"package": pkg, "version": ver}
                missing_latest[eco].append(tmp)
        template['ingestion_summary']['missing_latest_node'] = missing_latest
        return template
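
A small sketch of the data shapes this method works with; the package name and version are made up, and the '@DELIM@' key format is taken from the lookup above.

# Each entry in latest_epvs carries the ecosystem, package name and latest version.
latest_epvs = [{'ecosystem': 'npm', 'name': 'lodash', 'version': '4.17.21'}]

# generate_report_for_unknown_epvs() is expected to return 'true'/'false'
# strings keyed as '<ecosystem>@DELIM@<name>@DELIM@<version>'.
graph_output = {'npm@DELIM@lodash@DELIM@4.17.21': 'false'}

# For a 'false' entry, and assuming template['ingestion_details']['npm']['lodash']
# already exists, the summary ends up looking like:
# template['ingestion_summary']['missing_latest_node'] ==
#     {'npm': [{'package': 'lodash', 'version': '4.17.21'}]}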
Example #3
    def normalize_ingestion_data(self,
                                 start_date,
                                 end_date,
                                 ingestion_data,
                                 frequency='daily'):
        """Normalize worker data for reporting."""
        report_type = 'ingestion-data'
        if frequency == 'monthly':
            report_name = dt.strptime(end_date, '%Y-%m-%d').strftime('%Y-%m')
        else:
            report_name = dt.strptime(end_date,
                                      '%Y-%m-%d').strftime('%Y-%m-%d')

        template = {
            'report': {
                'from': start_date,
                'to': end_date,
                'generated_on': dt.now().isoformat('T')
            },
            'ingestion_summary': {},
            'ingestion_details_v2': {}
        }

        all_deps_count = {'all': 0}
        failed_deps_count = {'all': 0}
        all_epv_list_v2 = {}

        # Graph availability validation: count each successfully ingested EPV
        # per ecosystem and build the input for the graph lookup
        success_epv_data = ingestion_data['EPV_GRAPH_SUCCESS_DATA']
        success_epv_data = json.loads(success_epv_data)
        graph_input = []
        for data in success_epv_data:
            all_deps_count['all'] = all_deps_count['all'] + 1
            if all_deps_count.get(data[0]) is None:
                all_deps_count[data[0]] = 0
            all_deps_count[data[0]] = all_deps_count[data[0]] + 1
            graph_template = {
                'ecosystem': data[0],
                'name': data[1],
                'version': data[2]
            }
            graph_input.append(graph_template)

        # The graph call below checks whether the ingested EPVs exist in the graph
        graph_output = generate_report_for_unknown_epvs(graph_input)
        for attributes, values in graph_output.items():
            versn_template = {}
            epv_arr = attributes.split('@')
            if all_epv_list_v2.get(epv_arr[0]) is None:
                all_epv_list_v2[epv_arr[0]] = {}
            all_epv_list_v2[epv_arr[0]][epv_arr[1]] = {}
            all_epv_list_v2[epv_arr[0]][epv_arr[1]]['package_known'] = values
            all_epv_list_v2[epv_arr[0]][epv_arr[1]]['versions'] = []
            versn_template['version'] = epv_arr[2]
            versn_template['ingested_in_graph'] = values
            if values == 'false':
                failed_deps_count['all'] = failed_deps_count['all'] + 1
                if failed_deps_count.get(epv_arr[0]) is None:
                    failed_deps_count[epv_arr[0]] = 0
                failed_deps_count[
                    epv_arr[0]] = failed_deps_count[epv_arr[0]] + 1
            all_epv_list_v2[epv_arr[0]][epv_arr[1]]['versions'].append(
                versn_template)

        # EPVs that failed graph ingestion are counted and marked as not ingested
        failed_epv_data = ingestion_data['EPV_GRAPH_FAILED_DATA']
        failed_epv_data = json.loads(failed_epv_data)
        for data in failed_epv_data:
            versn_template = {}
            all_deps_count['all'] = all_deps_count['all'] + 1
            if all_deps_count.get(data[0]) is None:
                all_deps_count[data[0]] = 0
            all_deps_count[data[0]] = all_deps_count[data[0]] + 1
            failed_deps_count['all'] = failed_deps_count['all'] + 1
            if failed_deps_count.get(data[0]) is None:
                failed_deps_count[data[0]] = 0
            failed_deps_count[data[0]] = failed_deps_count[data[0]] + 1
            if all_epv_list_v2.get(data[0]) is None:
                all_epv_list_v2[data[0]] = {}
            all_epv_list_v2[data[0]][data[1]] = {}
            all_epv_list_v2[data[0]][data[1]]['versions'] = []
            versn_template['version'] = data[2]
            versn_template['ingested_in_graph'] = 'false'
            all_epv_list_v2[data[0]][data[1]]['versions'].append(
                versn_template)
            graph_template = {
                'ecosystem': data[0],
                'name': data[1],
                'version': data[2]
            }
            graph_input.append(graph_template)

        # The graph call below fetches the latest-version information for the
        # ingested EPVs in the graph
        graph_output = generate_report_for_latest_version(graph_input)
        for attributes, values in graph_output.items():
            epv_arr = attributes.split('@')
            if all_epv_list_v2.get(epv_arr[0]) is None:
                all_epv_list_v2[epv_arr[0]] = {}
            all_epv_list_v2[epv_arr[0]][epv_arr[1]]['known_latest_version'] = \
                values['known_latest_version']
            all_epv_list_v2[epv_arr[0]][epv_arr[1]]['actual_latest_version'] = \
                values['actual_latest_version']
            if values['known_latest_version'] == '':
                all_epv_list_v2[epv_arr[0]][
                    epv_arr[1]]['package_known'] = 'false'

        template['ingestion_details_v2'] = all_epv_list_v2

        # Create the EPV ingestion statistics per ecosystem
        template['ingestion_summary'][
            'total_epv_ingestion_count'] = all_deps_count['all']
        for data in all_deps_count:
            if failed_deps_count.get(data) is None:
                failed_deps_count[data] = 0
            stats_template = {
                'epv_ingestion_count':
                all_deps_count[data],
                'epv_successfully_ingested_count':
                all_deps_count[data] - failed_deps_count[data],
                'failed_epv_ingestion_count':
                failed_deps_count[data]
            }
            template['ingestion_summary'][data] = stats_template

        # Save the final report in the relevant S3 bucket
        try:
            obj_key = '{depl_prefix}/{type}/epv/{report_name}.json'.format(
                depl_prefix=self.s3.deployment_prefix,
                type=report_type,
                report_name=report_name)
            self.s3.store_json_content(content=template,
                                       obj_key=obj_key,
                                       bucket_name=self.s3.report_bucket_name)
        except Exception as e:
            logger.exception('Unable to store the report on S3. Reason: %r', e)
        return template
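
A hypothetical call site for the v2 report. Per the parsing above, both payload keys hold JSON-encoded lists of [ecosystem, name, version] triples; the report_helper instance and the concrete packages are assumptions for illustration.

import json

# report_helper is an instance of the enclosing report class (not shown here).
ingestion_data = {
    # One EPV that reached the graph and one that failed ingestion.
    'EPV_GRAPH_SUCCESS_DATA': json.dumps([['maven', 'io.vertx:vertx-core', '3.9.0']]),
    'EPV_GRAPH_FAILED_DATA': json.dumps([['npm', 'lodash', '4.17.20']]),
}
report = report_helper.normalize_ingestion_data('2020-01-01', '2020-01-31',
                                                ingestion_data)

# Assuming the maven EPV is found in the graph, the summary then shows:
# report['ingestion_summary']['total_epv_ingestion_count'] == 2
# report['ingestion_summary']['npm']['failed_epv_ingestion_count'] == 1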