Example #1
    async def save_message_and_ack(self, consumer_record):
        """Save and ack the uploaded kafka message."""
        self.prefix = 'SAVING MESSAGE'
        if consumer_record.topic == QPC_TOPIC:
            try:
                missing_fields = []
                self.upload_message = self.unpack_consumer_record(consumer_record)
                # rh_account is being deprecated, so we fall back to it only
                # when account is absent
                rh_account = self.upload_message.get('rh_account')
                request_id = self.upload_message.get('request_id')
                url = self.upload_message.get('url')
                self.account_number = self.upload_message.get('account', rh_account)
                if not self.account_number:
                    missing_fields.append('account')
                if not request_id:
                    missing_fields.append('request_id')
                if not url:
                    missing_fields.append('url')
                if missing_fields:
                    raise QPCKafkaMsgException(
                        format_message(
                            self.prefix,
                            'Message missing required field(s): %s.' % ', '.join(missing_fields)))
                self.check_if_url_expired(url, request_id)
                try:
                    uploaded_report = {
                        'upload_srv_kafka_msg': json.dumps(self.upload_message),
                        'account': self.account_number,
                        'request_id': request_id,
                        'state': Report.NEW,
                        'state_info': json.dumps([Report.NEW]),
                        'last_update_time': datetime.now(pytz.utc),
                        'arrival_time': datetime.now(pytz.utc),
                        'retry_count': 0
                    }
                    report_serializer = ReportSerializer(data=uploaded_report)
                    report_serializer.is_valid(raise_exception=True)
                    report_serializer.save()
                    MSG_UPLOADS.labels(account_number=self.account_number).inc()
                    LOG.info(format_message(
                        self.prefix,
                        'Upload service message saved with request_id: %s. Ready for processing.'
                        % request_id))
                    await self.consumer.commit()
                except Exception as error:  # pylint: disable=broad-except
                    DB_ERRORS.inc()
                    LOG.error(format_message(
                        self.prefix,
                        'The following error occurred while trying to save and '
                        'commit the message: %s' % error))
                    print_error_loop_event()
            except QPCKafkaMsgException as message_error:
                LOG.error(format_message(
                    self.prefix, 'Error processing records. Message: %s, Error: %s' %
                    (consumer_record, message_error)))
                await self.consumer.commit()
        else:
            LOG.debug(format_message(
                self.prefix, 'Message not on %s topic: %s' % (QPC_TOPIC, consumer_record)))
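Below is a minimal driver sketch showing how a consumer record might reach save_message_and_ack. The topic name, broker address, and group id are assumptions for illustration; only the method itself comes from the example above.

from aiokafka import AIOKafkaConsumer

QPC_TOPIC = 'platform.upload.qpc'  # assumed topic name

async def consume_and_save(handler):
    """Poll the qpc topic and hand each record to save_message_and_ack."""
    consumer = AIOKafkaConsumer(
        QPC_TOPIC,
        bootstrap_servers='localhost:9092',  # assumed broker address
        group_id='qpc-group',                # manual commits require a group
        enable_auto_commit=False)            # the handler commits explicitly
    handler.consumer = consumer
    await consumer.start()
    try:
        async for record in consumer:
            await handler.save_message_and_ack(record)
    finally:
        await consumer.stop()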
Example #2
    async def _send_confirmation(self, file_hash):  # pragma: no cover
        """
        Send kafka validation message to Insights Upload service.

        When a new file lands for topic 'qpc' we must validate it
        so that it will be made permanently available to other
        apps listening on the 'available' topic.
        :param file_hash: (String) Hash for file being confirmed.
        :returns None
        """
        self.prefix = 'REPORT VALIDATION STATE ON KAFKA'
        await self.producer.stop()
        self.producer = AIOKafkaProducer(
            loop=REPORT_PROCESSING_LOOP,
            bootstrap_servers=INSIGHTS_KAFKA_ADDRESS)
        try:
            await self.producer.start()
        except (KafkaConnectionError, TimeoutError, Exception):
            KAFKA_ERRORS.inc()
            self.should_run = False
            await self.producer.stop()
            print_error_loop_event()
            raise KafkaMsgHandlerError(
                format_message(self.prefix,
                               'Unable to connect to kafka server.',
                               account_number=self.account_number,
                               report_platform_id=self.report_platform_id))
        try:
            validation = {
                'hash': file_hash,
                'request_id': self.report_or_slice.request_id,
                'validation': self.status
            }
            msg = bytes(json.dumps(validation), 'utf-8')
            await self.producer.send_and_wait(VALIDATION_TOPIC, msg)
            LOG.info(
                format_message(
                    self.prefix,
                    'Sent %s validation status to file upload on kafka' %
                    self.status,
                    account_number=self.account_number,
                    report_platform_id=self.report_platform_id))
        except Exception as err:  # pylint: disable=broad-except
            KAFKA_ERRORS.inc()
            LOG.error(
                format_message(self.prefix,
                               'The following error occurred: %s' % err))
            print_error_loop_event()

        finally:
            await self.producer.stop()
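For reference, here is the same produce-and-confirm pattern stripped down to a standalone coroutine; the topic name, broker address, and payload keys mirror the example, but the function itself is a sketch, not the source's API.

import json

from aiokafka import AIOKafkaProducer

async def send_validation(file_hash, request_id, status,
                          topic='platform.upload.validation',  # assumed name
                          servers='localhost:9092'):           # assumed broker
    """Send a one-off validation message, always stopping the producer."""
    producer = AIOKafkaProducer(bootstrap_servers=servers)
    await producer.start()
    try:
        payload = {'hash': file_hash,
                   'request_id': request_id,
                   'validation': status}  # e.g. 'success' or 'failure'
        # send_and_wait returns only once the broker acknowledges delivery
        await producer.send_and_wait(topic, json.dumps(payload).encode('utf-8'))
    finally:
        await producer.stop()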
Example #3
    async def listen_for_messages(self, async_queue, log_message):
        """Listen for messages on the qpc topic.

        Once a message from one of these topics arrives, we add
        them to the passed in queue.
        :param consumer : Kafka consumer
        :returns None
        """
        try:
            await self.consumer.start()
        except KafkaConnectionError:
            KAFKA_ERRORS.inc()
            print_error_loop_event()
            raise KafkaMsgHandlerError('Unable to connect to kafka server.')
        except Exception as err:  # pylint: disable=broad-except
            KAFKA_ERRORS.inc()
            LOG.error(format_message(
                self.prefix, 'The following error occurred: %s' % err))
            print_error_loop_event()

        LOG.info(log_message)
        try:
            # Consume messages
            async for msg in self.consumer:
                await async_queue.put(msg)
        except Exception as err:  # pylint: disable=broad-except
            KAFKA_ERRORS.inc()
            LOG.error(format_message(
                self.prefix, 'The following error occurred: %s' % err))
            print_error_loop_event()
        finally:
            # Will leave consumer group; perform autocommit if enabled.
            await self.consumer.stop()
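A sketch of the consuming side of this queue: a worker coroutine that drains what listen_for_messages puts on it. The queue_worker and main functions are assumptions used only to show the wiring.

import asyncio

async def queue_worker(async_queue):
    """Process messages as listen_for_messages enqueues them."""
    while True:
        msg = await async_queue.get()
        try:
            # each msg is a ConsumerRecord with topic/partition/offset fields
            print('received offset %s on topic %s' % (msg.offset, msg.topic))
        finally:
            async_queue.task_done()

async def main(handler):
    queue = asyncio.Queue()
    await asyncio.gather(
        handler.listen_for_messages(queue, 'Listener started.'),
        queue_worker(queue))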
Example #4
    async def run(self):
        """Run the report processor in a loop.

        Later, if we find that we want to stop looping, we can
        manipulate the class variable should_run.
        """
        while self.should_run:
            if not self.report_or_slice:
                try:
                    self.assign_object()
                except Exception:  # pylint:disable=broad-except
                    print_error_loop_event()
            if self.report_or_slice:
                try:
                    await self.delegate_state()
                except Exception as error:
                    LOG.error(format_message(
                        self.prefix,
                        'The following error occurred: %s.' % str(error)))
                    self.reset_variables()
            else:
                await asyncio.sleep(NEW_REPORT_QUERY_INTERVAL)
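The loop above follows a claim-work-or-sleep pattern; a self-contained skeleton of that pattern, with a hypothetical Worker class standing in for the report processor:

import asyncio

NEW_REPORT_QUERY_INTERVAL = 60  # assumed polling interval, in seconds

class Worker:
    """Claim work if idle, process it, otherwise sleep and poll again."""

    def __init__(self):
        self.should_run = True
        self.work_item = None

    def assign_object(self):
        """Query storage for an unclaimed object (stubbed here)."""
        self.work_item = None

    async def delegate_state(self):
        """Dispatch on the claimed object's state (stubbed here)."""

    async def run(self):
        while self.should_run:
            if not self.work_item:
                self.assign_object()
            if self.work_item:
                await self.delegate_state()
            else:
                await asyncio.sleep(NEW_REPORT_QUERY_INTERVAL)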
Example #5
    def update_slice_state(self, options,
                           report_slice):  # noqa: C901 (too-complex)
        """
        Update the report processor state and save.

        :param options: <dict> dictionary potentially containing the following:
            state: <str> the state to update to
            retry: <enum> RETRY.clear=clear count, RETRY.increment=increase count
            retry_type: <str> either time=retry after time,
                git_commit=retry after new commit
            report_json: <dict> dictionary containing the report json
            report_platform_id: <str> string containing report_platform_id
            candidate_hosts: <dict> dictionary containing hosts that were
                successfully verified and uploaded
            failed_hosts: <dict> dictionary containing hosts that failed
                verification or upload
            ready_to_archive: <bool> whether or not to archive the slice
        :param report_slice: <ReportSlice> the report slice to update
        """
        try:
            state = options.get('state')
            retry_type = options.get('retry_type')
            retry = options.get('retry', RETRY.clear)
            candidate_hosts = options.get('candidate_hosts')
            failed_hosts = options.get('failed_hosts')
            ready_to_archive = options.get('ready_to_archive')
            status_info = Status()
            report_slice.last_update_time = datetime.now(pytz.utc)
            report_slice.state = state
            report_slice.git_commit = status_info.git_commit
            report_slice_data = {
                'last_update_time': datetime.now(pytz.utc),
                'state': state,
                'git_commit': status_info.git_commit
            }
            if not retry_type:
                retry_type = ReportSlice.TIME
            if retry == RETRY.clear:
                # After a successful transaction when we have reached the update
                # point, we want to set the retry count back to 0 because
                # any future failures should be unrelated
                report_slice_data['retry_count'] = 0
                report_slice_data['retry_type'] = ReportSlice.TIME
            elif retry == RETRY.increment:
                current_count = report_slice.retry_count
                report_slice_data['retry_count'] = current_count + 1
                report_slice_data['retry_type'] = ReportSlice.TIME
            # The other choice for retry is RETRY.keep_same, in which case we
            # leave the retry count alone because we want to preserve it as is.
            if candidate_hosts is not None:
                # candidate_hosts will get smaller and smaller until it is
                # (hopefully) empty because we have taken care of all of the
                # candidates, so we rewrite this value each time
                report_slice_data['candidate_hosts'] = json.dumps(
                    candidate_hosts)
            if failed_hosts:
                # for failed hosts this list can keep growing, so we add the
                # newly failed hosts to the previous value
                failed = json.loads(report_slice.failed_hosts)
                for host in failed_hosts:
                    failed.append(host)
                report_slice_data['failed_hosts'] = json.dumps(failed)
            if ready_to_archive:
                report_slice_data['ready_to_archive'] = ready_to_archive
            state_info = json.loads(report_slice.state_info)
            state_info.append(state)
            report_slice_data['state_info'] = json.dumps(state_info)
            serializer = ReportSliceSerializer(instance=report_slice,
                                               data=report_slice_data,
                                               partial=True)
            serializer.is_valid(raise_exception=True)
            serializer.save()
            LOG.info(
                format_message(self.prefix,
                               'Successfully updated report slice %s' %
                               report_slice.report_slice_id,
                               account_number=self.account_number,
                               report_platform_id=self.report_platform_id))
        except Exception as error:  # pylint: disable=broad-except
            DB_ERRORS.inc()
            self.should_run = False
            LOG.error(
                format_message(
                    self.prefix,
                    'Could not update report slice record due to the following error %s.'
                    % str(error),
                    account_number=self.account_number,
                    report_platform_id=self.report_platform_id))
            print_error_loop_event()
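The serializer calls above are Django REST Framework's standard partial-update idiom; a minimal sketch of that idiom on its own (the serializer definition and import path are assumptions):

from rest_framework import serializers

from api.models import ReportSlice  # assumed import path for the model

class ReportSliceSerializer(serializers.ModelSerializer):
    """Plain ModelSerializer so partial updates validate field-by-field."""

    class Meta:
        model = ReportSlice
        fields = '__all__'

def save_partial_update(report_slice, update_data):
    """Validate and persist only the fields present in update_data."""
    serializer = ReportSliceSerializer(instance=report_slice,
                                       data=update_data,
                                       partial=True)
    # raises serializers.ValidationError if any supplied field is invalid
    serializer.is_valid(raise_exception=True)
    return serializer.save()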
Example #6
    async def _upload_to_host_inventory_via_kafka(self, hosts):
        """
        Upload to the host inventory via kafka.

        :param hosts: <dict> the hosts to upload, keyed by host id.
        """
        self.prefix = 'UPLOAD TO INVENTORY VIA KAFKA'
        await self.producer.stop()
        self.producer = AIOKafkaProducer(
            loop=SLICE_PROCESSING_LOOP,
            bootstrap_servers=INSIGHTS_KAFKA_ADDRESS,
            max_request_size=KAFKA_PRODUCER_OVERRIDE_MAX_REQUEST_SIZE)
        try:
            await self.producer.start()
        except (KafkaConnectionError, TimeoutError):
            KAFKA_ERRORS.inc()
            self.should_run = False
            print_error_loop_event()
            raise KafkaMsgHandlerError(
                format_message(self.prefix,
                               'Unable to connect to kafka server.',
                               account_number=self.account_number,
                               report_platform_id=self.report_platform_id))
        total_hosts = len(hosts)
        count = 0
        send_futures = []
        associated_msg = []
        report = self.report_or_slice.report
        b64_identity = None  # default in case the upload message lacks it
        cert_cn = None
        try:
            b64_identity = json.loads(
                report.upload_srv_kafka_msg)['b64_identity']
            raw_b64_identity = base64.b64decode(b64_identity).decode('utf-8')
            identity = json.loads(raw_b64_identity)
            cert_cn = identity['identity']['system']['cn']
        except KeyError as err:
            LOG.error(
                format_message(self.prefix,
                               'Invalid identity. Key not found: %s' % err))

        unique_id_base = '{}:{}:{}:'.format(
            report.request_id, report.report_platform_id,
            self.report_or_slice.report_slice_id)
        try:  # pylint: disable=too-many-nested-blocks
            for host_id, host in hosts.items():
                if HOSTS_TRANSFORMATION_ENABLED:
                    host = self._transform_single_host(report.request_id,
                                                       host_id, host)
                    if cert_cn and ('system_profile' in host):
                        host['system_profile']['owner_id'] = cert_cn
                system_unique_id = unique_id_base + host_id
                count += 1
                upload_msg = {
                    'operation': 'add_host',
                    'data': host,
                    'platform_metadata': {
                        'request_id': system_unique_id,
                        'b64_identity': b64_identity
                    }
                }
                msg = bytes(json.dumps(upload_msg), 'utf-8')
                future = await self.producer.send(UPLOAD_TOPIC, msg)
                send_futures.append(future)
                associated_msg.append(upload_msg)
                if count % HOSTS_UPLOAD_FUTURES_COUNT == 0 or count == total_hosts:
                    LOG.info(
                        format_message(
                            self.prefix,
                            'Sending %s/%s hosts to the inventory service.' %
                            (count, total_hosts),
                            account_number=self.account_number,
                            report_platform_id=self.report_platform_id))
                    try:
                        await asyncio.wait(send_futures,
                                           timeout=HOSTS_UPLOAD_TIMEOUT)
                        future_index = 0
                        for future_res in send_futures:
                            if future_res.exception():
                                LOG.error(
                                    'An exception occurred %s when trying to upload '
                                    'the following message: %s',
                                    future_res.exception(),
                                    associated_msg[future_index])
                            future_index += 1
                    except Exception as error:  # pylint: disable=broad-except
                        LOG.error('An exception occurred: %s', error)
                    send_futures = []
        except Exception as err:  # pylint: disable=broad-except
            LOG.error(
                format_message(self.prefix,
                               'The following error occurred: %s' % err))
            KAFKA_ERRORS.inc()
            self.should_run = False
            print_error_loop_event()
            raise KafkaMsgHandlerError(
                format_message(self.prefix,
                               'The following exception occurred: %s' % err,
                               account_number=self.account_number,
                               report_platform_id=self.report_platform_id))
        finally:
            await self.producer.stop()
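The send loop above queues producer.send() futures and waits on them in batches; the same technique in a compact, standalone form (the batch size, timeout, and error handling are illustrative, not the source's values):

import asyncio
import json

async def send_in_batches(producer, topic, messages, batch_size=100,
                          timeout=60):
    """Queue sends on a started AIOKafkaProducer, awaiting each batch."""
    futures = []
    for count, payload in enumerate(messages, start=1):
        # producer.send returns a future that resolves on broker ack
        future = await producer.send(topic, json.dumps(payload).encode('utf-8'))
        futures.append(future)
        if count % batch_size == 0 or count == len(messages):
            done, _pending = await asyncio.wait(futures, timeout=timeout)
            for fut in done:
                if fut.exception():
                    print('send failed: %s' % fut.exception())
            futures = []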
Example #7
    def update_object_state(self, options):  # noqa: C901 (too-complex)
        """
        Update the report processor state and save.

        :param options: <dict> potentially containing the following:
            retry: <enum> RETRY.clear=clear count, RETRY.increment=increase count
            retry_type: <str> either time=retry after time,
                git_commit=retry after new commit
            report_json: <dict> dictionary containing the report json
            report_platform_id: <str> string containing report_platform_id
            candidate_hosts: <dict> dictionary containing hosts that were
                successfully verified and uploaded
            failed_hosts: <dict> dictionary containing hosts that failed
                verification or upload
            status: <str> either success or failure based on the report
            host_inventory_api_version: <str> the inventory api version
            source: <str> containing either qpc or satellite
            source_metadata: <dict> containing metadata info about the source
            ready_to_archive: <bool> whether or not to archive the object
            start_processing: <bool> record the processing start time when True
        """
        try:
            status_info = Status()
            self.state = self.next_state

            # grab all of the potential options
            retry_type = options.get('retry_type', self.object_class.TIME)
            retry = options.get('retry', RETRY.clear)
            report_json = options.get('report_json')
            report_platform_id = options.get('report_platform_id')
            candidate_hosts = options.get('candidate_hosts')
            failed_hosts = options.get('failed_hosts')
            status = options.get('status')
            host_inventory_api_version = options.get('host_inventory_api_version')
            source = options.get('source')
            source_metadata = options.get('source_metadata')
            ready_to_archive = options.get('ready_to_archive')
            start_processing = options.get('start_processing')

            update_data = {
                'last_update_time': datetime.now(pytz.utc),
                'state': self.next_state,
                'git_commit': status_info.git_commit
            }
            # if this is the start of the processing, update the processing
            # start time
            if start_processing:
                update_data['processing_start_time'] = datetime.now(pytz.utc)

            if retry == RETRY.clear:
                # After a successful transaction when we have reached the update
                # point, we want to set the retry count back to 0 because
                # any future failures should be unrelated
                update_data['retry_count'] = 0
                update_data['retry_type'] = self.object_class.TIME
            elif retry == RETRY.increment:
                retry_count = self.report_or_slice.retry_count
                update_data['retry_count'] = retry_count + 1
                update_data['retry_type'] = retry_type

            # The other choice for retry is RETRY.keep_same, in which case we
            # leave the retry count alone because we want to preserve it as is.
            if report_json:
                update_data['report_json'] = json.dumps(report_json)
            if report_platform_id:
                update_data['report_platform_id'] = report_platform_id
            if candidate_hosts is not None:
                # candidate_hosts will get smaller and smaller until it is
                # (hopefully) empty because we have taken care of all of the
                # candidates, so we rewrite this value each time
                update_data['candidate_hosts'] = json.dumps(candidate_hosts)
            if failed_hosts:
                # for failed hosts this list can keep growing, so we add the
                # newly failed hosts to the previous value
                failed = json.loads(self.report_or_slice.failed_hosts)
                for host in failed_hosts:
                    failed.append(host)
                update_data['failed_hosts'] = json.dumps(failed)
            if status:
                update_data['upload_ack_status'] = status
            if host_inventory_api_version:
                update_data['host_inventory_api_version'] = \
                    host_inventory_api_version
            if source:
                update_data['source'] = source
            if source_metadata:
                update_data['source_metadata'] = json.dumps(source_metadata)
            if ready_to_archive:
                update_data['ready_to_archive'] = ready_to_archive

            state_info = json.loads(self.report_or_slice.state_info)
            state_info.append(self.next_state)
            update_data['state_info'] = json.dumps(state_info)

            serializer = self.object_serializer(
                instance=self.report_or_slice,
                data=update_data,
                partial=True)

            serializer.is_valid(raise_exception=True)
            serializer.save()

        except Exception as error:  # pylint: disable=broad-except
            DB_ERRORS.inc()
            self.should_run = False
            LOG.error(format_message(
                self.prefix,
                'Could not update %s record due to the following error %s.' % (
                    self.object_prefix.lower(), str(error)),
                account_number=self.account_number, report_platform_id=self.report_platform_id))
            print_error_loop_event()
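The RETRY enum these examples rely on is never shown; a plausible minimal definition inferred from the docstrings, plus one way a caller might build the options dict (both are assumptions, not the source's code):

from enum import Enum

class RETRY(Enum):
    """Retry-count actions, inferred from the docstrings above."""
    clear = 1      # reset retry_count to 0
    increment = 2  # add one to retry_count
    keep_same = 3  # leave retry_count untouched

# After a successful step: record the state, clear retries, flag archival.
options = {
    'retry': RETRY.clear,
    'report_platform_id': 'e0e7e8f9-0000-4b82-aaaa-000000000000',  # example id
    'ready_to_archive': True,
}
# processor.update_object_state(options)  # processor comes from the example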