Example #1
0
    def record_transaction(self, data):
        """Record a single transaction against this application.

        Metrics for the transaction are generated into a private workarea
        and then merged into the main stats engine while holding the
        stats lock. Nothing is recorded if there is no active session or
        if the stats engine has no settings. Failures while generating or
        merging the data are logged and are not propagated to the caller.

        :param data: The transaction to record. It is handed to the
            workarea's ``record_transaction()`` and its ``end_time``
            attribute is saved as the time of the last transaction.

        """

        if not self._active_session:
            return

        if self._stats_engine.settings is None:
            return

        # Do checks to see whether trying to record a transaction in a
        # different process to that the application was activated in.

        self.validate_process()

        internal_metrics = ValueMetrics()

        with InternalTraceContext(internal_metrics):
            # Remains None if the workarea could not even be created, so
            # that the merge step below can tell there is nothing to
            # merge rather than tripping over an unbound name.

            stats = None

            try:
                # We accumulate stats into a workarea and only then merge it
                # into the main one under a thread lock. Do this to ensure
                # that the process of generating the metrics into the stats
                # doesn't unnecessarily lock out another thread.

                stats = self._stats_engine.create_workarea()
                stats.record_transaction(data)

            except Exception:
                _logger.exception('The generation of transaction data has '
                        'failed. This would indicate some sort of internal '
                        'implementation issue with the agent. Please report '
                        'this problem to New Relic support for further '
                        'investigation.')

            with self._stats_lock:
                try:
                    self._transaction_count += 1
                    self._last_transaction = data.end_time

                    # Only merge transaction stats if a workarea exists.
                    # A workarea that was created but only partially
                    # populated because of a failure is still merged.

                    if stats is not None:
                        internal_metric('Supportability/Transaction/Counts/'
                                'metric_data', stats.metric_data_count())

                        self._stats_engine.merge_metric_stats(stats)
                        self._stats_engine.merge_other_stats(stats)

                    # The internal statistics are merged as the final
                    # step. This is okay so long as nothing else is
                    # recorded into them after this point. If something
                    # is, then that data will not be merged.

                    self._stats_engine.merge_value_metrics(
                            internal_metrics.metrics())

                except Exception:
                    _logger.exception('The merging of transaction data has '
                            'failed. This would indicate some sort of '
                            'internal implementation issue with the agent. '
                            'Please report this problem to New Relic support '
                            'for further investigation.')
Example #2
0
    def harvest(self, shutdown=False):
        """Performs a harvest, reporting aggregated data for the current
        reporting period to the data collector.

        Does nothing if the agent has been flagged as shutdown or if
        there is no active session. Otherwise a snapshot of the
        accumulated stats is taken, metric, error, slow SQL, transaction
        trace and thread profile data is sent, and any agent commands
        returned by the data collector are processed. Exceptions raised
        while reporting are handled here and are not propagated.

        :param shutdown: When true this is a final forced harvest. A
            reporting period shorter than one second which contains
            transactions is stretched to just over one second, any
            running thread profiling session is aborted and the session
            is shut down once the data has been reported.

        """

        if self._agent_shutdown:
            return

        if not self._active_session:
            _logger.debug('Cannot perform a data harvest for %r as '
                    'there is no active session.', self._app_name)

            return

        internal_metrics = ValueMetrics()

        with InternalTraceContext(internal_metrics):
            with InternalTrace('Supportability/Harvest/Calls/harvest'):

                self._harvest_count += 1

                start = time.time()

                _logger.debug('Commencing data harvest for %r.',
                        self._app_name)

                # Create a snapshot of the transaction stats and
                # application specific custom metrics stats, then merge
                # them together. The originals will be reset at the time
                # this is done so that any new metrics that come in from
                # this point onwards will be accumulated in a fresh
                # bucket.
                #
                # The transaction count is read under the same lock as
                # it is reset and the snapshot taken, so that the count
                # always corresponds to the data in the snapshot.

                with self._stats_lock:
                    transaction_count = self._transaction_count

                    self._transaction_count = 0
                    self._last_transaction = 0.0

                    stats = self._stats_engine.harvest_snapshot()

                with self._stats_custom_lock:
                    stats_custom = self._stats_custom_engine.harvest_snapshot()

                stats.merge_metric_stats(stats_custom)

                # Now merge in any metrics from the data samplers
                # associated with this application.
                #
                # NOTE If a data sampler has problems then what data was
                # collected up to that point is retained. The data
                # sampler itself is still retained and would be used
                # again on future harvest. If it is a persistent problem
                # with the data sampler the issue would then reoccur
                # with every harvest. If data sampler is a user provided
                # data sampler, then should perhaps deregister it if it
                # keeps having problems.

                for sampler in self._samplers:
                    try:
                        for metric in sampler.value_metrics():
                            stats.record_value_metric(metric)

                    except Exception:
                        _logger.exception('The merging of value metrics from '
                                'a data sampler has failed. If this issue '
                                'persists then please report this problem to '
                                'New Relic support for further investigation.')

                # Add a metric we can use to track how many harvest
                # periods have occurred.

                stats.record_value_metric(ValueMetric(
                        name='Instance/Reporting', value=0))

                # Create our time stamp as to when this reporting period
                # ends and start reporting the data.

                period_end = time.time()

                # If this harvest is being forcibly triggered on process
                # shutdown, there are transactions recorded, and the
                # duration of the harvest period is less than 1 second,
                # then artificially push out the end time of the harvest
                # period. This is done so that the harvest period is not
                # less than 1 second, otherwise the data collector will
                # throw the data away. This is desirable for case where
                # trying to monitor scripts which perform a one off task
                # and then immediately exit. Also useful when running
                # test scripts.

                if shutdown and transaction_count != 0:
                    if period_end - self._period_start < 1.0:
                        _logger.debug('Stretching harvest duration for '
                                'forced harvest on shutdown.')
                        period_end = self._period_start + 1.001

                try:
                    configuration = self._active_session.configuration

                    # Send the transaction and custom metric data.

                    # Create a metric_normalizer based on normalize_name
                    # If metric rename rules are empty, set normalizer
                    # to None and the stats engine will skip steps as
                    # appropriate.

                    if self._rules_engine['metric'].rules:
                        metric_normalizer = partial(self.normalize_name,
                                rule_type='metric')
                    else:
                        metric_normalizer = None

                    # Pass the metric_normalizer to stats.metric_data to
                    # do metric renaming.

                    metric_data = stats.metric_data(metric_normalizer)

                    internal_metric('Supportability/Harvest/Counts/'
                            'metric_data', len(metric_data))

                    metric_ids = self._active_session.send_metric_data(
                      self._period_start, period_end, metric_data)

                    # Successful, so we update the stats engine with the
                    # new metric IDs and reset the reporting period
                    # start time. If an error occurs after this point,
                    # any remaining data for the period being reported
                    # on will be thrown away. We reset the count of
                    # number of merges we have done due to failures as
                    # only really want to count errors in being able to
                    # report the main transaction metrics.

                    self._merge_count = 0
                    self._period_start = period_end
                    self._stats_engine.update_metric_ids(metric_ids)

                    # Send the accumulated error data.

                    if configuration.collect_errors:
                        error_data = stats.error_data()

                        internal_metric('Supportability/Harvest/Counts/'
                                'error_data', len(error_data))

                        if error_data:
                            self._active_session.send_errors(error_data)

                    # Send the slow SQL and transaction trace data.

                    if configuration.collect_traces:
                        slow_sql_data = stats.slow_sql_data()

                        internal_metric('Supportability/Harvest/Counts/'
                                'sql_trace_data', len(slow_sql_data))

                        if slow_sql_data:
                            self._active_session.send_sql_traces(slow_sql_data)

                        slow_transaction_data = stats.transaction_trace_data()

                        internal_metric('Supportability/Harvest/Counts/'
                                'transaction_sample_data',
                                len(slow_transaction_data))

                        if slow_transaction_data:
                            self._active_session.send_transaction_traces(
                                    slow_transaction_data)

                    # Get agent commands from collector.

                    agent_commands = self._active_session.get_agent_commands()

                    # For each agent command received, call the
                    # appropriate agent command handler. Reply to the
                    # data collector with the acknowledgement of the
                    # agent command.

                    for command in agent_commands:
                        cmd_id = command[0]
                        cmd_name = command[1]['name']
                        cmd_args = command[1]['arguments']

                        # An agent command is mapped to a method of this
                        # class. If we don't know about a specific agent
                        # command we just ignore it.
                        #
                        # NOTE(review): the lookup accepts any attribute
                        # name supplied by the data collector, not just
                        # dedicated command handlers. Confirm whether
                        # this should be restricted.

                        cmd_handler = getattr(self, cmd_name, None)

                        if cmd_handler is None:
                            _logger.debug('Received unknown agent command '
                                    '%r from the data collector for %r.',
                                    cmd_name, self._app_name)
                            continue

                        cmd_res = cmd_handler(cmd_id, **cmd_args)

                        if cmd_res:
                            self._active_session.send_agent_command_results(
                                    cmd_res)

                    # If a profiling session is already running, check
                    # if it is completed and send the accumulated
                    # profile data back to the data collector. Note that
                    # this comes after we process the agent commands as
                    # we might receive an agent command to stop the
                    # profiling session, but still send the data back.
                    # Having the sending of the results last ensures we
                    # send back that data from the stopped profiling
                    # session immediately.

                    if self._profiler_started:
                        profile_data = self._thread_profiler.profile_data()

                        if profile_data and self._send_profile_data:
                            _logger.debug('Reporting thread profiling '
                                    'session data for %r.', self._app_name)

                            self._active_session.send_profile_data(profile_data)

                            self._profiler_started = False
                            self._send_profile_data = False

                    # If this is a final forced harvest for the process
                    # then attempt to shutdown the session.
                    #
                    # If a thread profiling session is running, we need
                    # to make sure we stop that from running as well.

                    if shutdown:
                        if self._profiler_started:
                            _logger.info('Aborting thread profiling session '
                                    'for %r.', self._app_name)

                            self._thread_profiler.stop_profiling(
                                    wait_for_completion=False)

                            self._thread_profiler = None
                            self._profiler_started = False
                            self._send_profile_data = False

                        # Shutting down the session is best effort. The
                        # session is being dropped regardless of whether
                        # this succeeds.

                        try:
                            self._active_session.shutdown_session()
                        except Exception:
                            pass

                        self._active_session = None

                except ForceAgentRestart:
                    # The data collector has indicated that we need to
                    # perform an internal agent restart. We attempt to
                    # properly shutdown the session and then initiate a
                    # new session.
                    #
                    # If a thread profiling session is running, we need
                    # to make sure we stop that from running as well as
                    # any data will not be able to be reported later if
                    # do reconnect as will be a different agent run.

                    if self._profiler_started:
                        _logger.info('Aborting thread profiling session '
                                'for %r.', self._app_name)

                        self._thread_profiler.stop_profiling(
                                wait_for_completion=False)

                        self._thread_profiler = None
                        self._profiler_started = False
                        self._send_profile_data = False

                    # Best effort only, as a new session is started
                    # below regardless.

                    try:
                        self._active_session.shutdown_session()
                    except Exception:
                        pass

                    self._agent_restart += 1
                    self._active_session = None

                    self.activate_session()

                except ForceAgentDisconnect:
                    # The data collector has indicated that we need to
                    # force disconnect and stop reporting. We attempt to
                    # properly shutdown the session, but don't start a
                    # new one and flag ourselves as shutdown. This
                    # notification is presumably sent when a specific
                    # application is behaving so badly that it needs to
                    # be stopped entirely. It would require a complete
                    # process start to be able to attempt to connect
                    # again and if the server side kill switch is still
                    # enabled it would be told to disconnect once more.
                    #
                    # If a thread profiling session is running, we need
                    # to make sure we stop that from running as well as
                    # the agent will no longer be reporting without a
                    # restart of the process so no point.

                    if self._profiler_started:
                        _logger.info('Aborting thread profiling session '
                                'for %r.', self._app_name)

                        self._thread_profiler.stop_profiling(
                                wait_for_completion=False)

                        self._thread_profiler = None
                        self._profiler_started = False
                        self._send_profile_data = False

                    # Best effort only, as the agent is being flagged as
                    # shutdown regardless.

                    try:
                        self._active_session.shutdown_session()
                    except Exception:
                        pass

                    self._active_session = None

                    self._agent_shutdown = True

                except RetryDataForRequest:
                    # A potentially recoverable error occurred. We merge
                    # the stats back into that for the current period
                    # and abort the current harvest if the problem
                    # occurred when initially reporting the main
                    # transaction metrics. If the problem occurred when
                    # reporting other information then that and any
                    # other non reported information is thrown away.
                    #
                    # In order to prevent memory growth we will only
                    # merge data up to a set maximum number of
                    # successive times. When this occurs we throw away
                    # all the metric data and start over. We also only
                    # merge main metric data and discard errors, slow
                    # SQL and transaction traces from older harvest
                    # period.
                    #
                    # The period start is only moved to the period end
                    # once the main metric data has been sent, so if
                    # they differ the failure occurred before that.

                    if self._period_start != period_end:

                        self._merge_count += 1

                        agent_limits = configuration.agent_limits
                        maximum = agent_limits.merge_stats_maximum

                        if self._merge_count <= maximum:
                            # Merge under the stats lock, as is done
                            # everywhere else data is merged into the
                            # main stats engine, since transactions may
                            # be getting recorded by other threads.

                            with self._stats_lock:
                                self._stats_engine.merge_metric_stats(stats)

                        else:
                            _logger.error('Unable to report main transaction '
                                    'metrics after %r successive attempts. '
                                    'Check the log messages and if necessary '
                                    'please report this problem to New Relic '
                                    'support for further investigation.',
                                    maximum)

                            self._discard_count += self._merge_count

                            self._merge_count = 0

                except DiscardDataForRequest:
                    # An issue must have occurred in reporting the data
                    # but if we retry with same data the same error is
                    # likely to occur again so we just throw any data
                    # not sent away for this reporting period.

                    self._discard_count += 1

                except Exception:
                    # An unexpected error, likely some sort of internal
                    # agent implementation issue.

                    _logger.exception('Unexpected exception when attempting '
                            'to harvest the metric data and send it to the '
                            'data collector. Please report this problem to '
                            'New Relic support for further investigation.')

                duration = time.time() - start

                _logger.debug('Completed harvest for %r in %.2f seconds.',
                        self._app_name, duration)

                # Force close the socket connection which has been
                # created for this harvest if session still exists.
                # New connection will be created automatically on the
                # next harvest.

                if self._active_session:
                    self._active_session.close_connection()

        # Merge back in statistics recorded about the last harvest
        # and communication with the data collector. This will be
        # part of the data for the next harvest period.

        with self._stats_lock:
            self._stats_engine.merge_value_metrics(internal_metrics.metrics())