Ejemplo n.º 1
0
def process_data_request(request_meta, users):
    """
        Main entry point of the module, prepares results for a given request.
        Coordinates a request based on the following parameters::

            metric_handle (string) - determines the type of metric object to
            build.  Keys metric_dict.

            users (list) - list of user IDs.

            **kwargs - Keyword arguments may contain a variety of variables.
            Most notably, "aggregator" if the request requires aggregation,
            "time_series" flag indicating a time series request.  The
            remaining kwargs specify metric object parameters.
    """

    # Set interval length in hours if not present
    if not request_meta.slice:
        request_meta.slice = DEFAULT_INERVAL_LENGTH
    else:
        request_meta.slice = float(request_meta.slice)

    # Get the aggregator key
    agg_key = get_agg_key(request_meta.aggregator, request_meta.metric) if \
        request_meta.aggregator else None

    args = ParameterMapping.map(request_meta)

    # Initialize the results
    results, metric_class, metric_obj = format_response(request_meta)

    start = metric_obj.datetime_start
    end = metric_obj.datetime_end

    if results['type'] == request_types.time_series:

        # Get aggregator
        try:
            aggregator_func = get_aggregator_type(agg_key)
        except MetricsAPIError as e:
            results['data'] = 'Request failed. ' + e.message
            return results

        # Determine intervals and thread allocation
        total_intervals = (date_parse(end) - date_parse(start)).\
                          total_seconds() / (3600 * request_meta.slice)
        time_threads = max(1, int(total_intervals / INTERVALS_PER_THREAD))
        time_threads = min(MAX_THREADS, time_threads)

        logging.info(__name__ + ' :: Initiating time series for %(metric)s\n'
                                '\tAGGREGATOR = %(agg)s\n'
                                '\tFROM: %(start)s,\tTO: %(end)s.' %
                                {
                                    'metric': metric_class.__name__,
                                    'agg': request_meta.aggregator,
                                    'start': str(start),
                                    'end': str(end),
                                    })
        metric_threads = '"k_" : {0}, "kr_" : {1}'.format(USER_THREADS,
            REVISION_THREADS)
        metric_threads = '{' + metric_threads + '}'

        new_kwargs = deepcopy(args)

        del new_kwargs['slice']
        del new_kwargs['aggregator']
        del new_kwargs['datetime_start']
        del new_kwargs['datetime_end']

        out = tspm.build_time_series(start,
            end,
            request_meta.slice,
            metric_class,
            aggregator_func,
            users,
            kt_=time_threads,
            metric_threads=metric_threads,
            log=True,
            **new_kwargs)

        results['header'] = ['timestamp'] + \
                            getattr(aggregator_func,
                                    um.METRIC_AGG_METHOD_HEAD)
        for row in out:
            timestamp = date_parse(row[0][:19]).strftime(
                DATETIME_STR_FORMAT)
            results['data'][timestamp] = row[3:]

    elif results['type'] == request_types.aggregator:

        # Get aggregator
        try:
            aggregator_func = get_aggregator_type(agg_key)
        except MetricsAPIError as e:
            results['data'] = 'Request failed. ' + e.message
            return results

        logging.info(__name__ + ' :: Initiating aggregator for %(metric)s\n'
                                '\AGGREGATOR = %(agg)s\n'
                                '\tFROM: %(start)s,\tTO: %(end)s.' %
                                {
                                    'metric': metric_class.__name__,
                                    'agg': request_meta.aggregator,
                                    'start': str(start),
                                    'end': str(end),
                                    })

        try:
            metric_obj.process(users,
                               k_=USER_THREADS,
                               kr_=REVISION_THREADS,
                               log_=True,
                               **args)
        except UserMetricError as e:
            logging.error(__name__ + ' :: Metrics call failed: ' + str(e))
            results['data'] = str(e)
            return results

        r = um.aggregator(aggregator_func, metric_obj, metric_obj.header())
        results['header'] = to_string(r.header)
        results['data'] = r.data[1:]

    elif results['type'] == request_types.raw:

        logging.info(__name__ + ':: Initiating raw request for %(metric)s\n'
                                '\tFROM: %(start)s,\tTO: %(end)s.' %
                                {
                                    'metric': metric_class.__name__,
                                    'start': str(start),
                                    'end': str(end),
                                    })
        try:
            metric_obj.process(users,
                               k_=USER_THREADS,
                               kr_=REVISION_THREADS,
                               log_=True,
                               **args)
        except UserMetricError as e:
            logging.error(__name__ + ' :: Metrics call failed: ' + str(e))
            results['data'] = str(e)
            return results

        for m in metric_obj.__iter__():
            results['data'][m[0]] = m[1:]

    return results
def time_series_worker(time_series,
                       metric,
                       aggregator,
                       cohort,
                       event_queue,
                       kwargs):
    """
        Worker thread which computes time series data for a set of points

        Parameter
        ~~~~~~~~~

            time_series : list(datetime)
                Datetimes defining series.

            metric : string
                Metric name.

            aggregator : method
                aggregator method reference.

            cohort : string
                Cohort name.

            event_queue : multiporcessing.Queue
                Asynchronous data-structure to communicate with parent proc.
    """
    log = bool(kwargs['log']) if 'log' in kwargs else False

    data = list()
    ts_s = time_series.next()
    new_kwargs = deepcopy(kwargs)

    # re-map some keyword args relating to thread counts
    if 'metric_threads' in new_kwargs:
        d = json.loads(new_kwargs['metric_threads'])
        for key in d:
            new_kwargs[key] = d[key]
        del new_kwargs['metric_threads']

    while 1:
        try:
            ts_e = time_series.next()
        except StopIteration:
            break

        if log:
            logging.info(__name__ + ' :: Processing thread:\n'
                                    '\t{0}, {1} - {2} ...'.format(os.getpid(),
                                                                  str(ts_s),
                                                                  str(ts_e)))

        metric_obj = metric(datetime_start=ts_s, datetime_end=ts_e, **new_kwargs).\
            process(cohort, **new_kwargs)

        r = um.aggregator(aggregator, metric_obj, metric.header())

        if log:
            logging.info(__name__ + ' :: Processing complete:\n'
                                    '\t{0}, {1} - {2} ...'.format(os.getpid(),
                                                                  str(ts_s),
                                                                  str(ts_e)))
        data.append([str(ts_s), str(ts_e)] + r.data)
        ts_s = ts_e

    event_queue.put(data)