def process_data_request(request_meta, users):
    """
    Main entry point of the module; prepares results for a given request.

    Parameters:
        request_meta : object carrying the request parameters.  Fields read
            here: ``slice`` (interval length in hours), ``aggregator``,
            ``metric``, plus whatever ``ParameterMapping.map`` and
            ``format_response`` consume.
        users : list of user IDs the metric is computed over.

    Returns:
        The ``results`` dict produced by ``format_response``, with
        ``results['header']`` / ``results['data']`` filled in on success, or
        ``results['data']`` set to an error string on failure.

    The request is dispatched on ``results['type']``: a time-series request,
    an aggregated request, or a raw per-user request.
    """
    # Set interval length in hours if not present.
    # NOTE(review): constant name has a typo ("INERVAL") but it is defined
    # elsewhere in the module — renaming must be done at the definition too.
    if not request_meta.slice:
        request_meta.slice = DEFAULT_INERVAL_LENGTH
    else:
        request_meta.slice = float(request_meta.slice)

    # Get the aggregator key (only meaningful when aggregation is requested)
    agg_key = get_agg_key(request_meta.aggregator, request_meta.metric) if \
        request_meta.aggregator else None

    args = ParameterMapping.map(request_meta)

    # Initialize the results container and the metric class/instance
    results, metric_class, metric_obj = format_response(request_meta)

    start = metric_obj.datetime_start
    end = metric_obj.datetime_end

    if results['type'] == request_types.time_series:

        # Resolve the aggregator method; bail out with an error payload on
        # an unknown key (str(e) matches the error style used below).
        try:
            aggregator_func = get_aggregator_type(agg_key)
        except MetricsAPIError as e:
            results['data'] = 'Request failed. ' + str(e)
            return results

        # Determine intervals and thread allocation: one thread per
        # INTERVALS_PER_THREAD intervals, clamped to [1, MAX_THREADS].
        total_intervals = (date_parse(end) - date_parse(start)).\
            total_seconds() / (3600 * request_meta.slice)
        time_threads = max(1, int(total_intervals / INTERVALS_PER_THREAD))
        time_threads = min(MAX_THREADS, time_threads)

        logging.info(__name__ + ' :: Initiating time series for %(metric)s\n'
                                '\tAGGREGATOR = %(agg)s\n'
                                '\tFROM: %(start)s,\tTO: %(end)s.'
                     % {
                         'metric': metric_class.__name__,
                         'agg': request_meta.aggregator,
                         'start': str(start),
                         'end': str(end),
                     })

        # Per-metric thread counts are passed down as a JSON-encoded dict
        metric_threads = '"k_" : {0}, "kr_" : {1}'.format(USER_THREADS,
                                                          REVISION_THREADS)
        metric_threads = '{' + metric_threads + '}'

        # Strip args handled explicitly by build_time_series
        new_kwargs = deepcopy(args)
        del new_kwargs['slice']
        del new_kwargs['aggregator']
        del new_kwargs['datetime_start']
        del new_kwargs['datetime_end']

        out = tspm.build_time_series(start, end, request_meta.slice,
                                     metric_class, aggregator_func, users,
                                     kt_=time_threads,
                                     metric_threads=metric_threads,
                                     log=True, **new_kwargs)

        results['header'] = ['timestamp'] + \
            getattr(aggregator_func, um.METRIC_AGG_METHOD_HEAD)
        for row in out:
            # row[0] holds the interval start; truncate to seconds precision
            timestamp = date_parse(row[0][:19]).strftime(DATETIME_STR_FORMAT)
            results['data'][timestamp] = row[3:]

    elif results['type'] == request_types.aggregator:

        # Resolve the aggregator method (same handling as above)
        try:
            aggregator_func = get_aggregator_type(agg_key)
        except MetricsAPIError as e:
            results['data'] = 'Request failed. ' + str(e)
            return results

        # BUG FIX: the original emitted a literal '\A' ('\AGGREGATOR');
        # '\t' restores the intended tab, matching the time-series branch.
        logging.info(__name__ + ' :: Initiating aggregator for %(metric)s\n'
                                '\tAGGREGATOR = %(agg)s\n'
                                '\tFROM: %(start)s,\tTO: %(end)s.'
                     % {
                         'metric': metric_class.__name__,
                         'agg': request_meta.aggregator,
                         'start': str(start),
                         'end': str(end),
                     })
        try:
            metric_obj.process(users, k_=USER_THREADS, kr_=REVISION_THREADS,
                               log_=True, **args)
        except UserMetricError as e:
            logging.error(__name__ + ' :: Metrics call failed: ' + str(e))
            results['data'] = str(e)
            return results

        r = um.aggregator(aggregator_func, metric_obj, metric_obj.header())
        results['header'] = to_string(r.header)
        # r.data[0] is skipped — presumably a header/timestamp slot; confirm
        # against um.aggregator before changing.
        results['data'] = r.data[1:]

    elif results['type'] == request_types.raw:

        logging.info(__name__ + ':: Initiating raw request for %(metric)s\n'
                                '\tFROM: %(start)s,\tTO: %(end)s.'
                     % {
                         'metric': metric_class.__name__,
                         'start': str(start),
                         'end': str(end),
                     })
        try:
            metric_obj.process(users, k_=USER_THREADS, kr_=REVISION_THREADS,
                               log_=True, **args)
        except UserMetricError as e:
            logging.error(__name__ + ' :: Metrics call failed: ' + str(e))
            results['data'] = str(e)
            return results

        # Raw output: one row per user, keyed by the user id (m[0])
        for m in metric_obj.__iter__():
            results['data'][m[0]] = m[1:]

    return results
def time_series_worker(time_series, metric, aggregator, cohort,
                       event_queue, kwargs):
    """
    Worker process which computes time series data for a set of points.

    Consumes consecutive datetimes from ``time_series`` pairwise, builds a
    metric over each [start, end) interval, aggregates it, and pushes the
    accumulated rows onto ``event_queue``.

    Parameters
    ~~~~~~~~~~
    time_series : iterator(datetime)
        Datetimes defining the series boundaries.
    metric : class
        Metric class, instantiated once per interval.
    aggregator : method
        Aggregator method reference.
    cohort : string
        Cohort name.
    event_queue : multiprocessing.Queue
        Asynchronous data-structure to communicate with parent proc.
    kwargs : dict
        Extra options; 'log' (bool flag) and 'metric_threads'
        (JSON-encoded dict of thread-count kwargs) are consumed here, the
        rest are forwarded to the metric constructor and ``process``.
    """
    log = bool(kwargs['log']) if 'log' in kwargs else False

    data = []
    # FIX: use the next() builtin instead of Python-2-only iterator.next()
    ts_s = next(time_series)
    new_kwargs = deepcopy(kwargs)

    # Re-map keyword args relating to thread counts: inline the
    # JSON-encoded 'metric_threads' dict into the kwargs themselves.
    if 'metric_threads' in new_kwargs:
        new_kwargs.update(json.loads(new_kwargs['metric_threads']))
        del new_kwargs['metric_threads']

    while True:
        try:
            ts_e = next(time_series)
        except StopIteration:
            break

        if log:
            logging.info(__name__ + ' :: Processing thread:\n'
                         '\t{0}, {1} - {2} ...'.format(os.getpid(),
                                                       str(ts_s),
                                                       str(ts_e)))

        metric_obj = metric(datetime_start=ts_s,
                            datetime_end=ts_e,
                            **new_kwargs).process(cohort, **new_kwargs)
        r = um.aggregator(aggregator, metric_obj, metric.header())

        if log:
            logging.info(__name__ + ' :: Processing complete:\n'
                         '\t{0}, {1} - {2} ...'.format(os.getpid(),
                                                       str(ts_s),
                                                       str(ts_e)))

        data.append([str(ts_s), str(ts_e)] + r.data)
        ts_s = ts_e

    event_queue.put(data)