def process_metrics(p, request_meta):
    """
    Worker process for requests, forked from the job controller.

    Resolves the cohort expression to a concrete user list - a single
    user, the special "all" user group, or a stored ("regular") cohort -
    then runs the metric request and pushes the serialized result (or an
    error message on failure) onto the queue ``p``.

    Parameters:
        p            - queue connecting back to the job controller.
                       Results larger than MAX_BLOCK_SIZE are written in
                       blocking chunks.
        request_meta - request metadata carrying cohort_expr, metric,
                       project, start/end, and the is_user flag.
    """
    log_name = '{0} :: {1}'.format(__name__, process_metrics.__name__)
    logging.info(log_name + ' - START JOB'
                            '\n\tCOHORT = {0} - METRIC = {1}'
                            ' - PID = {2})'.
                 format(request_meta.cohort_expr, request_meta.metric,
                        getpid()))

    err_msg = __name__ + ' :: Request failed.'
    users = []

    # Obtain user list - handle the case where a lone user ID is passed.
    # !! The username should already be validated
    if request_meta.is_user:
        uid = MediaWikiUser.is_user_name(request_meta.cohort_expr,
                                         request_meta.project)
        if uid:
            valid = True
            users = [uid]
        else:
            valid = False
            err_msg = error_codes[3]

    # The "all" user group.  All users within a time period.
    elif request_meta.cohort_expr == 'all':
        users = MediaWikiUser(query_type=1)
        try:
            users = [u for u in users.get_users(
                request_meta.start, request_meta.end,
                project=request_meta.project)]
            valid = True
        except Exception:
            # Log the underlying failure (with traceback) before mapping
            # it to the generic error code; previously it was swallowed
            # silently, making these failures undiagnosable.
            logging.exception(log_name + ' :: Failed to expand the '
                                         '"all" user group.')
            valid = False
            err_msg = error_codes[5]

    # "TYPICAL" COHORT PROCESSING
    else:
        users = get_users(request_meta.cohort_expr)

        # Default project is what is stored in usertags_meta
        project = query_mod.get_cohort_project_by_meta(
            request_meta.cohort_expr)
        if project:
            request_meta.project = project
            logging.debug(__name__ + ' :: Using default project from '
                                     'usertags_meta {0}.'.format(project))

        valid = True
        err_msg = ''

    if valid:
        # Process request and serialize the result for transport.
        results = process_data_request(request_meta, users)
        results = str(results)
        response_size = getsizeof(results, None)

        if response_size > MAX_BLOCK_SIZE:
            # NOTE(review): getsizeof includes object overhead, so it
            # exceeds len(results); the final iteration may put an empty
            # string chunk - confirm the consumer tolerates (or relies
            # on) this before changing the bound to len(results).
            index = 0
            # Dump the data in pieces - block until it is picked up.
            while index < response_size:
                p.put(results[index:index + MAX_BLOCK_SIZE], block=True)
                index += MAX_BLOCK_SIZE
        else:
            p.put(results, block=True)

        logging.info(log_name + ' - END JOB'
                                '\n\tCOHORT = {0} - METRIC = {1}'
                                ' - PID = {2})'.
                     format(request_meta.cohort_expr, request_meta.metric,
                            getpid()))
    else:
        p.put(err_msg, block=True)
        logging.info(log_name + ' - END JOB - FAILED.'
                                '\n\tCOHORT = {0} - METRIC = {1}'
                                ' - PID = {2})'.
                     format(request_meta.cohort_expr, request_meta.metric,
                            getpid()))
def output(cohort, metric):
    """
    View corresponding to a data request -
        All of the setup and execution for a request happens here.
    """
    # Get URL.  Check for refresh flag - drop it from the url.
    url = request.url.split(request.url_root)[1]
    # 'in' already yields a bool - no conditional expression needed.
    refresh = 'refresh' in request.args
    if refresh:
        url = sub(REFRESH_REGEX, '', url)

    # Get the refresh date of the cohort; fall back to None so the
    # request can still proceed without a refresh timestamp.
    try:
        cid = query_mod.get_cohort_id(cohort)
        cohort_refresh_ts = get_cohort_refresh_datetime(cid)
    except Exception:
        cohort_refresh_ts = None
        # logging.exception preserves the traceback that logging.error
        # was discarding.
        logging.exception(__name__ + ' :: Could not retrieve refresh '
                                     'time of cohort.')

    # Build a request and validate.
    #
    # 1. Populate with request parameters from query args.
    # 2. Filter the input discarding any url junk
    # 3. Process defaults for request parameters
    # 4. See if this maps to a single user request
    # 5. Otherwise, validate the cohort itself
    try:
        rm = RequestMetaFactory(cohort, cohort_refresh_ts, metric)
    except MetricsAPIError as e:
        return redirect(url_for('all_cohorts') + '?error=' +
                        str(e.error_code))

    filter_request_input(request, rm)
    try:
        format_request_params(rm)
    except MetricsAPIError as e:
        return redirect(url_for('all_cohorts') + '?error=' +
                        str(e.error_code))

    if rm.is_user:
        project = rm.project if rm.project else 'enwiki'
        if not MediaWikiUser.is_user_name(cohort, project):
            logging.error(__name__ + ' :: "{0}" is not a valid username '
                                     'in "{1}"'.format(cohort, project))
            return redirect(url_for('all_cohorts') + '?error=3')
    else:
        # @TODO CALL COHORT VALIDATION HERE
        pass

    # Determine if the request maps to an existing response.
    #
    # 1. The response already exists in the hash, return.
    # 2. Otherwise, add the request to the queue.
    data = get_data(rm)
    key_sig = build_key_signature(rm, hash_result=True)

    # Is the request already running?
    is_running = req_cb_get_is_running(key_sig, VIEW_LOCK)

    # Determine if request is already hashed
    if data and not refresh:
        return make_response(jsonify(data))

    # Determine if the job is already running
    elif is_running:
        return render_template('processing.html',
                               error=error_codes[0],
                               url_str=str(rm))

    # Add the request to the queue
    else:
        api_request_queue.put(unpack_fields(rm), block=True)
        req_cb_add_req(key_sig, url, VIEW_LOCK)

    return render_template('processing.html', url_str=str(rm))