def _process_help(args):
    """ Used by Threshold::process() for forking.  Should not be called
        externally. """

    state = args[1]
    thread_args = RevertRateArgsClass(state[0], state[1], state[2],
                                      state[3], state[4], state[6],
                                      state[7], state[8])
    users = args[0]

    if thread_args.log_progress:
        logging.info(__name__ + ' :: Computing reverts on %s users '
                                '(PID %s)' % (len(users), str(os.getpid())))
    results_agg = list()
    dropped_users = 0

    umpd_obj = UMP_MAP[thread_args.group](users, thread_args)
    for user_data in umpd_obj:

        total_revisions = 0.0
        total_reverts = 0.0

        # Call query on revert rate for each user
        #
        # 1. Obtain user registration date
        # 2. Compute end date based on 't'
        # 3. Get user revisions in time period
        query_args = namedtuple('QueryArgs', 'date_start date_end')\
            (format_mediawiki_timestamp(user_data.start),
             format_mediawiki_timestamp(user_data.end))

        try:
            revisions = query_mod.\
                revert_rate_user_revs_query(user_data.user,
                                            thread_args.project,
                                            query_args)
        except query_mod.UMQueryCallError as e:
            logging.error(__name__ + ' :: Failed to '
                                     'get revisions: {0}'.format(e.message))
            dropped_users += 1
            continue

        results_thread = mpw.build_thread_pool(revisions, _revision_proc,
                                               thread_args.rev_threads,
                                               state)

        for r in results_thread:
            total_revisions += r[0]
            total_reverts += r[1]

        if not total_revisions:
            results_agg.append([user_data.user, 0.0, total_revisions])
        else:
            results_agg.append([user_data.user,
                                total_reverts / total_revisions,
                                total_revisions])

    if thread_args.log_progress:
        logging.debug(__name__ + ' :: PID {0} complete. Dropped users = {1}'.
                      format(str(os.getpid()), dropped_users))

    return results_agg
def set_connection(self, retries=20, timeout=1, **kwargs):
    """
        Establishes a database connection.

        Parameters (\*\*kwargs):
            - **instance**: string key used to look up the connection
              settings for the target database instance
    """
    mysql_kwargs = {}
    if 'instance' in kwargs:
        for key in projSet.connections[kwargs['instance']]:
            mysql_kwargs[key] = projSet.connections[kwargs['instance']][key]

    while retries:
        try:
            self._db_ = MySQLdb.connect(**mysql_kwargs)
            break
        except MySQLdb.OperationalError as e:
            logging.debug(__name__ + ' :: Connection dropped. '
                                     'Reopening MySQL connection. '
                                     '{0} retries left, timeout = {1}: '
                                     '"{2}"'.format(retries, timeout,
                                                    e.message))
            sleep(timeout)
            retries -= 1
    if not retries:
        raise ConnectorError()

    self._cur_ = self._db_.cursor()
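# Illustrative usage only: this assumes the surrounding Connector class
# invokes set_connection() from its constructor, as the call sites below
# (Connector(instance=...)) suggest.  The 'slave' instance name is
# hypothetical and must exist in projSet.connections.
conn = Connector(instance='slave')
conn._cur_.execute('SELECT 1')
print(conn._cur_.fetchall())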
def log_pool_worker_end(metric_name, worker_name, extra=''):
    """
        Logging method for job completion.
    """
    logging.debug('{0} :: {1}\n'
                  '\tPID = {2} complete.\n'
                  '\t{3}\n'.format(metric_name, worker_name,
                                   getpid(), extra))
def wrapper(users, project, args):
    # ensure the handles are iterable
    if not hasattr(users, '__iter__'):
        users = [users]

    # get query and call
    if hasattr(args, 'log') and args.log:
        logging.debug(__name__ + ' :: calling "%(method)s" '
                                 'in "%(project)s".' %
                      {
                          'method': f.__name__,
                          'project': project
                      })

    # Call query, escaping user and project variables to guard against
    # SQL injection
    query = f(escape_var(users), escape_var(project), args)

    try:
        conn = Connector(instance=conf.PROJECT_DB_MAP[project])
    except KeyError:
        logging.error(__name__ + ' :: Project does not exist.')
        return []
    except ConnectorError:
        logging.error(__name__ + ' :: Could not establish a connection.')
        raise UMQueryCallError('Could not establish a connection.')

    try:
        conn._cur_.execute(query)
    except ProgrammingError:
        logging.error(__name__ +
                      ' :: Could not get edit counts - Query failed.')
        raise UMQueryCallError()
    results = [row for row in conn._cur_]
    del conn
    return results
def get_data(request_meta, hash_result=True):
    """
        Extract data from the global hash given a request object.  If an
        item is successfully recovered data is returned
    """
    hash_table_ref = read_pickle_data()

    # Traverse the hash key structure to find data
    # @TODO rather than iterate through REQUEST_META_BASE &
    #   REQUEST_META_QUERY_STR look only at existing attributes

    logging.debug(__name__ + " - Attempting to pull data for request "
                             "COHORT {0}, METRIC {1}".
                  format(request_meta.cohort_expr, request_meta.metric))

    key_sig = build_key_signature(request_meta, hash_result=hash_result)
    item = find_item(hash_table_ref, key_sig)

    if item:
        # item[0] will be a stringified structure that
        # is initialized, see set_data.
        return eval(item[0])
    else:
        return None
def log_pool_worker_start(metric_name, worker_name, data, args):
    """
        Logging method for processing pool workers.
    """
    logging.debug('{0} :: {1}\n'
                  '\tData = {2} rows\n'
                  '\tArgs = {3}\n'
                  '\tPID = {4}\n'.format(metric_name, worker_name,
                                         len(data), str(args), getpid()))
def authenticate(self, password):
    password = escape(unicode(password))
    logging.debug(__name__ + ' :: Authenticating "{0}"/"{1}" '
                             'on hash "{2}" ...'.
                  format(self.name, password, self.pw_hash))
    if self.check_password(password):
        self.authenticated = True
    else:
        self.authenticated = False
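# A minimal sketch of the check_password helper assumed above; the real
# implementation lives elsewhere in the codebase and may use a different
# hashing scheme.  This version assumes pw_hash was produced with
# werkzeug's generate_password_hash.
from werkzeug.security import check_password_hash

def check_password(self, password):
    # No stored hash means the user was never registered
    if not self.pw_hash:
        return False
    # Compare the supplied plaintext against the stored hash
    return check_password_hash(self.pw_hash, password)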
def _process_help(args):

    # Unpack args
    state = args[1]
    users = args[0]
    thread_args = um.UserMetric._unpack_params(state)

    # Log progress
    if thread_args.log_:
        logging.debug(__name__ + '::Computing live account. (PID = %s)' %
                      getpid())

    # Extract edit button click from edit_page_tracking table (namespace,
    # article title, timestamp) of first click and registration timestamps
    # (join on logging table)
    #
    # Query will return: (user id, time of registration, time of first
    #   edit button click)
    query_args = namedtuple('QueryArgs', 'namespace')(thread_args.namespace)
    query_results = query_mod.live_account_query(users,
                                                 thread_args.project,
                                                 query_args)

    # Iterate over results to determine boolean indicating whether
    # account is "live"

    results = {str(user): -1 for user in users}

    user_reg = query_mod.user_registration_date_logging(
        users, thread_args.project, None)

    # uid: diff_time
    user_reg = {str(r[0]): (datetime.now() - date_parse(r[1])).
                total_seconds() / 3600 for r in user_reg}

    # Flag all users alive longer than t hours as "not invalid"
    for user in results:
        if user in user_reg and user_reg[user] >= thread_args.t:
            results[user] = 0

    for row in query_results:
        user = str(row[0])
        try:
            # get the difference in hours
            diff = (date_parse(row[2]) - date_parse(row[1])).total_seconds()
            diff /= 3600
        except Exception:
            continue

        if diff <= thread_args.t:
            results[user] = 1
        else:
            results[user] = 0

    return [(str(key), results[key]) for key in results]
def wrapper(users, project, args):
    # ensure the handles are iterable
    if not hasattr(users, '__iter__'):
        users = [users]

    # escape project & users
    users = escape_var(users)
    project = escape_var(project)

    # compose a csv of user ids
    user_str = DataLoader().format_comma_separated_list(users)

    # get query and call
    if hasattr(args, 'log') and args.log:
        logging.debug(__name__ + ' :: calling "%(method)s" '
                                 'in "%(project)s".' %
                      {
                          'method': f.__name__,
                          'project': project
                      })

    # 1. Synthesize query
    # 2. Substitute project
    query, params = f(users, project, args)
    query = sub_tokens(query, db=project, users=user_str)

    try:
        conn = Connector(instance=conf.PROJECT_DB_MAP[project])
    except KeyError:
        logging.error(__name__ + ' :: Project does not exist.')
        return []
    except ConnectorError:
        logging.error(__name__ + ' :: Could not establish a connection.')
        raise UMQueryCallError(__name__ + ' :: Could not '
                                          'establish a connection.')

    try:
        if params:
            conn._cur_.execute(query, params)
        else:
            conn._cur_.execute(query)
    except (OperationalError, ProgrammingError) as e:
        logging.error(__name__ + ' :: Query failed: {0}, params = {1}'.
                      format(query, str(params)))
        raise UMQueryCallError(__name__ + ' :: ' + str(e))

    results = [row for row in conn._cur_]
    del conn
    return results
def register_user(self):
    """
        Writes the user credentials to the datastore.
    """
    # 1. Only users not already registered
    # 2. Ensure that the user is unique
    # 3. Write the user / pass to the db
    if not self.active:
        if not query_mod.get_api_user(self.name, by_id=False):
            query_mod.insert_api_user(self.name, self.pw_hash)
            logging.debug(__name__ + ' :: Added user {0}'.
                          format(self.name))
        else:
            logging.error(__name__ + ' :: Could not add user {0}'.
                          format(self.name))
        self.active = True
def add_cohort_users(cohort_name, user_records):
    conn = Connector(instance=conf.__cohort_data_instance__)

    ut_query = query_store[add_cohort_users.__query_name__]
    ut_query = sub_tokens(ut_query,
                          db=conf.__cohort_meta_instance__,
                          table=conf.__cohort_db__)
    ut_query += ','.join(['%s'] * len(user_records))

    cohort_id = get_cohort_id(cohort_name)
    value_list_ut = [(rec['project'], rec['user_id'], int(cohort_id))
                     for rec in user_records]
    try:
        logging.debug('ut_query:\n%s', ut_query)
        logging.debug('value_list_ut:\n%s', value_list_ut)
        conn._cur_.execute(ut_query, value_list_ut)
        conn._db_.commit()
    except (ProgrammingError, OperationalError) as e:
        conn._db_.rollback()
        raise UMQueryCallError(__name__ + ' :: ' + str(e))

    del conn
def _process_help(args):
    """
        First determine if the user has made an adequate number of edits.
        If so, compute the number of minutes that passed between the Nth
        and Mth edit.

            - Parameters:
                - **user_handle** - List(int).  List of user ids.
                - **first_edit** - Integer.  The numeric value of the
                    first edit from which to measure the threshold.
                - **threshold_edit** - Integer.  The numeric value of the
                    threshold edit from which to measure the threshold
    """

    # Unpack args
    state = args[1]
    users = args[0]
    thread_args = um.UserMetric._unpack_params(state)

    if thread_args.log_:
        logging.debug(__name__ + '::Computing Time to threshold on '
                                 '{0} users. (PID = {1})'.
                      format(len(users), getpid()))

    minutes_to_threshold = list()

    # For each user gather their revisions and produce a time diff
    for user in users:
        revs = query_mod.\
            time_to_threshold_revs_query(user, thread_args.project, None)
        revs = [rev[0] for rev in revs]
        minutes_to_threshold.append(
            [user, get_minute_diff_result(revs,
                                          thread_args.threshold_edit,
                                          thread_args.first_edit)])

    if thread_args.log_:
        logging.info(__name__ + '::Processed PID = {0}.'.format(getpid()))

    return minutes_to_threshold
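# A minimal sketch of what get_minute_diff_result might look like; the real
# helper is defined elsewhere in the codebase and may behave differently.
# It assumes `revs` is a chronologically ordered list of revision timestamps
# and that -1 is the sentinel for users who never reach the threshold edit.
from dateutil.parser import parse as date_parse

def get_minute_diff_result(revs, threshold_edit, first_edit):
    # Not enough revisions to reach the threshold edit
    if len(revs) < threshold_edit:
        return -1
    # Edit counts are 1-based indices into the ordered revision list
    t_first = date_parse(revs[first_edit - 1])
    t_threshold = date_parse(revs[threshold_edit - 1])
    return int((t_threshold - t_first).total_seconds() / 60)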
def __init__(self, username, authenticated=False):
    self.name = escape(unicode(username))
    self.authenticated = authenticated

    user_ref = query_mod.get_api_user(username, by_id=False)
    if user_ref:
        self.id = unicode(user_ref[1])
        self.active = True
        self.pw_hash = unicode(str(user_ref[2]))
    else:
        self.id = None
        self.active = False
        self.pw_hash = None

    logging.debug(__name__ + ' :: Initializing user obj. '
                             'user: "{0}", '
                             'is active: "{1}", '
                             'is auth: {2}'.
                  format(username, self.active, self.authenticated))
def login(): if request.method == "POST" and "username" in request.form: username = escape(unicode(str(request.form["username"]))) passwd = escape(unicode(str(request.form["password"]))) remember = request.form.get("remember", "no") == "yes" # Initialize user user_ref = APIUser(username) user_ref.authenticate(passwd) logging.debug(__name__ + ' :: Authenticating "{0}"/"{1}" ...'.format(username, passwd)) if user_ref.is_authenticated(): login_user(user_ref, remember=remember) flash("Logged in.") return redirect(request.args.get("next") or url_for("api_root")) else: flash("Login failed.") return render_template("login.html")
def process_response():
    """ Pulls responses off of the queue. """

    log_name = '{0} :: {1}'.format(__name__, process_response.__name__)
    logging.debug(log_name + ' - STARTING...')

    while 1:
        time.sleep(RESPONSE_TIMEOUT)

        # Handle any responses as they enter the queue
        #
        # logging.debug(log_name + ' - POLLING RESPONSES...')
        res_item = umapi_broker_context.pop(RESPONSE_BROKER_TARGET)
        if not res_item:
            continue

        req, data = unpack_response_for_broker(res_item)
        request_meta = build_request_obj(req)

        # Add result to cache once completed
        logging.debug(log_name + ' - Setting data for {0}'.format(
            str(request_meta)))
        set_data(data, request_meta)

    logging.debug(log_name + ' - SHUTTING DOWN...')
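# A minimal sketch of the paired broker (un)packing helpers referenced above
# and in job_control below.  The actual wire format used by
# umapi_broker_context is not shown in this module, so this is illustrative
# only; here a response is a JSON array pairing the originating request
# string with its data payload.
import json

def pack_response_for_broker(request, data):
    # Serialize the request identifier together with its response data
    return json.dumps([request, data])

def unpack_response_for_broker(res_item):
    # Inverse of pack_response_for_broker
    req, data = json.loads(res_item)
    return req, data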
def _process_help(args):

    # Unpack args
    state = args[1]
    thread_args = LiveAccountArgsClass(state[0], state[1], state[2],
                                       state[3], state[4], state[5])
    users = args[0]

    # Log progress
    if thread_args.log:
        logging.debug(__name__ + '::Computing live account. (PID = %s)' %
                      getpid())

    # Extract edit button click from edit_page_tracking table (namespace,
    # article title, timestamp) of first click and registration timestamps
    # (join on logging table)
    #
    # Query will return: (user id, time of registration, time of first
    #   edit button click)
    query_args = namedtuple('QueryArgs', 'namespace')(thread_args.namespace)
    query_results = query_mod.live_account_query(users,
                                                 thread_args.project,
                                                 query_args)

    # Iterate over results to determine boolean indicating whether
    # account is "live"
    results = {long(user): -1 for user in users}
    for row in query_results:
        try:
            # get the difference in minutes
            diff = (date_parse(row[2]) - date_parse(row[1])).total_seconds()
            diff /= 60
        except Exception:
            continue

        if diff <= thread_args.t:
            results[row[0]] = 1
        else:
            results[row[0]] = 0

    return [(str(key), results[key]) for key in results]
def set_data(data, request_meta, hash_result=True):
    """
        Given request meta-data and a dataset create a key path in the
        global hash to store the data
    """
    hash_table_ref = read_pickle_data()
    key_sig = build_key_signature(request_meta, hash_result=hash_result)

    logging.debug(__name__ + " :: Adding data to hash @ key signature = {0}".
                  format(str(key_sig)))
    if hash_result:
        key_sig_full = build_key_signature(request_meta, hash_result=False)
        hash_table_ref[key_sig] = (data, key_sig_full)
    else:
        # Walk the key path, creating nested dicts as needed, and store the
        # data on the final key; the root reference is what gets persisted
        # below.
        node = hash_table_ref
        last_item = key_sig[-1]
        for item in key_sig:
            if item == last_item:
                node[last_item] = data
            else:
                node[item] = OrderedDict()
                node = node[item]
    write_pickle_data(hash_table_ref)
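# A minimal sketch of the lookup that mirrors set_data above and is used by
# get_data; this is assumed behaviour and the project's real find_item may
# differ.  With hash_result=True the signature is a single hashed key,
# otherwise it is a sequence of keys into nested dictionaries.
def find_item(hash_table_ref, key_sig):
    if not isinstance(key_sig, (list, tuple)):
        # Flat lookup on the hashed signature
        return hash_table_ref.get(key_sig)
    node = hash_table_ref
    for key in key_sig:
        try:
            node = node[key]
        except (KeyError, TypeError):
            return None
    return node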
def _process_help(args):
    """
        Worker thread method for edit count.
    """

    # Unpack args
    users = args[0]
    state = args[1]

    metric_params = um.UserMetric._unpack_params(state)
    query_args_type = namedtuple("QueryArgs", "date_start date_end")

    logging.debug(__name__ + ":: Executing EditCount on "
                             "%s users (PID = %s)" % (len(users), getpid()))

    # Call user period method
    umpd_obj = UMP_MAP[metric_params.group](users, metric_params)

    results = list()
    for t in umpd_obj:
        args = query_args_type(t.start, t.end)

        # Build edit count results list
        results += query_mod.edit_count_user_query(t.user,
                                                   metric_params.project,
                                                   args)
    return results
def create_cohort(cohort, project, notes="", owner=1, group=3):
    conn = Connector(instance=conf.__cohort_data_instance__)
    now = format_mediawiki_timestamp(datetime.now())

    # TODO: ALLOW THE COHORT DEF TO BE REFRESHED IF IT ALREADY EXISTS

    logging.debug(__name__ + ' :: Adding new cohort "{0}".'.
                  format(cohort))

    if not notes:
        notes = 'Generated by: ' + __name__

    # Create an entry in ``usertags_meta``
    utm_query = query_store[create_cohort.__query_name__]
    try:
        params = {
            'utm_name': str(cohort),
            'utm_project': str(project),
            'utm_notes': str(notes),
            'utm_group': int(group),
            'utm_owner': int(owner),
            'utm_touched': now,
            'utm_enabled': 0
        }
    except ValueError as e:
        raise UMQueryCallError(__name__ + ' :: ' + str(e))

    utm_query = sub_tokens(utm_query,
                           db=conf.__cohort_meta_instance__,
                           table=conf.__cohort_meta_db__)
    try:
        conn._cur_.execute(utm_query, params)
        conn._db_.commit()
    except (ProgrammingError, OperationalError) as e:
        conn._db_.rollback()
        raise UMQueryCallError(__name__ + ' :: ' + str(e))

    del conn
def process_responses(response_queue, msg_in):
    """ Pulls responses off of the queue. """

    log_name = '{0} :: {1}'.format(__name__, process_responses.__name__)
    logging.debug(log_name + ' - STARTING...')

    while 1:
        stream = ''

        # Block on the response queue
        try:
            res = response_queue.get(True)
            request_meta = rebuild_unpacked_request(res)
        except Exception:
            logging.error(log_name + ' - Could not get request meta')
            continue

        data = response_queue.get(True)
        while data:
            stream += data
            try:
                data = response_queue.get(True, timeout=1)
            except Empty:
                break

        try:
            data = eval(stream)
        except Exception as e:
            # Report a fraction of the failed response data directly in the
            # logger
            if len(unicode(stream)) > 2000:
                excerpt = stream[:1000] + ' ... ' + stream[-1000:]
            else:
                excerpt = stream
            logging.error(log_name + ' - Request failed. {0}\n\n'
                                     'data excerpt: {1}'.
                          format(e.message, excerpt))

            # Format a response that will report on the failed request
            stream = "OrderedDict([('status', 'Request failed.'), " \
                     "('exception', '" + escape(unicode(e.message)) + "')," \
                     "('request', '" + escape(unicode(request_meta)) + "'), " \
                     "('data', '" + escape(unicode(stream)) + "')])"

        key_sig = build_key_signature(request_meta, hash_result=True)

        # Set request in list to "not alive"
        req_cb_flag_job_complete(key_sig, REQ_NCB_LOCK)

        logging.debug(log_name + ' - Setting data for {0}'.format(
            str(request_meta)))
        set_data(stream, request_meta)

    logging.debug(log_name + ' - SHUTTING DOWN...')
def requests_notification_callback(msg_queue_in, msg_queue_out):
    """
        Asynchronous callback.  Tracks status of requests and new requests.
        This callback utilizes ``msg_queue_in`` & ``msg_queue_out`` to
        manage request status.
    """
    log_name = '{0} :: {1}'.format(__name__,
                                   requests_notification_callback.__name__)
    logging.debug('{0} - STARTING...'.format(log_name))

    # TODO - potentially extend with an in-memory cache
    job_list = OrderedDict()

    while 1:
        try:
            msg = msg_queue_in.get(True)
        except IOError as e:
            logging.error(__name__ + ' :: Could not block '
                                     'on in queue: "{0}"'.format(e.message))
            sleep(1)
            continue

        try:
            type = msg[0]
        except (KeyError, ValueError):
            logging.error(log_name + ' - No valid type '
                                     '{0}'.format(str(msg)))
            continue

        # Init request
        if type == 0:
            try:
                job_list[msg[1]] = [True, msg[2]]
                logging.debug(log_name + ' - Initialize Request: '
                                         '{0}.'.format(str(msg)))
            except Exception:
                logging.error(log_name + ' - Initialize Request'
                                         ' failed: {0}'.format(str(msg)))

        # Flag request complete - leave on queue
        elif type == 1:
            try:
                job_list[msg[1]][0] = False
                logging.debug(log_name + ' - Set request finished: '
                                         '{0}.\n'.format(str(msg)))
            except Exception:
                logging.error(log_name + ' - Set request finished failed: '
                                         '{0}\n'.format(str(msg)))

        # Is the key in the cache and running?
        elif type == 2:
            try:
                if msg[1] in job_list:
                    msg_queue_out.put([job_list[msg[1]][0]], True)
                else:
                    msg_queue_out.put([False], True)
                logging.debug(log_name + ' - Get request alive: '
                                         '{0}.'.format(str(msg)))
            except (KeyError, ValueError):
                logging.error(log_name + ' - Get request alive failed: '
                                         '{0}'.format(str(msg)))

        # Get keys
        elif type == 3:
            msg_queue_out.put(job_list.keys(), True)

        # Get url
        elif type == 4:
            try:
                if msg[1] in job_list:
                    msg_queue_out.put([job_list[msg[1]][1]], True)
                else:
                    logging.error(log_name + ' - Get URL failed: {0}'.
                                  format(str(msg)))
            except (KeyError, ValueError):
                logging.error(log_name + ' - Get URL failed: {0}'.
                              format(str(msg)))

        else:
            logging.error(log_name + ' - Bad message: {0}'.format(str(msg)))

    logging.debug('{0} - SHUTTING DOWN...'.format(log_name))
def process_metrics(p, request_meta):
    """
        Worker process for requests, forked from the job controller.

        This method handles:

            * Filtering cohort type: "regular" cohort, single user,
              user group
            * Secondary validation
    """
    log_name = '{0} :: {1}'.format(__name__, process_metrics.__name__)

    logging.info(log_name + ' - START JOB'
                            '\n\tCOHORT = {0} - METRIC = {1}'
                            ' - PID = {2})'.
                 format(request_meta.cohort_expr, request_meta.metric,
                        getpid()))

    err_msg = __name__ + ' :: Request failed.'
    users = list()

    # obtain user list - handle the case where a lone user ID is passed
    # !! The username should already be validated
    if request_meta.is_user:
        uid = MediaWikiUser.is_user_name(request_meta.cohort_expr,
                                         request_meta.project)
        if uid:
            valid = True
            users = [uid]
        else:
            valid = False
            err_msg = error_codes[3]

    # The "all" user group.  All users within a time period.
    elif request_meta.cohort_expr == 'all':
        users = MediaWikiUser(query_type=1)

        try:
            users = [u for u in users.get_users(
                request_meta.start, request_meta.end,
                project=request_meta.project)]
            valid = True
        except Exception:
            valid = False
            err_msg = error_codes[5]

    # "TYPICAL" COHORT PROCESSING
    else:
        users = get_users(request_meta.cohort_expr)

        # Default project is what is stored in usertags_meta
        project = query_mod.get_cohort_project_by_meta(
            request_meta.cohort_expr)
        if project:
            request_meta.project = project
            logging.debug(__name__ + ' :: Using default project from '
                                     'usertags_meta {0}.'.format(project))

        valid = True
        err_msg = ''

    if valid:
        # process request
        results = process_data_request(request_meta, users)
        results = str(results)
        response_size = getsizeof(results, None)

        if response_size > MAX_BLOCK_SIZE:
            index = 0

            # Dump the data in pieces - block until it is picked up
            while index < response_size:
                p.put(results[index:index + MAX_BLOCK_SIZE], block=True)
                index += MAX_BLOCK_SIZE
        else:
            p.put(results, block=True)

        logging.info(log_name + ' - END JOB'
                                '\n\tCOHORT = {0} - METRIC = {1}'
                                ' - PID = {2})'.
                     format(request_meta.cohort_expr, request_meta.metric,
                            getpid()))

    else:
        p.put(err_msg, block=True)
        logging.info(log_name + ' - END JOB - FAILED.'
                                '\n\tCOHORT = {0} - METRIC = {1}'
                                ' - PID = {2})'.
                     format(request_meta.cohort_expr, request_meta.metric,
                            getpid()))
def job_control(request_queue, response_queue):
    """
        Controls the execution of user metrics requests

        Parameters
        ~~~~~~~~~~

        request_queue : multiprocessing.Queue
           Queues incoming API requests.

        response_queue : multiprocessing.Queue
           Queues response data from completed jobs.
    """

    # Store executed and pending jobs respectively
    job_queue = list()
    wait_queue = list()

    # Global job ID number
    job_id = 0

    # Tallies the number of concurrently running jobs
    concurrent_jobs = 0

    log_name = '{0} :: {1}'.format(__name__, job_control.__name__)
    logging.debug('{0} - STARTING...'.format(log_name))

    while 1:

        # Request Queue Processing
        # ------------------------

        try:
            # Pull an item off of the queue
            req_item = request_queue.get(timeout=QUEUE_WAIT)
            logging.debug(log_name + ' :: PULLING item from request queue -> '
                                     '\n\tCOHORT = {0} - METRIC = {1}'
                          .format(req_item['cohort_expr'],
                                  req_item['metric']))
        except Exception:
            req_item = None
            # logging.debug('{0} :: {1}  - Listening ...'
            #               .format(__name__, job_control.__name__))

        # Process complete jobs
        # ---------------------

        for job_item in job_queue:

            # Look for completed jobs
            if not job_item.queue.empty():

                # Put request creds on res queue -- this goes to
                # response_handler asynchronously
                response_queue.put(unpack_fields(job_item.request),
                                   block=True)

                # Pull data off of the queue and add it to response queue
                while not job_item.queue.empty():
                    data = job_item.queue.get(True)
                    if data:
                        response_queue.put(data, block=True)

                del job_queue[job_queue.index(job_item)]
                concurrent_jobs -= 1

                logging.debug(log_name + ' :: RUN -> RESPONSE - Job ID {0}'
                                         '\n\tConcurrent jobs = {1}'
                              .format(str(job_item.id), concurrent_jobs))

        # Process pending jobs
        # --------------------

        for wait_req in wait_queue:
            if concurrent_jobs <= MAX_CONCURRENT_JOBS:

                # prepare job from item
                req_q = Queue()
                proc = Process(target=process_metrics,
                               args=(req_q, wait_req))
                proc.start()

                job_item = job_item_type(job_id, proc, wait_req, req_q)
                job_queue.append(job_item)

                del wait_queue[wait_queue.index(wait_req)]
                concurrent_jobs += 1
                job_id += 1

                logging.debug(log_name + ' :: WAIT -> RUN - Job ID {0}'
                                         '\n\tConcurrent jobs = {1}, '
                                         'COHORT = {2} - METRIC = {3}'
                              .format(str(job_id), concurrent_jobs,
                                      wait_req.cohort_expr, wait_req.metric))

        # Add newest job to the queue
        # ---------------------------

        if req_item:

            # Build the request item
            rm = rebuild_unpacked_request(req_item)

            logging.debug(log_name + ' : REQUEST -> WAIT '
                                     '\n\tCOHORT = {0} - METRIC = {1}'
                          .format(rm.cohort_expr, rm.metric))
            wait_queue.append(rm)

            # Communicate with request notification callback about new job
            key_sig = build_key_signature(rm, hash_result=True)
            url = get_url_from_keys(build_key_signature(rm), REQUEST_PATH)
            req_cb_add_req(key_sig, url, REQ_NCB_LOCK)

    logging.debug('{0} - FINISHING.'.format(log_name))
def add_cohort_data(cohort, users, project,
                    notes="", owner=1, group=3,
                    add_meta=True):
    """
        Adds a new cohort to backend.

        Parameters
        ~~~~~~~~~~

        cohort : string
            Name of cohort (must be unique).

        users : list
            List of user ids to add to cohort.

        project : string
            Project of cohort.
    """
    conn = Connector(instance=conf.__cohort_data_instance__)
    now = format_mediawiki_timestamp(datetime.now())

    # TODO: ALLOW THE COHORT DEF TO BE REFRESHED IF IT ALREADY EXISTS

    if add_meta:
        logging.debug(__name__ + ' :: Adding new cohort "{0}".'.
                      format(cohort))

        if not notes:
            notes = 'Generated by: ' + __name__

        # Create an entry in ``usertags_meta``
        utm_query = query_store[add_cohort_data.__query_name__ + '_meta'] % {
            'cohort_meta_instance': conf.__cohort_meta_instance__,
            'cohort_meta_db': conf.__cohort_meta_db__,
            'utm_name': escape_var(cohort),
            'utm_project': escape_var(project),
            'utm_notes': notes,
            'utm_group': escape_var(str(group)),
            'utm_owner': escape_var(str(owner)),
            'utm_touched': now,
            'utm_enabled': '0'
        }
        conn._cur_.execute(utm_query)

        try:
            conn._db_.commit()
        except (ProgrammingError, OperationalError):
            conn._db_.rollback()

    # add data to ``user_tags``
    if users:
        # get uid for cohort
        usertag = get_cohort_id(cohort)

        logging.debug(__name__ + ' :: Adding cohort {0} users.'.
                      format(len(users)))

        value_list_ut = [('{0}'.format(project), int(uid), int(usertag))
                         for uid in users]
        value_list_ut = str(value_list_ut)[1:-1]

        ut_query = query_store[add_cohort_data.__query_name__] % {
            'cohort_meta_instance': conf.__cohort_meta_instance__,
            'cohort_db': conf.__cohort_db__,
            'value_list': value_list_ut
        }
        conn._cur_.execute(ut_query)

        try:
            conn._db_.commit()
        except (ProgrammingError, OperationalError):
            conn._db_.rollback()

    del conn
def add_cohort_data(cohort, users, project,
                    notes="", owner=1, group=3,
                    add_meta=True):
    """
        Adds a new cohort to backend.

        Parameters
        ~~~~~~~~~~

        cohort : string
            Name of cohort (must be unique).

        users : list
            List of user ids to add to cohort.

        project : string
            Project of cohort.
    """
    conn = Connector(instance=conf.__cohort_data_instance__)
    now = format_mediawiki_timestamp(datetime.now())

    # TODO: ALLOW THE COHORT DEF TO BE REFRESHED IF IT ALREADY EXISTS

    if add_meta:
        logging.debug(__name__ + ' :: Adding new cohort "{0}".'.
                      format(cohort))

        if not notes:
            notes = 'Generated by: ' + __name__

        # Create an entry in ``usertags_meta``
        utm_query = query_store[add_cohort_data.__query_name__ + '_meta']
        try:
            params = {
                'utm_name': str(cohort),
                'utm_project': str(project),
                'utm_notes': str(notes),
                'utm_group': int(group),
                'utm_owner': int(owner),
                'utm_touched': now,
                'utm_enabled': 0
            }
        except ValueError as e:
            raise UMQueryCallError(__name__ + ' :: ' + str(e))

        utm_query = sub_tokens(utm_query,
                               db=conf.__cohort_meta_instance__,
                               table=conf.__cohort_meta_db__)
        try:
            conn._cur_.execute(utm_query, params)
            conn._db_.commit()
        except (ProgrammingError, OperationalError) as e:
            conn._db_.rollback()
            raise UMQueryCallError(__name__ + ' :: ' + str(e))

    # add data to ``user_tags``
    if users:
        # get uid for cohort
        usertag = get_cohort_id(cohort)

        logging.debug(__name__ + ' :: Adding cohort {0} users.'.
                      format(len(users)))

        try:
            value_list_ut = [('{0}'.format(project), int(uid), int(usertag))
                             for uid in users]
        except ValueError as e:
            raise UMQueryCallError(__name__ + ' :: ' + str(e))

        # Append one "%s" placeholder per user tuple, stripping the
        # trailing comma
        ut_query = query_store[add_cohort_data.__query_name__] + '(' + \
            (' %s,' * len(value_list_ut))[:-1] + ')'
        ut_query = sub_tokens(ut_query,
                              db=conf.__cohort_meta_instance__,
                              table=conf.__cohort_db__)
        try:
            conn._cur_.execute(ut_query, value_list_ut)
            conn._db_.commit()
        except (ProgrammingError, OperationalError) as e:
            conn._db_.rollback()
            raise UMQueryCallError(__name__ + ' :: ' + str(e))

    del conn
def requests_notification_callback(msg_queue_in, msg_queue_out):
    """
        Asynchronous callback.  Tracks status of requests and new requests.
        This callback utilizes ``msg_queue_in`` & ``msg_queue_out`` to
        manage request status.
    """
    log_name = "{0} :: {1}".format(__name__,
                                   requests_notification_callback.__name__)
    logging.debug("{0} - STARTING...".format(log_name))

    cache = OrderedDict()

    while 1:
        try:
            msg = msg_queue_in.get(True)
        except IOError as e:
            logging.error(__name__ + " :: Could not block "
                                     'on in queue: "{0}"'.format(e.message))
            sleep(1)
            continue

        try:
            type = msg[0]
        except (KeyError, ValueError):
            logging.error(log_name + " - No valid type "
                                     "{0}".format(str(msg)))
            continue

        # Init request
        if type == 0:
            try:
                cache[msg[1]] = [True, msg[2]]
                logging.debug(log_name + " - Initialize Request: "
                                         "{0}.".format(str(msg)))
            except Exception:
                logging.error(log_name + " - Initialize Request"
                                         " failed: {0}".format(str(msg)))

        # Kill request - leave on cache
        elif type == 1:
            try:
                cache[msg[1]][0] = False
                logging.debug(log_name + " - Set request finished: "
                                         "{0}.\n".format(str(msg)))
            except Exception:
                logging.error(log_name + " - Set request finished failed: "
                                         "{0}\n".format(str(msg)))

        # Is the key in the cache and running?
        elif type == 2:
            try:
                if msg[1] in cache:
                    msg_queue_out.put([cache[msg[1]][0]], True)
                else:
                    msg_queue_out.put([False], True)
                logging.debug(log_name + " - Get request alive: "
                                         "{0}.".format(str(msg)))
            except (KeyError, ValueError):
                logging.error(log_name + " - Get request alive failed: "
                                         "{0}".format(str(msg)))

        # Get keys
        elif type == 3:
            msg_queue_out.put(cache.keys(), True)

        # Get url
        elif type == 4:
            try:
                if msg[1] in cache:
                    msg_queue_out.put([cache[msg[1]][1]], True)
                else:
                    logging.error(log_name + " - Get URL failed: {0}".
                                  format(str(msg)))
            except (KeyError, ValueError):
                logging.error(log_name + " - Get URL failed: {0}".
                              format(str(msg)))

        else:
            logging.error(log_name + " - Bad message: {0}".format(str(msg)))

    logging.debug("{0} - SHUTTING DOWN...".format(log_name))
def job_control():
    """
        Controls the execution of user metrics requests.  Requests are
        pulled from and pushed to broker targets rather than passed in
        via queues.
    """

    # Store executed and pending jobs respectively
    job_queue = list()

    # Global job ID number
    job_id = 0

    # Tallies the number of concurrently running jobs
    concurrent_jobs = 0

    log_name = '{0} :: {1}'.format(__name__, job_control.__name__)
    logging.debug('{0} - STARTING...'.format(log_name))

    while 1:
        time.sleep(RESQUEST_TIMEOUT)

        # Request Queue Processing
        # ------------------------

        # logging.debug(log_name + ' :: POLLING REQUESTS...')
        logging.debug(log_name + ' :: JOB QUEUE - {0}'.
                      format(str(job_queue)))

        req_item = None

        # Only process if there are fewer than the maximum number of
        # concurrent jobs
        if concurrent_jobs < MAX_CONCURRENT_JOBS:

            # Pop from request target
            req_item = umapi_broker_context.pop(REQUEST_BROKER_TARGET)

            # Push to process target
            if req_item:
                url_hash = sha1(req_item.encode('utf-8')).hexdigest()
                umapi_broker_context.add(PROCESS_BROKER_TARGET,
                                         url_hash, req_item)

                logging.debug(log_name + ' :: PULLING item from request '
                                         'queue -> \n\t{0}'.format(req_item))

        # Process complete jobs
        # ---------------------

        if concurrent_jobs:
            for job_item in job_queue:

                if not job_item.queue.empty():

                    logging.info(log_name + ' :: READING RESPONSE - {0}'.
                                 format(job_item.request))

                    # Pull data off of the queue and add it to response queue
                    data = ''
                    while not job_item.queue.empty():
                        data += job_item.queue.get(True)

                    # Remove from process target
                    url_hash = sha1(job_item.request.
                                    encode('utf-8')).hexdigest()
                    try:
                        umapi_broker_context.remove(PROCESS_BROKER_TARGET,
                                                    url_hash)
                    except Exception as e:
                        logging.error(log_name + ' :: Could not process '
                                                 '{0} from {1} -- {2}'.
                                      format(job_item.request,
                                             PROCESS_BROKER_TARGET,
                                             e.message))

                    # Add to response target
                    umapi_broker_context.add(RESPONSE_BROKER_TARGET,
                                             url_hash,
                                             pack_response_for_broker(
                                                 job_item.request, data))

                    del job_queue[job_queue.index(job_item)]
                    concurrent_jobs -= 1

                    logging.debug(log_name + ' :: RUN -> RESPONSE - '
                                             'Job ID {0}'
                                             '\n\tConcurrent jobs = {1}'
                                  .format(str(job_item.id), concurrent_jobs))

        # Process request
        # ---------------

        if req_item:
            req_q = Queue()
            proc = Process(target=process_metrics,
                           args=(req_q, req_item))
            proc.start()

            job_item = job_item_type(job_id, proc, req_item, req_q)
            job_queue.append(job_item)

            concurrent_jobs += 1
            job_id += 1

            logging.debug(log_name + ' :: WAIT -> RUN - Job ID {0}'
                                     '\n\tConcurrent jobs = {1}, REQ = {2}'
                          .format(str(job_id), concurrent_jobs, req_item))

    logging.debug('{0} - FINISHING.'.format(log_name))