Example #1
def _process_help(args):
    """ Used by Threshold::process() for forking.
        Should not be called externally. """

    state = args[1]
    thread_args = RevertRateArgsClass(state[0], state[1], state[2],
                                      state[3], state[4], state[6],
                                      state[7], state[8])
    users = args[0]

    if thread_args.log_progress:
        logging.info(__name__ +
                    ' :: Computing reverts on %s users (PID %s)'
                    % (len(users), str(os.getpid())))
    results_agg = list()
    dropped_users = 0

    umpd_obj = UMP_MAP[thread_args.group](users, thread_args)
    for user_data in umpd_obj:

        total_revisions = 0.0
        total_reverts = 0.0

        # Call query on revert rate for each user
        #
        # 1. Obtain user registration date
        # 2. Compute end date based on 't'
        # 3. Get user revisions in time period
        query_args = namedtuple('QueryArgs', 'date_start date_end')\
            (format_mediawiki_timestamp(user_data.start),
             format_mediawiki_timestamp(user_data.end))

        try:
            revisions = query_mod.\
                revert_rate_user_revs_query(user_data.user,
                                            thread_args.project,
                                            query_args)
        except query_mod.UMQueryCallError as e:
            logging.error(__name__ + ' :: Failed to '
                                     'get revisions: {0}'.format(e.message))
            dropped_users += 1
            continue

        results_thread = mpw.build_thread_pool(revisions, _revision_proc,
                                               thread_args.rev_threads, state)

        for r in results_thread:
            total_revisions += r[0]
            total_reverts += r[1]
        if not total_revisions:
            results_agg.append([user_data.user, 0.0, total_revisions])
        else:
            results_agg.append([user_data.user, total_reverts / total_revisions,
                                total_revisions])

    if thread_args.log_progress:
        logging.debug(__name__ + ' :: PID {0} complete. Dropped users = {1}'.
            format(str(os.getpid()), dropped_users))

    return results_agg
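
A side note on the inline namedtuple pattern used for query_args above: the type is created and instantiated in a single expression. A minimal, self-contained sketch of the equivalent two-step form (the timestamp values are made up for illustration):

from collections import namedtuple

QueryArgs = namedtuple('QueryArgs', 'date_start date_end')
query_args = QueryArgs('20130101000000', '20130201000000')
print(query_args.date_start)   # '20130101000000'
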
Example #2
    def set_connection(self, retries=20, timeout=1, **kwargs):
        """
            Establishes a database connection.

            Parameters (\*\*kwargs):
                - **db**: string value used to determine the database
                    connection
        """
        if 'instance' in kwargs:
            mysql_kwargs = {}
            for key in projSet.connections[kwargs['instance']]:
                mysql_kwargs[key] = projSet.connections[kwargs['instance']][
                                    key]

            while retries:
                try:
                    self._db_ = MySQLdb.connect(**mysql_kwargs)
                    break
                except MySQLdb.OperationalError as e:
                    logging.debug(__name__ + ' :: Connection dropped. '
                                             'Reopening MySQL connection. '
                                             '{0} retries left, timeout = {1}: '
                                             '"{2}"'.format(retries, timeout,
                                                            e.message))
                    sleep(timeout)
                    retries -= 1
            if not retries:
                raise ConnectorError()

            self._cur_ = self._db_.cursor()
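
The loop above keeps reopening the MySQL connection until it succeeds or the retry budget runs out. A minimal sketch of the same retry-with-timeout pattern against a stubbed connect callable (the stub and its failure behaviour are assumptions, purely for illustration):

from time import sleep

def connect_with_retries(connect, retries=20, timeout=1):
    # Keep calling connect() until it succeeds; sleep between attempts and
    # raise once the retry budget is exhausted.
    while retries:
        try:
            return connect()
        except Exception as e:
            print('Connection failed ({0}), {1} retries left'.format(e, retries))
            sleep(timeout)
            retries -= 1
    raise RuntimeError('Could not establish a connection.')

attempts = {'n': 0}

def flaky_connect():
    # Stub that fails twice and then "connects"
    attempts['n'] += 1
    if attempts['n'] < 3:
        raise IOError('server has gone away')
    return 'connection object'

print(connect_with_retries(flaky_connect, retries=5, timeout=0))
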
Example #3
def log_pool_worker_end(metric_name, worker_name, extra=''):
    """
        Logging method for job completion.
    """
    logging.debug('{0} :: {1}\n'
                  '\tPID = {2} complete.\n'
                  '\t{3}\n'.format(metric_name, worker_name, getpid(), extra))
Example #4
    def wrapper(users, project, args):
        # ensure the handles are iterable
        if not hasattr(users, '__iter__'):
            users = [users]

        # get query and call
        if hasattr(args, 'log') and args.log:
            logging.debug(__name__ + ':: calling "%(method)s" '
                                     'in "%(project)s".' %
                                     {
                                         'method': f.__name__,
                                         'project': project
                                     }
                          )
        # Call query escaping user and project variables for SQL injection
        query = f(escape_var(users), escape_var(project), args)

        try:
            conn = Connector(instance=conf.PROJECT_DB_MAP[project])
        except KeyError:
            logging.error(__name__ + ' :: Project does not exist.')
            return []
        except ConnectorError:
            logging.error(__name__ + ' :: Could not establish a connection.')
            raise UMQueryCallError('Could not establish a connection.')

        try:
            conn._cur_.execute(query)
        except ProgrammingError:
            logging.error(__name__ +
                          ' :: Could not get edit counts - Query failed.')
            raise UMQueryCallError()
        results = [row for row in conn._cur_]
        del conn
        return results
Example #5
def get_data(request_meta, hash_result=True):
    """
        Extract data from the global hash given a request object.  If an item
        is successfully recovered, its data is returned.
    """

    hash_table_ref = read_pickle_data()

    # Traverse the hash key structure to find data
    # @TODO rather than iterate through REQUEST_META_BASE &
    #   REQUEST_META_QUERY_STR look only at existing attributes

    logging.debug(__name__ + " - Attempting to pull data for request " \
                             "COHORT {0}, METRIC {1}".
                  format(request_meta.cohort_expr, request_meta.metric))

    key_sig = build_key_signature(request_meta, hash_result=hash_result)
    item = find_item(hash_table_ref, key_sig)

    if item:
        # item[0] will be a stringified structure that
        # is initialized, see set_data.
        return eval(item[0])
    else:
        return None
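
item[0] is rehydrated with eval, which will execute arbitrary expressions read back from the pickle file. When the stored string is a plain literal (dicts, lists, tuples, numbers, strings), ast.literal_eval is a safer substitute; a small sketch of the difference, with a made-up payload:

import ast

payload = "{'cohort': 'my_cohort', 'rows': [(1, 0.5), (2, 0.25)]}"
data = ast.literal_eval(payload)   # parses literals only, never runs code
print(data['rows'][0])             # (1, 0.5)

# eval(payload) returns the same dict here, but it would just as happily
# execute an expression such as __import__('os').getcwd().
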
Example #6
def log_pool_worker_start(metric_name, worker_name, data, args):
    """
        Logging method for processing pool workers.
    """
    logging.debug('{0} :: {1}\n'
                  '\tData = {2} rows\n'
                  '\tArgs = {3}\n'
                  '\tPID = {4}\n'.format(metric_name, worker_name, len(data),
                                         str(args), getpid()))
Example #7
        def authenticate(self, password):
            password = escape(unicode(password))
            logging.debug(__name__ + ' :: Authenticating "{0}"/"{1}" '
                                     'on hash "{2}" ...'.
                format(self.name, password, self.pw_hash))
            if self.check_password(password):
                self.authenticated = True
            else:
                self.authenticated = False
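
check_password is not part of this excerpt. A plausible sketch of how such a check could work, using werkzeug's password helpers; this is an assumption about how pw_hash might be produced and verified, not necessarily what the project does:

from werkzeug.security import generate_password_hash, check_password_hash

pw_hash = generate_password_hash('s3cret')       # e.g. stored at registration
print(check_password_hash(pw_hash, 's3cret'))    # True
print(check_password_hash(pw_hash, 'wrong'))     # False
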
Example #8
def _process_help(args):

    # Unpack args
    state = args[1]
    users = args[0]

    thread_args = um.UserMetric._unpack_params(state)

    # Log progress
    if thread_args.log_:
        logging.debug(__name__ + '::Computing live account. (PID = %s)' %
                                 getpid())

    # Extract edit button click from edit_page_tracking table (namespace,
    # article title, timestamp) of first click and registration timestamps
    # (join on logging table)
    #
    # Query will return: (user id, time of registration, time of first
    # edit button click)
    query_args = namedtuple('QueryArgs', 'namespace')(thread_args.namespace)
    query_results = query_mod.live_account_query(users, thread_args.project,
                                                 query_args)

    # Iterate over results to determine boolean indicating whether
    # account is "live"

    results = {str(user): -1 for user in users}

    user_reg = query_mod.user_registration_date_logging(
        users, thread_args.project, None)

    # uid: diff_time
    user_reg = {str(r[0]): (datetime.now() - date_parse(r[1])).
                            total_seconds() / 3600 for r in user_reg}

    # Flag all users alive longer than t hours as "not invalid"
    for user in results:
        if user in user_reg and user_reg[user] >= thread_args.t:
                results[user] = 0

    for row in query_results:
        user = str(row[0])
        try:
            # get the difference in hours
            diff = (date_parse(row[2]) - date_parse(row[1])).total_seconds()
            diff /= 3600
        except Exception:
            continue

        if diff <= thread_args.t:
            results[user] = 1
        else:
            results[user] = 0

    return [(str(key), results[key]) for key in results]
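
The registration ages above are hours between now and the parsed registration timestamp. A self-contained sketch of the same arithmetic using only the standard library (the 14-digit MediaWiki-style timestamp layout is an assumption):

from datetime import datetime

fmt = '%Y%m%d%H%M%S'
registered = datetime.strptime('20130301120000', fmt)
now = datetime.strptime('20130302000000', fmt)
hours_live = (now - registered).total_seconds() / 3600
print(hours_live)   # 12.0
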
Example #9
    def wrapper(users, project, args):
        # ensure the handles are iterable
        if not hasattr(users, '__iter__'):
            users = [users]

        # escape project & users
        users = escape_var(users)
        project = escape_var(project)

        # compose a csv of user ids
        user_str = DataLoader().format_comma_separated_list(users)

        # get query and call
        if hasattr(args, 'log') and args.log:
            logging.debug(__name__ + ':: calling "%(method)s" '
                                     'in "%(project)s".' %
                                     {
                                         'method': f.__name__,
                                         'project': project
                                     }
                          )
        # 1. Synthesize query
        # 2. substitute project
        query, params = f(users, project, args)
        query = sub_tokens(query, db=project, users=user_str)
        try:
            conn = Connector(instance=conf.PROJECT_DB_MAP[project])
        except KeyError:
            logging.error(__name__ + ' :: Project does not exist.')
            return []
        except ConnectorError:
            logging.error(__name__ + ' :: Could not establish a connection.')
            raise UMQueryCallError(__name__ + ' :: Could not '
                                              'establish a connection.')

        try:
            if params:
                conn._cur_.execute(query, params)
            else:
                conn._cur_.execute(query)
        except (OperationalError, ProgrammingError) as e:
            logging.error(__name__ +
                          ' :: Query failed: {0}, params = {1}'.
                          format(query, str(params)))
            raise UMQueryCallError(__name__ + ' :: ' + str(e))
        results = [row for row in conn._cur_]
        del conn
        return results
Example #10
        def register_user(self):
            """ Writes the user credentials to the datastore. """

            # 1. Only users not already registered
            # 2. Ensure that the user is unique
            # 3. Write the user / pass to the db

            if not self.active:
                if not query_mod.get_api_user(self.name, by_id=False):
                    query_mod.insert_api_user(self.name, self.pw_hash)
                    logging.debug(__name__ + ' :: Added user {0}'.
                        format(self.name))
                else:
                    logging.error(__name__ + ' :: Could not add user {0}'.
                        format(self.name))
                self.active = True
Example #11
def add_cohort_users(cohort_name, user_records):
    conn = Connector(instance=conf.__cohort_data_instance__)
    ut_query = query_store[add_cohort_users.__query_name__]
    ut_query = sub_tokens(ut_query, db=conf.__cohort_meta_instance__,
                          table=conf.__cohort_db__)
    ut_query += ','.join(['%s'] * len(user_records))
    cohort_id = get_cohort_id(cohort_name)
    value_list_ut = [(rec['project'], rec['user_id'], int(cohort_id))
                     for rec in user_records]
    try:
        logging.debug('ut_query:\n%s', ut_query)
        logging.debug('value_list_ut:\n%s', value_list_ut)
        conn._cur_.execute(ut_query, value_list_ut)
        conn._db_.commit()
    except (ProgrammingError, OperationalError) as e:
        conn._db_.rollback()
        raise UMQueryCallError(__name__ + ' :: ' + str(e))
    del conn
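
The statement above is extended with one %s placeholder per record before the list of (project, user_id, cohort_id) tuples is handed to the cursor. A tiny sketch of just that string-building step (the base statement is a placeholder, not the stored query):

user_records = [
    {'project': 'enwiki', 'user_id': 1001},
    {'project': 'dewiki', 'user_id': 1002},
]
ut_query = 'INSERT INTO user_tags VALUES '   # assumed base statement
ut_query += ','.join(['%s'] * len(user_records))
print(ut_query)   # INSERT INTO user_tags VALUES %s,%s
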
Example #12
def _process_help(args):
    """
        First determine if the user has made an adequate number of
        edits.  If so, compute the number of minutes that passed
        between the Nth and Mth edit.

            - Parameters:
                - **user_handle** - List(int).  List of user ids.
                - **first_edit** - Integer.  The numeric value of
                    the first edit from which to measure the threshold.
                - **threshold_edit** - Integer.  The numeric value of
                    the threshold edit from which to measure the
                    threshold
    """

    # Unpack args
    state = args[1]
    users = args[0]

    thread_args = um.UserMetric._unpack_params(state)

    if thread_args.log_:
        logging.debug(__name__ + '::Computing Time to threshold on '
                                 '{0} users. (PID = {1})'.format(len(users),
                                                                 getpid()))

    minutes_to_threshold = list()

    # For each user gather their revisions and produce a time diff
    for user in users:
        revs = query_mod.\
            time_to_threshold_revs_query(user, thread_args.project, None)
        revs = [rev[0] for rev in revs]
        minutes_to_threshold.append(
            [user, get_minute_diff_result(revs,
                                          thread_args.threshold_edit,
                                          thread_args.first_edit)])

    if thread_args.log_:
        logging.info(__name__ + '::Processed PID = {0}.'.format(getpid()))

    return minutes_to_threshold
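
get_minute_diff_result is not shown in this excerpt. A hypothetical stand-in that captures what the docstring describes, measuring minutes between the first_edit-th and threshold_edit-th revision timestamps; the 14-digit timestamp format and the -1 sentinel are assumptions:

from datetime import datetime

def minute_diff(rev_timestamps, threshold_edit, first_edit,
                fmt='%Y%m%d%H%M%S'):
    # Hypothetical helper: minutes elapsed between the first_edit-th and
    # threshold_edit-th revisions, or -1 if the user never reached the
    # threshold edit count.
    if len(rev_timestamps) < threshold_edit:
        return -1
    t_first = datetime.strptime(rev_timestamps[first_edit - 1], fmt)
    t_thresh = datetime.strptime(rev_timestamps[threshold_edit - 1], fmt)
    return int((t_thresh - t_first).total_seconds() / 60)

print(minute_diff(['20130301120000', '20130301121500', '20130301123000'],
                  threshold_edit=2, first_edit=1))   # 15
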
Example #13
        def __init__(self, username, authenticated=False):

            self.name = escape(unicode(username))
            self.authenticated = authenticated

            user_ref = query_mod.get_api_user(username, by_id=False)
            if user_ref:
                self.id = unicode(user_ref[1])
                self.active = True
                self.pw_hash = unicode(str(user_ref[2]))
            else:
                self.id = None
                self.active = False
                self.pw_hash = None

            logging.debug(__name__ + ' :: Initializing user obj. '
                                     'user: "{0}", '
                                     'is active: "{1}", '
                                     'is auth: {2}'.
                format(username, self.active, self.authenticated))
Example #14
    def login():
        if request.method == "POST" and "username" in request.form:

            username = escape(unicode(str(request.form["username"])))
            passwd = escape(unicode(str(request.form["password"])))
            remember = request.form.get("remember", "no") == "yes"

            # Initialize user
            user_ref = APIUser(username)
            user_ref.authenticate(passwd)

            logging.debug(__name__ + ' :: Authenticating "{0}"/"{1}" ...'.
                format(username, passwd))

            if user_ref.is_authenticated():
                login_user(user_ref, remember=remember)
                flash("Logged in.")
                return redirect(request.args.get("next") or url_for("api_root"))
            else:
                flash("Login failed.")
        return render_template("login.html")
Example #15
def process_response():
    """ Pulls responses off of the queue. """

    log_name = '{0} :: {1}'.format(__name__, process_response.__name__)
    logging.debug(log_name  + ' - STARTING...')

    while 1:

        time.sleep(RESPONSE_TIMEOUT)

        # Handle any responses as they enter the queue
        # logging.debug(log_name  + ' - POLLING RESPONSES...')
        res_item = umapi_broker_context.pop(RESPONSE_BROKER_TARGET)
        if not res_item:
            continue

        req, data = unpack_response_for_broker(res_item)
        request_meta = build_request_obj(req)

        # Add result to cache once completed
        logging.debug(log_name + ' - Setting data for {0}'.format(
            str(request_meta)))
        set_data(data, request_meta)

    logging.debug(log_name + ' - SHUTTING DOWN...')
Example #16
def _process_help(args):

    # Unpack args
    state = args[1]
    thread_args = LiveAccountArgsClass(state[0], state[1], state[2], state[3],
                                       state[4], state[5])
    users = args[0]

    # Log progress
    if thread_args.log:
        logging.debug(__name__ + '::Computing live account. (PID = %s)' %
                                 getpid())

    # Extract edit button click from edit_page_tracking table (namespace,
    # article title, timestamp) of first click and registration timestamps
    # (join on logging table)
    #
    # Query will return: (user id, time of registration, time of first
    # edit button click)
    query_args = namedtuple('QueryArgs', 'namespace')(thread_args.namespace)
    query_results = query_mod.live_account_query(users, thread_args.project,
                                                 query_args)

    # Iterate over results to determine boolean indicating whether
    # account is "live"
    results = {long(user): -1 for user in users}
    for row in query_results:
        try:
            # get the difference in minutes
            diff = (date_parse(row[2]) - date_parse(row[1])).total_seconds()
            diff /= 60
        except Exception:
            continue

        if diff <= thread_args.t:
            results[row[0]] = 1
        else:
            results[row[0]] = 0

    return [(str(key), results[key]) for key in results]
Example #17
    def login():
        if request.method == 'POST' and 'username' in request.form:

            username = escape(unicode(str(request.form['username'])))
            passwd = escape(unicode(str(request.form['password'])))
            remember = request.form.get('remember', 'no') == 'yes'

            # Initialize user
            user_ref = APIUser(username)
            user_ref.authenticate(passwd)

            logging.debug(__name__ + ' :: Authenticating "{0}"/"{1}" ...'.
                format(username, passwd))

            if user_ref.is_authenticated():
                login_user(user_ref, remember=remember)
                flash('Logged in.')
                return redirect(request.args.get('next')
                                or url_for('api_root'))
            else:
                flash('Login failed.')
        return render_template('login.html')
Example #18
def set_data(data, request_meta, hash_result=True):
    """
        Given request meta-data and a dataset, create a key path in the global
        hash to store the data.
    """
    hash_table_ref = read_pickle_data()
    # Keep a reference to the root so the complete hash can be persisted
    root_ref = hash_table_ref

    key_sig = build_key_signature(request_meta, hash_result=hash_result)
    logging.debug(__name__ + " :: Adding data to hash @ key signature = {0}".
                  format(str(key_sig)))
    if hash_result:
        key_sig_full = build_key_signature(request_meta, hash_result=False)
        hash_table_ref[key_sig] = (data, key_sig_full)
    else:
        last_item = key_sig[-1]
        for item in key_sig:
            if item == last_item:
                hash_table_ref[item] = data
            else:
                # Only create intermediate nodes that are missing so that
                # existing sibling entries are preserved
                if item not in hash_table_ref:
                    hash_table_ref[item] = OrderedDict()
                hash_table_ref = hash_table_ref[item]
    # Persist from the root of the hash rather than the last node visited
    write_pickle_data(root_ref)
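
In the non-hashed branch the key signature is walked one component at a time, creating intermediate OrderedDict nodes along the way. A generic, self-contained sketch of that key-path insertion; the function and key names are illustrative, not the project's API:

from collections import OrderedDict

def set_nested(root, key_path, value):
    # Walk every key except the last, creating nodes as needed, then store
    # the value under the final key.
    node = root
    for key in key_path[:-1]:
        node = node.setdefault(key, OrderedDict())
    node[key_path[-1]] = value
    return root

cache = OrderedDict()
set_nested(cache, ('my_cohort', 'edit_count', '20130101'), {'rows': 42})
print(cache['my_cohort']['edit_count']['20130101'])   # {'rows': 42}
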
Example #19
def _process_help(args):
    """
        Worker thread method for edit count.
    """

    # Unpack args
    users = args[0]
    state = args[1]

    metric_params = um.UserMetric._unpack_params(state)
    query_args_type = namedtuple("QueryArgs", "date_start date_end")

    logging.debug(__name__ + ":: Executing EditCount on "
                             "%s users (PID = %s)" % (len(users), getpid()))

    # Call user period method
    umpd_obj = UMP_MAP[metric_params.group](users, metric_params)
    results = list()
    for t in umpd_obj:
        args = query_args_type(t.start, t.end)

        # Build edit count results list
        results += query_mod.edit_count_user_query(t.user,
                                                   metric_params.project,
                                                   args)
    return results
Example #20
def create_cohort(cohort, project,
                    notes="", owner=1, group=3):
    conn = Connector(instance=conf.__cohort_data_instance__)
    now = format_mediawiki_timestamp(datetime.now())

    # TODO: ALLOW THE COHORT DEF TO BE REFRESHED IF IT ALREADY EXISTS

    logging.debug(__name__ + ' :: Adding new cohort "{0}".'.
                  format(cohort))
    if not notes:
        notes = 'Generated by: ' + __name__

    # Create an entry in ``usertags_meta``
    utm_query = query_store[create_cohort.__query_name__]

    try:
        params = {
            'utm_name': str(cohort),
            'utm_project': str(project),
            'utm_notes': str(notes),
            'utm_group': int(group),
            'utm_owner': int(owner),
            'utm_touched': now,
            'utm_enabled': 0
        }
    except ValueError as e:
        raise UMQueryCallError(__name__ + ' :: ' + str(e))

    utm_query = sub_tokens(utm_query, db=conf.__cohort_meta_instance__,
                           table=conf.__cohort_meta_db__)
    try:
        conn._cur_.execute(utm_query, params)
        conn._db_.commit()
    except (ProgrammingError, OperationalError) as e:
        conn._db_.rollback()
        raise UMQueryCallError(__name__ + ' :: ' + str(e))
    del conn
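
create_cohort binds a dict of parameters against named %(...)s placeholders so that the driver escapes the values, instead of interpolating them into the SQL string. A minimal sketch of that binding style; the statement text is an assumption for illustration, not the stored query:

params = {
    'utm_name': 'my_cohort',
    'utm_project': 'enwiki',
    'utm_owner': 1,
}
utm_query = ('INSERT INTO usertags_meta (utm_name, utm_project, utm_owner) '
             'VALUES (%(utm_name)s, %(utm_project)s, %(utm_owner)s)')
# conn._cur_.execute(utm_query, params)   # values escaped by the driver
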
Example #21
def process_responses(response_queue, msg_in):
    """ Pulls responses off of the queue. """

    log_name = '{0} :: {1}'.format(__name__, process_responses.__name__)
    logging.debug(log_name  + ' - STARTING...')

    while 1:
        stream = ''

        # Block on the response queue
        try:
            res = response_queue.get(True)
            request_meta = rebuild_unpacked_request(res)
        except Exception:
            logging.error(log_name + ' - Could not get request meta')
            continue

        data = response_queue.get(True)
        while data:
            stream += data
            try:
                data = response_queue.get(True, timeout=1)
            except Empty:
                break

        try:
            data = eval(stream)
        except Exception as e:

            # Report a fraction of the failed response data directly in the
            # logger
            if len(unicode(stream)) > 2000:
                excerpt = stream[:1000] + ' ... ' + stream[-1000:]
            else:
                excerpt = stream

            logging.error(log_name + ' - Request failed. {0}\n\n' \
                                     'data excerpt: {1}'.format(e.message, excerpt))

            # Format a response that will report on the failed request
            stream = "OrderedDict([('status', 'Request failed.'), " \
                     "('exception', '" + escape(unicode(e.message)) + "')," \
                     "('request', '" + escape(unicode(request_meta)) + "'), " \
                     "('data', '" + escape(unicode(stream)) + "')])"

        key_sig = build_key_signature(request_meta, hash_result=True)

        # Set request in list to "not alive"
        req_cb_flag_job_complete(key_sig, REQ_NCB_LOCK)

        logging.debug(log_name + ' - Setting data for {0}'.format(
            str(request_meta)))
        set_data(stream, request_meta)

    logging.debug(log_name + ' - SHUTTING DOWN...')
Example #22
def requests_notification_callback(msg_queue_in, msg_queue_out):
    """
        Asynchronous callback.  Tracks status of requests and new requests.
        This callback utilizes ``msg_queue_in`` & ``msg_queue_out`` to
        manage request status.
    """
    log_name = '{0} :: {1}'.format(__name__,
                                   requests_notification_callback.__name__)
    logging.debug('{0}  - STARTING...'.format(log_name))

    # TODO - potentially extend with an in-memory cache
    job_list = OrderedDict()
    while 1:

        try:
            msg = msg_queue_in.get(True)
        except IOError as e:
            logging.error(__name__ + ' :: Could not block '
                                     'on in queue: "{0}"'.format(e.message))
            sleep(1)
            continue

        try:
            type = msg[0]
        except (KeyError, ValueError):
            logging.error(log_name + ' - No valid type ' \
                                     '{0}'.format(str(msg)))
            continue

        # Init request
        if type == 0:
            try:
                job_list[msg[1]] = [True, msg[2]]
                logging.debug(log_name + ' - Initialize Request: ' \
                                         '{0}.'.format(str(msg)))
            except Exception:
                logging.error(log_name + ' - Initialize Request' \
                                         ' failed: {0}'.format(str(msg)))

        # Flag request complete - leave on queue
        elif type == 1:
            try:
                job_list[msg[1]][0] = False
                logging.debug(log_name + ' - Set request finished: ' \
                                         '{0}.\n'.format(str(msg)))
            except Exception:
                logging.error(log_name + ' - Set request finished failed: ' \
                                         '{0}\n'.format(str(msg)))

        # Is the key in the cache and running?
        elif type == 2:
            try:
                if msg[1] in job_list:
                    msg_queue_out.put([job_list[msg[1]][0]], True)
                else:
                    msg_queue_out.put([False], True)
                logging.debug(log_name + ' - Get request alive: ' \
                                         '{0}.'.format(str(msg)))
            except (KeyError, ValueError):
                logging.error(log_name + ' - Get request alive failed: ' \
                                         '{0}'.format(str(msg)))

        # Get keys
        elif type == 3:
            msg_queue_out.put(job_list.keys(), True)

        # Get url
        elif type == 4:
            try:
                if msg[1] in job_list:
                    msg_queue_out.put([job_list[msg[1]][1]], True)
                else:
                    logging.error(log_name + ' - Get URL failed: {0}'.
                                  format(str(msg)))
            except (KeyError, ValueError):
                logging.error(log_name + ' - Get URL failed: {0}'.
                              format(str(msg)))
        else:
            logging.error(log_name + ' - Bad message: {0}'.format(str(msg)))

    logging.debug('{0}  - SHUTTING DOWN...'.format(log_name))
Example #23
def process_metrics(p, request_meta):
    """
        Worker process for requests, forked from the job controller.  This
        method handles:

            * Filtering cohort type: "regular" cohort, single user, user group
            * Secondary validation
            *
    """

    log_name = '{0} :: {1}'.format(__name__, process_metrics.__name__)

    logging.info(log_name + ' - START JOB'
                            '\n\tCOHORT = {0} - METRIC = {1}'
                            ' -  PID = {2})'.
        format(request_meta.cohort_expr, request_meta.metric, getpid()))

    err_msg = __name__ + ' :: Request failed.'
    users = list()

    # obtain user list - handle the case where a lone user ID is passed
    # !! The username should already be validated
    if request_meta.is_user:
        uid = MediaWikiUser.is_user_name(request_meta.cohort_expr,
                                         request_meta.project)
        if uid:
            valid = True
            users = [uid]
        else:
            valid = False
            err_msg = error_codes[3]

    # The "all" user group.  All users within a time period.
    elif request_meta.cohort_expr == 'all':
        users = MediaWikiUser(query_type=1)

        try:
            users = [u for u in users.get_users(
                request_meta.start, request_meta.end,
                project=request_meta.project)]
            valid = True
        except Exception:
            valid = False
            err_msg = error_codes[5]

    # "TYPICAL" COHORT PROCESSING
    else:
        users = get_users(request_meta.cohort_expr)

        # Default project is what is stored in usertags_meta
        project = query_mod.get_cohort_project_by_meta(
            request_meta.cohort_expr)
        if project:
            request_meta.project = project
        logging.debug(__name__ + ' :: Using default project from ' \
                                 'usertags_meta {0}.'.format(project))

        valid = True
        err_msg = ''

    if valid:
        # process request
        results = process_data_request(request_meta, users)
        results = str(results)
        response_size = getsizeof(results, None)

        if response_size > MAX_BLOCK_SIZE:
            index = 0

            # Dump the data in pieces - block until it is picked up
            while index < response_size:
                p.put(results[index:index+MAX_BLOCK_SIZE], block=True)
                index += MAX_BLOCK_SIZE
        else:
            p.put(results, block=True)

        logging.info(log_name + ' - END JOB'
                                '\n\tCOHORT = {0} - METRIC = {1}'
                                ' -  PID = {2})'.
            format(request_meta.cohort_expr, request_meta.metric, getpid()))

    else:
        p.put(err_msg, block=True)
        logging.info(log_name + ' - END JOB - FAILED.'
                                '\n\tCOHORT = {0} - METRIC = {1}'
                                ' -  PID = {2})'.
        format(request_meta.cohort_expr, request_meta.metric, getpid()))
Example #24
def job_control(request_queue, response_queue):
    """
        Controls the execution of user metrics requests

        Parameters
        ~~~~~~~~~~

        request_queue : multiprocessing.Queue
           Queues incoming API requests.

    """

    # Store executed and pending jobs respectively
    job_queue = list()
    wait_queue = list()

    # Global job ID number
    job_id = 0

    # Tallies the number of concurrently running jobs
    concurrent_jobs = 0

    log_name = '{0} :: {1}'.format(__name__, job_control.__name__)

    logging.debug('{0} - STARTING...'.format(log_name))

    while 1:

        # Request Queue Processing
        # ------------------------

        try:
            # Pull an item off of the queue

            req_item = request_queue.get(timeout=QUEUE_WAIT)

            logging.debug(log_name + ' :: PULLING item from request queue -> ' \
                                     '\n\tCOHORT = {0} - METRIC = {1}'
                .format(req_item['cohort_expr'], req_item['metric']))

        except Exception as e:
            req_item = None
            #logging.debug('{0} :: {1}  - Listening ...'
            #.format(__name__, job_control.__name__))


        # Process complete jobs
        # ---------------------

        for job_item in job_queue:

            # Look for completed jobs
            if not job_item.queue.empty():

                # Put request creds on res queue -- this goes to
                # response_handler asynchronously
                response_queue.put(unpack_fields(job_item.request),
                                   block=True)

                # Pull data off of the queue and add it to response queue
                while not job_item.queue.empty():
                    data = job_item.queue.get(True)
                    if data:
                        response_queue.put(data, block=True)

                del job_queue[job_queue.index(job_item)]

                concurrent_jobs -= 1

                logging.debug(log_name + ' :: RUN -> RESPONSE - Job ID {0}' \
                                         '\n\tConcurrent jobs = {1}'
                    .format(str(job_item.id), concurrent_jobs))


        # Process pending jobs
        # --------------------

        for wait_req in wait_queue:
            if concurrent_jobs <= MAX_CONCURRENT_JOBS:
                # prepare job from item

                req_q = Queue()
                proc = Process(target=process_metrics, args=(req_q, wait_req))
                proc.start()

                job_item = job_item_type(job_id, proc, wait_req, req_q)
                job_queue.append(job_item)

                del wait_queue[wait_queue.index(wait_req)]

                concurrent_jobs += 1
                job_id += 1

                logging.debug(log_name + ' :: WAIT -> RUN - Job ID {0}' \
                                         '\n\tConcurrent jobs = {1}, ' \
                                         'COHORT = {2} - METRIC = {3}'\
                    .format(str(job_id), concurrent_jobs,
                            wait_req.cohort_expr, wait_req.metric))


        # Add newest job to the queue
        # ---------------------------

        if req_item:

            # Build the request item
            rm = rebuild_unpacked_request(req_item)

            logging.debug(log_name + ' : REQUEST -> WAIT ' \
                                     '\n\tCOHORT = {0} - METRIC = {1}'
                .format(rm.cohort_expr, rm.metric))
            wait_queue.append(rm)

            # Communicate with request notification callback about new job
            key_sig = build_key_signature(rm, hash_result=True)
            url = get_url_from_keys(build_key_signature(rm), REQUEST_PATH)
            req_cb_add_req(key_sig, url, REQ_NCB_LOCK)


    logging.debug('{0} - FINISHING.'.format(log_name))
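
job_control forks each request into its own process and collects the results over a per-job Queue. A self-contained sketch of that fork-and-collect pattern with a toy worker; the worker and its payload are illustrative only:

from multiprocessing import Process, Queue

def toy_worker(q, request):
    # Stand-in for process_metrics: do some work, then put the result on
    # the queue for the controller to pick up.
    q.put('results for {0}'.format(request))

if __name__ == '__main__':
    req_q = Queue()
    proc = Process(target=toy_worker, args=(req_q, 'my_cohort'))
    proc.start()
    print(req_q.get(True))   # blocks until the worker responds
    proc.join()
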
Example #25
def add_cohort_data(cohort, users, project,
                    notes="", owner=1, group=3,
                    add_meta=True):
    """
        Adds a new cohort to backend.

        Parameters
        ~~~~~~~~~~

            cohort : string
                Name of cohort (must be unique).

            users : list
                List of user ids to add to cohort.

            project : string
                Project of cohort.
    """
    conn = Connector(instance=conf.__cohort_data_instance__)
    now = format_mediawiki_timestamp(datetime.now())

    # TODO: ALLOW THE COHORT DEF TO BE REFRESHED IF IT ALREADY EXISTS

    if add_meta:
        logging.debug(__name__ + ' :: Adding new cohort "{0}".'.
            format(cohort))
        if not notes:
            notes = 'Generated by: ' + __name__

        # Create an entry in ``usertags_meta``
        utm_query = query_store[add_cohort_data.__query_name__ + '_meta'] % {
            'cohort_meta_instance': conf.__cohort_meta_instance__,
            'cohort_meta_db': conf.__cohort_meta_db__,
            'utm_name': escape_var(cohort),
            'utm_project': escape_var(project),
            'utm_notes': notes,
            'utm_group': escape_var(str(group)),
            'utm_owner': escape_var(str(owner)),
            'utm_touched': now,
            'utm_enabled': '0'
        }
        conn._cur_.execute(utm_query)
        try:
            conn._db_.commit()
        except (ProgrammingError, OperationalError):
            conn._db_.rollback()

    # add data to ``user_tags``
    if users:

        # get uid for cohort
        usertag = get_cohort_id(cohort)

        logging.debug(__name__ + ' :: Adding cohort {0} users.'.
            format(len(users)))
        value_list_ut = [('{0}'.format(project),
                          int(uid),
                          int(usertag))
                         for uid in users]
        value_list_ut = str(value_list_ut)[1:-1]

        ut_query = query_store[add_cohort_data.__query_name__] % {
            'cohort_meta_instance': conf.__cohort_meta_instance__,
            'cohort_db': conf.__cohort_db__,
            'value_list': value_list_ut
        }
        conn._cur_.execute(ut_query)
        try:
            conn._db_.commit()
        except (ProgrammingError, OperationalError):
            conn._db_.rollback()

    del conn
Example #26
def add_cohort_data(cohort, users, project,
                    notes="", owner=1, group=3,
                    add_meta=True):
    """
        Adds a new cohort to backend.

        Parameters
        ~~~~~~~~~~

            cohort : string
                Name of cohort (must be unique).

            users : list
                List of user ids to add to cohort.

            project : string
                Project of cohort.
    """
    conn = Connector(instance=conf.__cohort_data_instance__)
    now = format_mediawiki_timestamp(datetime.now())

    # TODO: ALLOW THE COHORT DEF TO BE REFRESHED IF IT ALREADY EXISTS

    if add_meta:
        logging.debug(__name__ + ' :: Adding new cohort "{0}".'.
                      format(cohort))
        if not notes:
            notes = 'Generated by: ' + __name__

        # Create an entry in ``usertags_meta``
        utm_query = query_store[add_cohort_data.__query_name__ + '_meta']

        try:
            params = {
                'utm_name': str(cohort),
                'utm_project': str(project),
                'utm_notes': str(notes),
                'utm_group': int(group),
                'utm_owner': int(owner),
                'utm_touched': now,
                'utm_enabled': 0
            }
        except ValueError as e:
            raise UMQueryCallError(__name__ + ' :: ' + str(e))

        utm_query = sub_tokens(utm_query, db=conf.__cohort_meta_instance__,
                               table=conf.__cohort_meta_db__)
        try:
            conn._cur_.execute(utm_query, params)
            conn._db_.commit()
        except (ProgrammingError, OperationalError) as e:
            conn._db_.rollback()
            raise UMQueryCallError(__name__ + ' :: ' + str(e))

    # add data to ``user_tags``
    if users:
        # get uid for cohort
        usertag = get_cohort_id(cohort)

        logging.debug(__name__ + ' :: Adding cohort {0} users.'.
                      format(len(users)))

        try:
            value_list_ut = [('{0}'.format(project),
                              int(uid),
                              int(usertag))
                             for uid in users]
        except ValueError as e:
            raise UMQueryCallError(__name__ + ' :: ' + str(e))

        ut_query = query_store[add_cohort_data.__query_name__] + '(' + \
                   (' %s,' * len(value_list_ut))[:-1] + ')'
        ut_query = sub_tokens(ut_query, db=conf.__cohort_meta_instance__,
                              table=conf.__cohort_db__)
        try:
            conn._cur_.execute(ut_query, value_list_ut)
            conn._db_.commit()
        except (ProgrammingError, OperationalError) as e:
            conn._db_.rollback()
            raise UMQueryCallError(__name__ + ' :: ' + str(e))
    del conn
Example #27
def requests_notification_callback(msg_queue_in, msg_queue_out):
    """
        Asynchronous callback.  Tracks status of requests and new requests.
        This callback utilizes ``msg_queue_in`` & ``msg_queue_out`` to
        manage request status.
    """
    log_name = "{0} :: {1}".format(__name__,
                                   requests_notification_callback.__name__)
    logging.debug("{0}  - STARTING...".format(log_name))

    cache = OrderedDict()
    while 1:

        try:
            msg = msg_queue_in.get(True)
        except IOError as e:
            logging.error(__name__ + " :: Could not block "
                                     'on in queue: "{0}"'.format(e.message))
            sleep(1)
            continue

        try:
            type = msg[0]
        except (KeyError, ValueError):
            logging.error(log_name + " - No valid type " "{0}".format(str(msg)))
            continue

        # Init request
        if type == 0:
            try:
                cache[msg[1]] = [True, msg[2]]
                logging.debug(log_name + " - Initialize Request: " "{0}.".format(str(msg)))
            except Exception:
                logging.error(log_name + " - Initialize Request" " failed: {0}".format(str(msg)))

        # Kill request - leave on cache
        elif type == 1:
            try:
                cache[msg[1]][0] = False
                logging.debug(log_name + " - Set request finished: " "{0}.\n".format(str(msg)))
            except Exception:
                logging.error(log_name + " - Set request finished failed: " "{0}\n".format(str(msg)))

        # Is the key in the cache and running?
        elif type == 2:
            try:
                if msg[1] in cache:
                    msg_queue_out.put([cache[msg[1]][0]], True)
                else:
                    msg_queue_out.put([False], True)
                logging.debug(log_name + " - Get request alive: " "{0}.".format(str(msg)))
            except (KeyError, ValueError):
                logging.error(log_name + " - Get request alive failed: " "{0}".format(str(msg)))

        # Get keys
        elif type == 3:
            msg_queue_out.put(cache.keys(), True)

        # Get url
        elif type == 4:
            try:
                if msg[1] in cache:
                    msg_queue_out.put([cache[msg[1]][1]], True)
                else:
                    logging.error(log_name + " - Get URL failed: {0}".format(str(msg)))
            except (KeyError, ValueError):
                logging.error(log_name + " - Get URL failed: {0}".format(str(msg)))
        else:
            logging.error(log_name + " - Bad message: {0}".format(str(msg)))

    logging.debug("{0}  - SHUTTING DOWN...".format(log_name))
Example #28
def job_control():
    """
        Controls the execution of user metrics requests

        Parameters
        ~~~~~~~~~~

        request_queue : multiprocessing.Queue
           Queues incoming API requests.

    """

    # Store executed and pending jobs respectively
    job_queue = list()

    # Global job ID number
    job_id = 0

    # Tallies the number of concurrently running jobs
    concurrent_jobs = 0

    log_name = '{0} :: {1}'.format(__name__, job_control.__name__)

    logging.debug('{0} - STARTING...'.format(log_name))

    while 1:

        time.sleep(RESQUEST_TIMEOUT)

        # Request Queue Processing
        # ------------------------

        # logging.debug(log_name + ' :: POLLING REQUESTS...')
        logging.debug(log_name + ' :: JOB QUEUE - {0}'.format(str(job_queue)))
        req_item = None

        # Only process if there are fewer than the maximum number of concurrent
        # jobs
        if concurrent_jobs < MAX_CONCURRENT_JOBS:

            # Pop from request target
            req_item = umapi_broker_context.pop(REQUEST_BROKER_TARGET)

            # Push to process target
            if req_item:
                url_hash = sha1(req_item.encode('utf-8')).hexdigest()
                umapi_broker_context.add(PROCESS_BROKER_TARGET, url_hash,
                                         req_item)

                logging.debug(log_name + ' :: PULLING item from request queue -> '
                                         '\n\t{0}'
                              .format(req_item))

        # Process complete jobs
        # ---------------------

        if concurrent_jobs:
            for job_item in job_queue:

                if not job_item.queue.empty():

                    logging.info(log_name + ' :: READING RESPONSE - {0}'.
                        format(job_item.request))

                    # Pull data off of the queue and add it to response queue
                    data = ''
                    while not job_item.queue.empty():
                        data += job_item.queue.get(True)

                    # Remove from process target
                    url_hash = sha1(job_item.request.encode('utf-8')).hexdigest()
                    try:
                        umapi_broker_context.remove(PROCESS_BROKER_TARGET,
                                                    url_hash)
                    except Exception as e:
                        logging.error(log_name + ' :: Could not process '
                                                 '{0} from {1}  -- {2}'.
                            format(job_item.request,
                                   PROCESS_BROKER_TARGET,
                                   e.message))

                    # Add to response target
                    umapi_broker_context.add(RESPONSE_BROKER_TARGET, url_hash,
                                             pack_response_for_broker(
                                                 job_item.request, data))

                    del job_queue[job_queue.index(job_item)]
                    concurrent_jobs -= 1
                    logging.debug(log_name + ' :: RUN -> RESPONSE - Job ID {0}'
                                             '\n\tConcurrent jobs = {1}'
                                  .format(str(job_item.id), concurrent_jobs))

        # Process request
        # ---------------

        if req_item:
            req_q = Queue()
            proc = Process(target=process_metrics, args=(req_q, req_item))
            proc.start()

            job_item = job_item_type(job_id, proc, req_item, req_q)
            job_queue.append(job_item)

            concurrent_jobs += 1
            job_id += 1

            logging.debug(log_name + ' :: WAIT -> RUN - Job ID {0}'
                                     '\n\tConcurrent jobs = {1}, REQ = {2}'
                          .format(str(job_id), concurrent_jobs, req_item))

    logging.debug('{0} - FINISHING.'.format(log_name))