Example #1
0
def _get_revisions(args):
    """
        Retrieve the total set of revision records for users within the
        timeframe.  Start and end of the call are reported through the
        ``um.log_pool_worker_*`` helpers.

        - Parameters:
            - **args** - Sequence.  ``args[0]`` holds the users handed to the
                iterator selected from ``UMP_MAP``; ``args[1]`` holds the
                packed metric state that ``UserMetric._unpack_params``
                expands.
        - Return:
            - List.  The rows produced by ``query_mod.rev_query`` for every
                item the iterator yields, concatenated in iteration order.
                An empty list is returned if any query raises
                ``UMQueryCallError``.
    """
    um.log_pool_worker_start(__name__, _get_revisions.__name__,
                             args[0], args[1])

    users = args[0]
    state = args[1]

    metric_params = um.UserMetric._unpack_params(state)
    query_args_type = namedtuple('QueryArgs', 'date_start date_end namespace')

    revs = []
    umpd_obj = UMP_MAP[metric_params.group](users, metric_params)
    try:
        for t in umpd_obj:
            query_args = query_args_type(t.start, t.end,
                                         metric_params.namespace)
            revs.extend(query_mod.rev_query(t.user, metric_params.project,
                                            query_args))
    except query_mod.UMQueryCallError as e:
        # Not every exception type exposes ``message``; fall back to the
        # exception itself so the handler cannot fail while logging.
        logging.error('{0}:: {1}. PID={2}'.format(__name__,
                                                  getattr(e, 'message', e),
                                                  os.getpid()))
        # A failed query discards everything gathered so far.
        return []

    # Report completion under this worker's own name.
    um.log_pool_worker_end(__name__, _get_revisions.__name__)
    return revs
Example #2
0
def _process_help(args):
    """
        Determine the bytes added over a number of revisions, aggregated per
        user.

        The flow of the request is as follows:

            #. For each revision row read the user, the revision length and
                the parent revision id
            #. Produce the length of the parent revision (0 when the parent
                revision id is 0, otherwise looked up with
                ``query_mod.rev_len_query``)
            #. Compute the difference in length between the revision and its
                parent
            #. Record raw bytes added (with sign and absolute), amount of
                positive bytes added, amount of negative bytes added and the
                edit count

        Rows that are malformed, or for which either length cannot be
        determined, are skipped and counted as missed; the processed/total
        tally is passed to ``um.log_pool_worker_end``.

        - Parameters:
            - **args** - Sequence.  ``args[0]`` is the sized collection of
                revision rows, each indexed as (user, revision length,
                parent revision id); ``args[1]`` is the packed metric state
                that ``UserMetric._unpack_params`` expands.
        - Return:
            - List of lists.  One entry per user in first-seen order:
                [user, net bytes, absolute bytes, positive bytes,
                negative bytes, edit count]
    """
    um.log_pool_worker_start(__name__, _process_help.__name__,
                             args[0], args[1])

    revs = args[0]
    state = args[1]

    metric_params = um.UserMetric._unpack_params(state)
    bytes_added = dict()

    missed_records = 0
    total_rows = len(revs)

    for row in revs:
        # Short rows, None lengths and non-numeric lengths are all treated
        # as unusable records rather than fatal errors.
        try:
            user = str(row[0])
            rev_len_total = int(row[1])
            parent_rev_id = row[2]
        except (IndexError, TypeError, ValueError):
            missed_records += 1
            continue

        # Produce the revision length of the parent.  In case of a new
        # article, parent_rev_id = 0, no record in the db
        if parent_rev_id == 0:
            parent_rev_len = 0
        else:
            try:
                parent_rev_len = query_mod.rev_len_query(parent_rev_id,
                                                         metric_params.project)
            except query_mod.UMQueryCallError:
                missed_records += 1
                logging.error(__name__ +
                              '::Could not produce rev diff for %s on '
                              'rev_id %s.' % (user, str(parent_rev_id)))
                continue

        # Ignore the revision if the parent length is undetermined
        try:
            bytes_added_bit = rev_len_total - int(parent_rev_len)
        except (TypeError, ValueError):
            missed_records += 1
            continue

        # Slots: [net, absolute, positive, negative, edit count].  The entry
        # is created on the first usable revision seen for a user.
        totals = bytes_added.setdefault(user, [0] * 5)
        totals[0] += bytes_added_bit
        totals[1] += abs(bytes_added_bit)
        if bytes_added_bit > 0:
            totals[2] += bytes_added_bit
        else:
            # A zero difference lands here and leaves the total unchanged
            totals[3] += bytes_added_bit
        totals[4] += 1

    results = [[user] + totals for user, totals in bytes_added.items()]

    extra = 'Processed {0} out of {1} records.'.\
        format(total_rows - missed_records, total_rows)
    um.log_pool_worker_end(__name__, _process_help.__name__, extra=extra)

    return results