コード例 #1
0
def get_data(url, hash_result=True):
    """
        Look up cached data for the request described by ``url``.

        A request object is built from the url, a key signature is derived
        from it (hashed when ``hash_result`` is true) and the pickled hash
        table is searched for that signature.

        Returns the evaluated first element of the stored item, or ``None``
        when no item is found or the stored string is not valid syntax.
    """

    request_obj = build_request_obj(url)
    hash_table_ref = read_pickle_data()

    # @TODO rather than iterate through REQUEST_META_BASE &
    #   REQUEST_META_QUERY_STR look only at existing attributes

    attempt_msg = " - Attempting to pull data for request " \
                  "COHORT {0}, METRIC {1}"
    logging.debug(__name__ + attempt_msg.format(request_obj.cohort_expr,
                                                request_obj.metric))

    key_sig = build_key_signature(request_obj, hash_result=hash_result)
    item = find_item(hash_table_ref, key_sig)

    # Nothing stored under this key signature
    if not item:
        return None

    # item[0] is a stringified structure (see set_data).
    # NOTE(review): eval() executes whatever string is stored in the cache;
    # confirm the cache can only ever hold trusted content.
    try:
        return eval(item[0])
    except SyntaxError:
        failure_msg = ' :: Failed to retrieve {0}'.format(key_sig)
        logging.error(__name__ + failure_msg)
        return None
コード例 #2
0
 def set_password(self, password):
     """ Hash ``password`` and store the result on ``self.pw_hash``.

     The password is converted with ``unicode`` and passed through
     ``escape`` before hashing.  If that raises ``TypeError`` or
     ``NameError`` the failure is logged and ``pw_hash`` is set to None.
     """
     try:
         cleaned = escape(unicode(password))
         self.pw_hash = generate_password_hash(cleaned)
     except (TypeError, NameError) as e:
         prefix = __name__ + ' :: Hash set error - '
         logging.error(prefix + e.message)
         self.pw_hash = None
コード例 #3
0
ファイル: data.py プロジェクト: dartar/user_metrics
def build_key_signature(request_meta, hash_result=False):
    """
        Construct a hash key for a RequestMeta object.

        Parameters
        ~~~~~~~~~~

            request_meta : RequestMeta
                Stores request data.

            hash_result : bool
                When true the SHA-1 hex digest of the stringified signature
                list is returned instead of the list itself.

        Returns the list of ``name + delimiter + value`` parts (or its
        digest).  An empty string is returned when one of the mandatory
        ``REQUEST_META_BASE`` attributes is falsy.
    """
    parts = []

    # Mandatory keys -- every one of these must carry a value
    for key_name in REQUEST_META_BASE:
        value = getattr(request_meta, key_name)
        if not value:
            logging.error(__name__ + ' :: Request must include %s. '
                                     'Cannot set data %s.' %
                                     (key_name, str(request_meta)))
            return ''
        parts.append(key_name + HASH_KEY_DELIMETER + value)

    # Optional keys -- included only when present and truthy
    for key_name in REQUEST_META_QUERY_STR:
        if not hasattr(request_meta, key_name):
            continue
        value = getattr(request_meta, key_name)
        if value:
            parts.append(key_name + HASH_KEY_DELIMETER + str(value))

    if not hash_result:
        return parts
    return sha1(str(parts).encode('utf-8')).hexdigest()
コード例 #4
0
def format_namespace(namespace, col='page_namespace'):
    """ Format the namespace condition in queries and returns the string.

        Expects a list of numeric namespace keys.  Otherwise returns
        an empty condition string.

        ** THIS METHOD ONLY EMITS SQL SAFE STRINGS **
    """
    ns_cond = ''

    # Copy so as not to affect mutable ref
    namespace = deepcopy(namespace)

    if hasattr(namespace, '__iter__'):
        if len(namespace) == 1:
            ns_cond = '{0} = '.format(col) \
                + escape_var(str(namespace.pop()))
        else:
            ns_cond = '{0} in ('.format(col) + \
                ",".join(DataLoader()
                .cast_elems_to_string(escape_var(list(namespace)))) + ')'
    else:
        try:
            ns_cond = '{0} = '.format(col) + escape_var(int(namespace))
        except ValueError:
            # No namespace condition
            logging.error(__name__ + ' :: Could not apply namespace '
                                     'condition on {0}'.format(str(namespace)))
            pass

    return ns_cond
コード例 #5
0
ファイル: bytes_added.py プロジェクト: dartar/user_metrics
def _get_revisions(args):
    """ Retrieve total set of revision records for users within timeframe.

        Parameters
        ~~~~~~~~~~

            args : sequence
                ``args[0]`` holds the users and ``args[1]`` the packed
                metric state passed to ``UserMetric._unpack_params``.

        Returns a list with the rows yielded by ``query_mod.rev_query`` for
        every record produced by the user-metric-period object, or an empty
        list as soon as one query raises ``UMQueryCallError``.
    """
    um.log_pool_worker_start(__name__, _get_revisions.__name__, args[0], args[1])

    users = args[0]
    state = args[1]

    metric_params = um.UserMetric._unpack_params(state)
    query_args_type = namedtuple('QueryArgs', 'date_start date_end namespace')

    revs = list()
    umpd_obj = UMP_MAP[metric_params.group](users, metric_params)
    try:
        for t in umpd_obj:
            revs += \
                list(query_mod.rev_query(t.user, metric_params.project,
                                         query_args_type(t.start, t.end,
                                                         metric_params.namespace)))
    except query_mod.UMQueryCallError as e:
        logging.error('{0}:: {1}. PID={2}'.format(__name__,
                                                  e.message, os.getpid()))
        return []

    # Report completion under this function's own name (the start message
    # above already uses it).
    um.log_pool_worker_end(__name__, _get_revisions.__name__)
    return revs
コード例 #6
0
ファイル: data.py プロジェクト: dartar/user_metrics
def get_cohort_refresh_datetime(utm_id):
    """
        Get the latest refresh datetime of a cohort.  Returns current time
        formatted as a string if the field is not found.

        Parameters
        ~~~~~~~~~~

            utm_id : int-like
                Cohort id; converted with ``int`` before being bound to the
                query.

        Returns the ``utm_touched`` value formatted with
        ``DATETIME_STR_FORMAT``.
    """

    # @TODO MOVE DB REFS INTO QUERY MODULE
    conn = dl.Connector(instance=settings.__cohort_data_instance__)
    query = """ SELECT utm_touched FROM usertags_meta WHERE utm_id = %s """
    conn._cur_.execute(query, int(utm_id))

    utm_touched = None
    try:
        utm_touched = conn._cur_.fetchone()[0]
    except (TypeError, IndexError, ValueError):
        # fetchone() yields None when no row matched, so the subscript
        # raises TypeError; fall through to the "missing" handling below.
        pass

    # Ensure the field was retrieved
    if not utm_touched:
        logging.error(__name__ + '::Missing utm_touched for cohort %s.' %
                                 str(utm_id))
        utm_touched = datetime.now()

    del conn
    return utm_touched.strftime(DATETIME_STR_FORMAT)
コード例 #7
0
ファイル: revert_rate.py プロジェクト: dartar/user_metrics
def _process_help(args):
    """ Worker used when forking the revert-rate computation.
        Should not be called externally.

        ``args[0]`` holds the users and ``args[1]`` the packed state.
        Returns a list of ``[user, revert rate, total revisions]`` entries;
        users whose revision query fails are skipped and counted as dropped.
    """

    users = args[0]
    state = args[1]
    # Index 5 of the state is not passed on to the args class
    thread_args = RevertRateArgsClass(state[0], state[1], state[2],
                                      state[3], state[4], state[6],
                                      state[7], state[8])

    if thread_args.log_progress:
        logging.info(__name__ +
                    ' :: Computing reverts on %s users (PID %s)'
                    % (len(users), str(os.getpid())))

    query_args_type = namedtuple('QueryArgs', 'date_start date_end')
    results_agg = []
    dropped_users = 0

    for user_data in UMP_MAP[thread_args.group](users, thread_args):

        # Query window for this user, as mediawiki timestamps
        query_args = query_args_type(
            format_mediawiki_timestamp(user_data.start),
            format_mediawiki_timestamp(user_data.end))

        try:
            revisions = query_mod.revert_rate_user_revs_query(
                user_data.user, thread_args.project, query_args)
        except query_mod.UMQueryCallError as e:
            logging.error(__name__ + ' :: Failed to '
                                     'get revisions: {0}'.format(e.message))
            dropped_users += 1
            continue

        results_thread = mpw.build_thread_pool(revisions, _revision_proc,
                                               thread_args.rev_threads, state)

        # Accumulate revision and revert counts over the pool results
        total_revisions = 0.0
        total_reverts = 0.0
        for r in results_thread:
            total_revisions += r[0]
            total_reverts += r[1]

        # A user without revisions gets a rate of 0.0
        if total_revisions:
            rate = total_reverts / total_revisions
        else:
            rate = 0.0
        results_agg.append([user_data.user, rate, total_revisions])

    if thread_args.log_progress:
        logging.debug(__name__ + ' :: PID {0} complete. Dropped users = {1}'.
            format(str(os.getpid()), dropped_users))

    return results_agg
コード例 #8
0
ファイル: views.py プロジェクト: dartar/user_metrics
def all_urls():
    """ View listing every cached request as a link.

        Reads the whole pickled cache, takes element 1 of each iterable
        value as its key signature, rebuilds a url from that signature and
        renders ``all_urls.html`` with the resulting anchor tags.
    """

    # @TODO - this reads the entire cache into memory, filters will be needed
    # The data is assumed to be in the form of
    # <hash key -> (data, key signature)> pairs.
    cache = read_pickle_data()

    key_sigs = []
    for key, val in cache.iteritems():
        if not hasattr(val, '__iter__'):
            continue
        try:
            key_sigs.append(val[1])
        except (KeyError, IndexError):
            logging.error(__name__ + ' :: Could not render key signature '
                                     'from data, key = {0}'.format(key))

    # Compose one anchor tag per key signature
    url_list = []
    for sig in key_sigs:
        url = get_url_from_keys(sig, 'cohorts')
        pieces = ('<a href="', request.url_root, url + '">', url, '</a>')
        url_list.append("".join(pieces))

    return render_template('all_urls.html', urls=url_list)
コード例 #9
0
    def __init__(self, **kwargs):
        """ Initialise the metric and build its threshold object.

            All keyword arguments are forwarded to the parent constructor
            and to the threshold class selected by
            ``self.threshold_type_class``.  When that selection fails the
            default ``EditCountThreshold`` is used instead.
        """
        super(TimeToThreshold, self).__init__(**kwargs)

        try:
            self._threshold_obj_ = self.__threshold_types[self.threshold_type_class](**kwargs)
        except (KeyError, NameError):
            # An unknown threshold type surfaces as KeyError from the
            # lookup (presumably a dict -- confirm); NameError is kept for
            # backward compatibility.
            logging.error(__name__ + "::Invalid threshold class. " "Using default (EditCountThreshold).")
            self._threshold_obj_ = self.EditCountThreshold(**kwargs)
コード例 #10
0
def teardown():
    """ Shut down the job controller and response controller processes.

        Any exception raised while terminating them is logged and
        swallowed. """

    failure_msg = ' :: Could not shut down callbacks.'

    # Shutdown API handlers gracefully
    try:
        terminate_process_with_checks(job_controller_proc)
        terminate_process_with_checks(response_controller_proc)
    except Exception:
        logging.error(__name__ + failure_msg)
コード例 #11
0
ファイル: request_manager.py プロジェクト: embr/user_metrics
def req_cb_get_is_running(key, lock):
    """ Send the ``[2, key]`` message on the notification queue and return
        the first element of the reply.

        The exchange is serialised with ``lock``, which is always released
        before returning -- including when the reply times out.

        Returns ``False`` when no reply arrives within ``BLOCK_TIMEOUT``.
    """
    lock.acquire()
    try:
        req_notification_queue_in.put([2, key], True)
        try:
            val = req_notification_queue_out.get(block=True, timeout=BLOCK_TIMEOUT)[0]
        except Empty:
            logging.error(__name__ + " :: req_cb_get_is_running -" " Block time expired.")
            return False
        return val
    finally:
        # Release on every path so a timeout cannot leave the lock held
        lock.release()
コード例 #12
0
ファイル: request_manager.py プロジェクト: embr/user_metrics
def req_cb_get_cache_keys(lock):
    """ Send the ``[3]`` message on the notification queue and return the
        reply.

        The exchange is serialised with ``lock``, which is always released
        before returning -- including when the reply times out.

        Returns an empty list when no reply arrives within
        ``BLOCK_TIMEOUT``.
    """
    lock.acquire()
    try:
        req_notification_queue_in.put([3], block=True)
        try:
            val = req_notification_queue_out.get(block=True, timeout=BLOCK_TIMEOUT)
        except Empty:
            logging.error(__name__ + " :: req_cb_get_cache_keys -" " Block time expired.")
            return []
        return val
    finally:
        # Release on every path so a timeout cannot leave the lock held
        lock.release()
コード例 #13
0
def process_responses(response_queue, msg_in):
    """ Pulls responses off of the queue and stores them.

        Loops forever.  Each iteration reads one item from
        ``response_queue`` and rebuilds the request meta from it, then
        concatenates the following items into a single string, checks that
        the string evaluates, flags the job as complete and passes the
        string to ``set_data``.

        Parameters
        ~~~~~~~~~~

            response_queue : queue-like
                Source of the request header item and the data chunks.

            msg_in :
                Not used in this function body.
    """

    log_name = '{0} :: {1}'.format(__name__, process_responses.__name__)
    logging.debug(log_name  + ' - STARTING...')

    while 1:
        stream = ''

        # Block on the response queue; the first item describes the request
        try:
            res = response_queue.get(True)
            request_meta = rebuild_unpacked_request(res)
        except Exception:
            logging.error(log_name + ' - Could not get request meta')
            continue

        # Collect data chunks until a falsy chunk arrives or the queue stays
        # empty for one second
        data = response_queue.get(True)
        while data:
            stream += data
            try:
                data = response_queue.get(True, timeout=1)
            except Empty:
                break

        # The evaluated result is not used below; the evaluation only
        # decides whether the raw stream is replaced by a failure report.
        # NOTE(review): eval() executes the queued text -- confirm that only
        # trusted producers write to this queue.
        try:
            data = eval(stream)
        except Exception as e:

            # Report a fraction of the failed response data directly in the
            # logger
            if len(unicode(stream)) > 2000:
                excerpt = stream[:1000] + ' ... ' + stream[-1000:]
            else:
                excerpt = stream

            logging.error(log_name + ' - Request failed. {0}\n\n' \
                                     'data excerpt: {1}'.format(e.message, excerpt))

            # Format a response that will report on the failed request
            stream = "OrderedDict([('status', 'Request failed.'), " \
                     "('exception', '" + escape(unicode(e.message)) + "')," \
                     "('request', '" + escape(unicode(request_meta)) + "'), " \
                     "('data', '" + escape(unicode(stream)) + "')])"

        key_sig = build_key_signature(request_meta, hash_result=True)

        # Set request in list to "not alive"
        req_cb_flag_job_complete(key_sig, REQ_NCB_LOCK)

        logging.debug(log_name + ' - Setting data for {0}'.format(
            str(request_meta)))
        set_data(stream, request_meta)

    # Not reached: the loop above has no exit at this level
    logging.debug(log_name + ' - SHUTTING DOWN...')
コード例 #14
0
def req_cb_get_url(key, lock):
    """ Send the ``[4, key]`` message on the notification queue and return
        the first element of the reply.

        The exchange is serialised with ``lock``, which is always released
        before returning -- including when the reply times out.

        Returns an empty string when no reply arrives within
        ``BLOCK_TIMEOUT``.
    """
    lock.acquire()
    try:
        req_notification_queue_in.put([4, key], block=True)
        try:
            val = req_notification_queue_out.get(True, timeout=BLOCK_TIMEOUT)[0]
        except Empty:
            logging.error(__name__ + ' :: req_cb_get_url -'
                                     ' Block time expired.')
            return ''
        return val
    finally:
        # Release on every path so a timeout cannot leave the lock held
        lock.release()
コード例 #15
0
ファイル: revert_rate.py プロジェクト: dartar/user_metrics
def __future(rev_id, page_id, n, project, namespace):
    """ Fetch the n revisions on a page after a given revision.

        Returns whatever ``page_rev_hist_query`` yields with
        ``look_ahead=True``, or an empty list when the query fails. """
    try:
        return query_mod.page_rev_hist_query(rev_id, page_id, n, project,
                                             namespace, look_ahead=True)
    except query_mod.UMQueryCallError as e:
        detail = 'get revision future: {0}'.format(e.message)
        logging.error(__name__ + ' :: Failed to ' + detail)
        return list()
コード例 #16
0
 def check_password(self, password):
     """ Check ``password`` against the stored hash.

     Returns False when no hash is stored or when converting / checking
     the password raises ``TypeError`` or ``NameError`` (which is logged);
     otherwise returns the result of ``check_password_hash``.
     """
     # Without a stored hash nothing can match
     if not self.pw_hash:
         return False

     try:
         candidate = escape(unicode(password))
         return check_password_hash(self.pw_hash, candidate)
     except (TypeError, NameError) as e:
         logging.error(__name__ +
                       ' :: Hash check error - ' + e.message)
         return False
コード例 #17
0
ファイル: revert_rate.py プロジェクト: dartar/user_metrics
def __history(rev_id, page_id, n, project, namespace):
    """ Fetch the n revisions on a page before a given revision.

        Returns whatever ``page_rev_hist_query`` yields with
        ``look_ahead=False``, or an empty list when the query fails. """
    try:
        return query_mod.page_rev_hist_query(rev_id, page_id, n, project,
                                             namespace, look_ahead=False)
    except query_mod.UMQueryCallError as e:
        detail = 'get revision history: {0}'.format(e.message)
        logging.error(__name__ + ' :: Failed to ' + detail)
        return list()
コード例 #18
0
ファイル: data_loader.py プロジェクト: dartar/user_metrics
 def format_condition_in(self, field_name, item_list, include_quotes=False):
     """ Build a SQL "in" condition of the form ``field in (a,b,...)``.

     The items are cast to strings and joined with
     ``format_comma_separated_list``.  When ``item_list`` has no
     ``__iter__`` attribute an error is logged and an empty string is
     returned.
     """
     if not hasattr(item_list, '__iter__'):
         logging.error(__name__ + '::format_condition_in - '
                                  'item_list must implement the '
                                  'iterable interface.')
         return ''

     as_strings = self.cast_elems_to_string(item_list)
     list_str = self.format_comma_separated_list(
         as_strings, include_quotes=include_quotes)
     return "%s in (%s)" % (field_name, list_str)
コード例 #19
0
def is_valid_cohort_query(cohort_name):
    """ Run the stored ``is_valid_cohort_query`` query for ``cohort_name``.

        Returns ``True`` when the query yields no rows and ``False`` when
        it yields at least one row or when executing / fetching fails with
        ``OperationalError`` or ``ProgrammingError`` (which is logged).
    """
    conn = Connector(instance=conf.__cohort_data_instance__)
    query = query_store[is_valid_cohort_query.__name__]
    query = sub_tokens(query, db=conf.__cohort_meta_instance__,
                       table=conf.__cohort_meta_db__)
    params = {'utm_name' : cohort_name}
    try:
        # execute() is the call that reports a bad or failed query, so it
        # must sit inside the handler together with the fetch.
        conn._cur_.execute(query, params)
        cohorts = conn._cur_.fetchall()
    except (OperationalError, ProgrammingError):
        logging.error(__name__ +
                      ' :: Query failed: {0}, params = {1}'.
                      format(query, str(params)))
        return False
    return len(cohorts) == 0
コード例 #20
0
ファイル: data_loader.py プロジェクト: dartar/user_metrics
    def get_column_names(self):
        """
            Return the column names from the connection cursor (latest
            executed query)

            Return:
                - List(string).  Column names from latest query results.
                  Empty when the connection has no cursor or the cursor has
                  no description.
        """
        try:
            column_data = self._cur_.description
        except AttributeError:
            column_data = None
            logging.error(__name__ + ' :: No column description for this '
                                     'connection.')
        # DB-API cursors expose ``description`` as None when the last
        # operation returned no rows or nothing has been executed yet.
        if column_data is None:
            return []
        return [elem[0] for elem in column_data]
コード例 #21
0
        def register_user(self):
            """ Write the user credentials to the datastore.

                Does nothing for a user that is already active.  Otherwise
                the user is inserted unless a user with the same name
                already exists, and the instance is marked active either
                way.
            """
            # 1. Only users not already registered
            if self.active:
                return

            # 2. Ensure that the user is unique
            already_exists = query_mod.get_api_user(self.name, by_id=False)
            if already_exists:
                logging.error(__name__ + 'Could not add user {0}'.
                    format(self.name))
            else:
                # 3. Write the user / pass to the db
                query_mod.insert_api_user(self.name, self.pw_hash)
                logging.debug(__name__ + ' :: Added user {0}'.
                    format(self.name))

            self.active = True
コード例 #22
0
def is_valid_username_query(username, project):
    """ Run the stored ``is_valid_username_query`` query for ``username``
        against the database mapped to ``project``.

        Returns the first column of the single matching row, or ``None``
        when the query does not yield exactly one row.

        ``OperationalError`` / ``ProgrammingError`` raised while executing
        or fetching are logged and re-raised.
    """
    conn = Connector(instance=conf.PROJECT_DB_MAP[project])
    query = query_store[is_valid_username_query.__name__]
    query = sub_tokens(query, db=escape_var(project))
    params = {'username' : username}
    try:
        # execute() is the call that reports a bad or failed query, so it
        # must sit inside the handler for the failure to be logged.
        conn._cur_.execute(query, params)
        ids = conn._cur_.fetchall()
    except (OperationalError, ProgrammingError):
        logging.error(__name__ +
                      ' :: Query failed: {0}, params = {1}'.
                      format(query, str(params)))
        raise
    if len(ids) == 1:
        return ids[0][0]
    else:
        return None
コード例 #23
0
ファイル: data.py プロジェクト: dartar/user_metrics
def get_users(cohort_expr):
    """ Return the list of users for a cohort.

        An expression matching ``COHORT_REGEX`` is parsed with
        ``parse_cohorts``; anything else is treated as a cohort tag name
        and resolved through the query module.  An empty list is returned
        when the tag lookup fails.
    """

    if search(COHORT_REGEX, cohort_expr):
        logging.info(__name__ + ' :: Processing cohort by expression.')
        return list(parse_cohorts(cohort_expr))

    logging.info(__name__ + ' :: Processing cohort by tag name.')
    try:
        cohort_id = query_mod.get_cohort_id(cohort_expr)
        return list(query_mod.get_cohort_users(cohort_id))
    except (IndexError, TypeError,
            query_mod.UMQueryCallError) as e:
        logging.error(__name__ + ' :: Could not retrieve users '
                                 'for cohort {0}: {1}'.
            format(cohort_expr, str(e)))
        return []
コード例 #24
0
 def get_all_items(self, target):
     """
     Retrieve all values in the target

     ``target`` is the path of a file holding one JSON document per line.
     Blank lines are skipped; lines that cannot be parsed are logged and
     skipped.  If the file cannot be opened for reading it is created
     empty.

     Returns the list of parsed items in file order.
     """
     all_keys = list()
     try:
         with open(target, 'r') as f:
             for line in f.read().split('\n'):
                 # A trailing newline leaves an empty last element; that
                 # is not a parse failure worth reporting.
                 if not line.strip():
                     continue
                 try:
                     item = json.loads(line)
                 except Exception:
                     logging.error(__name__ + ' :: Could not parse JSON '
                                              'from: {0}'.format(line))
                     continue
                 all_keys.append(item)
     except IOError:
         # Target could not be read: create it empty
         with open(target, 'w'):
             pass
     return all_keys
コード例 #25
0
    def wrapper(users, project, args):
        """ Build the query with the wrapped function and execute it.

            ``users`` is wrapped in a list when it is not iterable.  Users
            and project are escaped before the query is built.

            Returns the list of rows from the cursor, or an empty list when
            ``project`` has no entry in ``conf.PROJECT_DB_MAP``.  Raises
            ``UMQueryCallError`` when no connection can be established or
            the query fails with ``ProgrammingError``.
        """
        # ensure the handles are iterable
        if not hasattr(users, '__iter__'):
            users = [users]

        # optional call logging
        if hasattr(args, 'log') and args.log:
            log_fields = {
                'method': f.__name__,
                'project': project
            }
            logging.debug(__name__ + ':: calling "%(method)s" '
                                     'in "%(project)s".' % log_fields)

        # Escape user and project variables against SQL injection
        safe_users = escape_var(users)
        safe_project = escape_var(project)
        query = f(safe_users, safe_project, args)

        try:
            conn = Connector(instance=conf.PROJECT_DB_MAP[project])
        except KeyError:
            logging.error(__name__ + ' :: Project does not exist.')
            return []
        except ConnectorError:
            logging.error(__name__ + ' :: Could not establish a connection.')
            raise UMQueryCallError('Could not establish a connection.')

        try:
            conn._cur_.execute(query)
        except ProgrammingError:
            logging.error(__name__ +
                          'Could not get edit counts - Query failed.')
            raise UMQueryCallError()

        rows = list(conn._cur_)
        del conn
        return rows
コード例 #26
0
ファイル: data_loader.py プロジェクト: dartar/user_metrics
    def list_to_xsv(self, nested_list, separator='\t', log=False,
                    outfile='list_to_xsv.out'):
        """
            Writes a nested list to a separated-value file, one inner list
            per line.

            Parameters:
                - **nested_list** - List(List()). Nested list to insert to xsv.
                - **separator**: String.  The separating character in the file.
                    Default to tab.
                - **log**: Boolean.  When true, lines that fail to write
                    are logged.
                - **outfile**: String.  File name, appended to the
                    project data file directory.

            Returns nothing.  If the file cannot be opened the error is
            logged and nothing is written.
        """
        try:
            file_obj = open(projSet.__data_file_dir__ + outfile, 'w')
        except IOError as e:
            # str(e) carries errno / strerror / filename; ``e.message`` is
            # empty for errno-style IOErrors.
            logging.error(__name__ + ' :: Could not open '
                                     'xsv for writing: %s' % str(e))
            return

        # Close the handle even if casting or joining a row raises
        try:
            if hasattr(nested_list, '__iter__'):
                for elem in nested_list:
                    new_elems = self.cast_elems_to_string(elem)
                    line_in = separator.join(new_elems) + '\n'
                    try:
                        file_obj.write(line_in)
                    except IOError:
                        if log:
                            logging.error('Could not write: "%s"' %
                                          str(line_in.strip()))
            else:
                logging.error('Expected an iterable to write to file.')
        finally:
            file_obj.close()
コード例 #27
0
ファイル: data.py プロジェクト: sudeepdas/E3_analysis
def get_cohort_id(utm_name):
    """ Pull cohort ids from cohort handles.

        Returns the ``utm_id`` stored for ``utm_name`` in
        ``usertags_meta``, or ``-1`` (after logging an error) when no row
        is found or the stored id is falsy.
    """

    # @TODO MOVE DB REFS INTO QUERY MODULE
    conn = dl.Connector(instance='slave')
    # NOTE(review): the query is built by string interpolation and relies
    # entirely on escape(); consider binding utm_name as a query parameter.
    conn._cur_.execute('SELECT utm_id FROM usertags_meta '
                       'WHERE utm_name = "%s"' % str(escape(utm_name)))

    utm_id = None
    try:
        utm_id = conn._cur_.fetchone()[0]
    except (TypeError, IndexError, ValueError):
        # fetchone() yields None when no row matched, so the subscript
        # raises TypeError; fall through to the "missing" handling below.
        pass

    # Ensure the field was retrieved
    if not utm_id:
        logging.error(__name__ + '::Missing utm_id for cohort %s.' %
                                 str(utm_name))
        utm_id = -1

    del conn
    return utm_id
コード例 #28
0
def _map_request_values(request_meta):
    """
        Replace coded request values with their mapped equivalents.

        For every attribute named in ``REQUEST_VALUE_MAPPING`` that exists
        on ``request_meta``, the current value is looked up in that
        attribute's mapping and the attribute is overwritten in place.
        Values without a mapping are logged and left unchanged.

            Parameters
            ~~~~~~~~~~

            request_meta : recordtype:
                Stores the request data.
    """
    for attr in REQUEST_VALUE_MAPPING:
        if not hasattr(request_meta, attr):
            continue

        current = None
        try:
            current = getattr(request_meta, attr)
            setattr(request_meta, attr,
                    REQUEST_VALUE_MAPPING[attr][current])
        except KeyError:
            logging.error(__name__ + ' :: Could not map request value '
                                     '{0} for variable {1}.'.
                          format(str(current), attr))
コード例 #29
0
def _process_help(args):
    """
        Worker thread method for NamespaceOfEdits::process().

        ``args[0]`` holds the users and ``args[1]`` the packed metric
        state.  Returns a list of ``(user, counts)`` pairs where ``counts``
        maps each valid namespace (as a string) to an edit count.
    """

    users, state = args[0], args[1]

    metric_params = um.UserMetric._unpack_params(state)
    query_args_type = namedtuple('QueryArgs', 'start end')

    if metric_params.log_:
        logging.info(__name__ + '::Computing namespace edits. (PID = %s)' %
                                getpid())

    # Tally counts of namespace edits
    results = dict()
    for ump_rec in UMP_MAP[metric_params.group](users, metric_params):

        # Start every valid namespace at zero for this user
        user_key = str(ump_rec.user)
        results[user_key] = OrderedDict(
            (str(ns), 0) for ns in NamespaceEdits.VALID_NAMESPACES)

        window = query_args_type(ump_rec.start, ump_rec.end)
        query_results = query_mod.namespace_edits_rev_query(
            [ump_rec.user], metric_params.project, window)

        for row in query_results:
            try:
                if row[1] in NamespaceEdits.VALID_NAMESPACES:
                    results[str(row[0])][str(row[1])] = int(row[2])
            except (KeyError, IndexError):
                logging.error(__name__ + "::Could not process row: %s" % str(row))

    return list(results.items())
コード例 #30
0
 def pop(self, target):
     """
     Pop the top value from the list

     Reads the first line of the file at ``target``, parses it as a JSON
     object, removes its first key from the target via ``self.remove`` and
     returns the value stored under that key.

     Returns None when the file is empty, when the first line is not a
     non-empty JSON object, or when the file could not be read (in which
     case it is created empty).
     """
     try:
         with open(target, 'r') as f:
             contents = f.read()
             if contents:
                 first_line = contents.split('\n')[0]
                 try:
                     item = json.loads(first_line)
                     # list() keeps this working whether keys() returns a
                     # list or a view
                     key = list(item.keys())[0]
                 except (KeyError, ValueError, IndexError, AttributeError):
                     # ValueError: not JSON; IndexError: empty object;
                     # AttributeError: JSON value that is not an object
                     logging.error(__name__ + ' :: FileBroker.pop - '
                                              'Could not parse key.')
                     return None
                 self.remove(target, key)
                 return item[key]
     except IOError:
         # Target could not be read: create it empty
         with open(target, 'w'):
             pass
     return None