def process(self, user_handle, **kwargs):
    """
        Set up metrics gathering across a pool of workers.

        Revisions for the requested users are fetched with
        ``_get_revisions``, handed to ``_process_help`` and the partial
        results are combined by ``agg.list_sum_by_group``.  Every requested
        user that has no row in the combined results receives a row of
        zeros.

        - Parameters:
            - **user_handle** - a single user handle or an iterable of
                handles.  A falsy value (``None``, ``[]``, ``''`` ...) means
                the handles are read from the data source using
                ``bytes_added_rev_user_query`` for the configured start and
                end timestamps.
            - **kwargs** - completed by ``apply_default_kwargs``; this
                method reads ``num_threads``, ``log_progress`` and
                ``log_frequency``.

        - Return:
            - ``self``.  The rows are stored in ``self._results``.
    """
    self.apply_default_kwargs(kwargs, 'process')

    k = kwargs['num_threads']
    log_progress = bool(kwargs['log_progress'])
    log_frequency = int(kwargs['log_frequency'])

    if user_handle:
        # A lone handle must be wrapped in a list.  Strings are checked
        # explicitly because on Python 3 ``str`` defines ``__iter__`` and
        # would otherwise be processed one character at a time.
        if isinstance(user_handle, (str, bytes)) or \
                not hasattr(user_handle, '__iter__'):
            user_handle = [user_handle]
    else:
        # No handles supplied - pull the user list from the data source
        sql = bytes_added_rev_user_query(self._start_ts_, self._end_ts_)

        if log_progress:
            logging.info('%s::Getting all distinct users: " %s "',
                         __name__, sql)
        user_handle = [str(row[0]) for row in
                       self._data_source_.execute_SQL(sql)]
        if log_progress:
            logging.info('%s::Retrieved %s users.',
                         __name__, len(user_handle))

    # get revisions
    rev_args = [log_progress, self._start_ts_, self._end_ts_,
                self._project_, self._namespace_]
    revs = mpw.build_thread_pool(user_handle, _get_revisions, k, rev_args)

    # Start the workers and aggregate results for bytes added
    agg_args = [log_progress, log_frequency, self._project_]
    self._results = agg.list_sum_by_group(
        mpw.build_thread_pool(revs, _process_help, k, agg_args), 0)

    # Add any missing users - O(n).  Handles are compared as strings
    # against the first column of each result row.
    tallied_users = {str(r[0]) for r in self._results}
    for user in user_handle:
        if str(user) not in tallied_users:
            # Add a row indicating no activity for that user
            self._results.append([user, 0, 0, 0, 0, 0])

    return self
def process(self, user_handle, **kwargs):
    """
        Dispatch the user handles to ``_process_help`` through
        ``mpw.build_thread_pool`` and store what it returns.

        - Parameters:
            - **user_handle** - a single user handle or an iterable of
                handles.
            - **kwargs** - completed by ``apply_default_kwargs``; this
                method reads ``num_threads`` and ``log``.

        - Return:
            - ``self``.  The pool output is stored in ``self._results``.
    """
    self.apply_default_kwargs(kwargs, 'process')

    # Ensure the handles are iterable.  Strings are checked explicitly
    # because on Python 3 ``str`` defines ``__iter__`` and a lone string
    # handle would otherwise be processed one character at a time.
    if isinstance(user_handle, (str, bytes)) or \
            not hasattr(user_handle, '__iter__'):
        user_handle = [user_handle]

    k = int(kwargs['num_threads'])
    log = bool(kwargs['log'])

    if log:
        logging.info('%s::parameters = %s', __name__, kwargs)

    # Shared state handed to every worker alongside its share of handles
    args = [self._project_, log, self._start_ts_, self._end_ts_]
    self._results = mpw.build_thread_pool(user_handle, _process_help, k,
                                          args)

    return self
def process(self, user_handle, **kwargs):
    """
        Dispatch the user handles to ``_process_help`` through
        ``mpw.build_thread_pool`` and store what it returns.

        - Parameters:
            - **user_handle** - a single user handle or an iterable of
                handles.
            - **kwargs** - completed by ``apply_default_kwargs``; this
                method reads ``num_threads``, ``rev_threads`` and
                ``log_progress``.

        - Return:
            - ``self``.  The pool output is stored in ``self._results``.
    """
    self.apply_default_kwargs(kwargs, 'process')

    # Ensure the handles are iterable.  Strings are checked explicitly
    # because on Python 3 ``str`` defines ``__iter__`` and a lone string
    # handle would otherwise be processed one character at a time.
    if isinstance(user_handle, (str, bytes)) or \
            not hasattr(user_handle, '__iter__'):
        user_handle = [user_handle]

    k = int(kwargs['num_threads'])
    k_r = int(kwargs['rev_threads'])
    log_progress = bool(kwargs['log_progress'])

    # Shared state for the workers.  The order matters: the worker reads
    # these values positionally.
    args = [self._project_, log_progress, self.look_ahead,
            self.look_back, self._start_ts_, self._end_ts_, k_r]
    self._results = mpw.build_thread_pool(user_handle, _process_help, k,
                                          args)

    return self
def _process_help(args):
    """
        Pool worker that computes a revert rate for every user in its
        share of the work.  Should not be called externally.

        ``args[0]`` holds the users assigned to this worker and ``args[1]``
        the shared state, whose first seven items are unpacked positionally
        into a ``RevertRateArgsClass``.

        Returns a list of ``[user, revert_rate, total_revisions]`` rows.
        The rate is ``0.0`` when no revisions were counted for the user.
    """
    users, state = args[0], args[1]
    opts = RevertRateArgsClass(*[state[i] for i in range(7)])

    if opts.log_progress:
        logging.info(
            __name__ + '::Computing reverts on %s users in thread %s.'
            % (len(users), str(os.getpid())))

    rows = []
    for user in users:
        conn = dl.Connector(instance='slave')
        query = revert_rate_user_revs_query(opts.project, user,
                                            opts.date_start, opts.date_end)
        conn._cur_.execute(query)
        revisions = list(conn._cur_)

        # Drop the connection reference before the revisions are handed to
        # the nested pool
        del conn

        rev_total = 0.0
        revert_total = 0.0
        for counts in mpw.build_thread_pool(revisions, _revision_proc,
                                            opts.rev_threads, state):
            rev_total += counts[0]
            revert_total += counts[1]

        # Guard against dividing by zero for users without revisions
        rate = revert_total / rev_total if rev_total else 0.0
        rows.append([user, rate, rev_total])

    if opts.log_progress:
        logging.info(__name__ + '::PID %s complete.' % (str(os.getpid())))

    return rows
def process(self, user_handle, **kwargs):
    """
        This function gathers threshold (survival) metric data by: ::

            1. running ``threshold_reg_query`` for the given user handles
                and project to select the matching user rows
            2. handing those rows to ``_process_help`` through
                ``mpw.build_thread_pool`` together with the metric settings
                (namespace, ``n``, ``t``, start / end timestamps and flags)

        - Parameters:
            - **user_handle** - String or Integer (optionally lists).  Value
                or list of values representing user handle(s).  An empty
                collection is replaced by ``[-1]``, a sentinel intended to
                match no user.
            - **kwargs** - completed by ``apply_default_kwargs``; this
                method reads ``num_threads``, ``log_progress``, ``survival``
                and ``restrict``.

        - Return:
            - ``self``.  The pool output is stored in ``self._results``.

        **NOTA BENE** - kwarg "survival" is forwarded to the workers;
        presumably it selects a survival rather than a threshold metric
        (the switch itself lives in ``_process_help``).
    """
    self.apply_default_kwargs(kwargs, 'process')

    k = kwargs['num_threads']
    log_progress = bool(kwargs['log_progress'])
    survival = bool(kwargs['survival'])
    restrict = bool(kwargs['restrict'])

    # Format condition on user ids.  Strings are checked explicitly because
    # on Python 3 ``str`` defines ``__iter__`` and a lone string handle
    # would otherwise be treated as a collection of characters.
    if isinstance(user_handle, (str, bytes)) or \
            not hasattr(user_handle, '__iter__'):
        user_handle = [user_handle]
    elif not user_handle:
        # No user is matched.  Bind a new list instead of appending so the
        # caller's object is left untouched and empty tuples are accepted.
        user_handle = [-1]

    reg_query = threshold_reg_query(user_handle, self._project_)
    self._data_source_._cur_.execute(reg_query)

    # Process results
    user_data = list(self._data_source_._cur_)
    args = [self._project_, self._namespace_, self._n_,
            self._t_, log_progress, survival, restrict,
            self._start_ts_, self._end_ts_]
    self._results = mpw.build_thread_pool(user_data, _process_help, k, args)

    return self