def __init__(self, num_users, RBP_persistence_params,
             population_time_away_mean, population_time_away_stddev,
             lateness_decay, window_size, fix_persistence, fix_away_mean,
             fix_reading_mean, push_threshold, interaction_mode):
    """Set up the push-ranked-order MSU simulator.

    Builds the lognormal away/persistence population model from which
    simulated users are drawn and clears all per-topic bookkeeping.
    The fix_* arguments, when truthy, later override the corresponding
    sampled per-user parameters.
    """
    super(MSUPushRankedOrder, self).__init__(num_users)

    # Population from which simulated users will be sampled; self.seed
    # is provided by the base-class initializer above.
    self.population_model = LognormalAwayPersistenceSessionsPopulationModel(
        self.seed,
        population_time_away_mean,
        population_time_away_stddev,
        RBP_persistence_params,
        lateness_decay,
    )

    # Per-topic caches, (re)filled for each topic before simulation.
    self.sampled_users = []
    self.update_emit_times = []
    self.update_confidences = []
    self.update_lengths = []

    # Simulation knobs.
    self.window_size = window_size
    self.fix_persistence = fix_persistence
    self.fix_reading_mean = fix_reading_mean
    self.fix_away_mean = fix_away_mean
    self.push_threshold = push_threshold
    self.interaction_mode = interaction_mode

    self.user_counter = 0
    self.ignore_verbosity = False
def __init__(self, num_users, RBP_persistence_params,
             population_time_away_mean, population_time_away_stddev,
             pop_lateness_decay, window_size, fix_persistence,
             fix_time_away_param):
    """Set up the ranked-order MSU simulator.

    Builds the lognormal away/persistence population model and resets
    the per-topic state containers. fix_persistence / fix_time_away_param,
    when truthy, later override the sampled per-user parameters.
    """
    super(MSURankedOrder, self).__init__(num_users)

    # Population from which simulated users will be sampled; self.seed
    # comes from the base-class initializer above.
    self.population_model = LognormalAwayPersistenceSessionsPopulationModel(
        self.seed,
        population_time_away_mean,
        population_time_away_stddev,
        RBP_persistence_params,
        pop_lateness_decay,
    )

    # Per-topic state.
    self.sampled_users = []
    self.update_emit_times = []

    # Simulation knobs.
    self.window_size = window_size
    self.fix_persistence = fix_persistence
    self.fix_time_away_param = fix_time_away_param

    self.user_counter = 0
class MSUPushRankedOrder(ModeledStreamUtility, PushRankedInterfaceMixin):
    """
    Simulates users reading updates in ranked order at every session.
    Users persist in reading updates at every session based on the RBP user
    model [Moffat, TOIS 2008].
    """

    def __init__(self, num_users, RBP_persistence_params,
                 population_time_away_mean, population_time_away_stddev,
                 lateness_decay, window_size, fix_persistence, fix_away_mean,
                 fix_reading_mean, push_threshold, interaction_mode):
        # self.seed is provided by the ModeledStreamUtility base initializer.
        super(MSUPushRankedOrder, self).__init__(num_users)
        # Population model that generates (A, P, V, L) per-user parameters.
        self.population_model = LognormalAwayPersistenceSessionsPopulationModel(
            self.seed,
            population_time_away_mean,
            population_time_away_stddev,
            RBP_persistence_params,
            lateness_decay, )
        # Per-topic arrays, populated by initialize_structures_for_topic().
        self.sampled_users = []
        self.update_emit_times = []
        self.update_confidences = []
        self.update_lengths = []
        # Simulation parameters. Each fix_* value, when truthy, overrides the
        # corresponding sampled per-user parameter in
        # sample_users_from_population().
        self.window_size = window_size
        self.fix_persistence = fix_persistence
        self.fix_reading_mean = fix_reading_mean
        self.fix_away_mean = fix_away_mean
        self.push_threshold = push_threshold
        self.interaction_mode = interaction_mode
        self.user_counter = 0
        self.ignore_verbosity = False

    def normalize_confidences(self):
        """Min-max normalize self.update_confidences in place.

        If all confidences are equal, the range is forced to (0.0, 1.0) so
        the map below divides by 1.0 and the values pass through unchanged
        instead of raising ZeroDivisionError.
        """
        #logger.warning(self.update_confidences)
        maxconf = max(self.update_confidences)
        minconf = min(self.update_confidences)
        if maxconf == minconf:
            # Degenerate range: avoid division by zero, keep values as-is.
            maxconf = 1.0
            minconf = 0.0
        self.update_confidences = array.array(
            'd',
            map(lambda x: (x - minconf) / (maxconf - minconf),
                self.update_confidences))
        #logger.warning(self.update_confidences)

    def initialize_structures_for_topic(self, topic_updates):
        """Cache per-update times, normalized confidences, and word lengths
        for one topic's updates (sorted first via presort_updates)."""
        self.presort_updates(topic_updates)
        self.update_emit_times = array.array(
            'd', [upd.time for upd in topic_updates])
        self.update_confidences = array.array(
            'd', [upd.conf for upd in topic_updates])
        self.normalize_confidences()
        self.update_lengths = array.array('d',
                                          [upd.wlen for upd in topic_updates])

    def sample_users_from_population(self, query_duration):
        """Draw self.num_users simulated users from the population model.

        The population RNG seed is reset first, so repeated calls generate
        the same users. Truthy fix_* attributes override the sampled
        persistence (P), away-mean (A), and reading-mean (V) parameters.
        NOTE(review): a fix_* value of 0/0.0 is falsy and therefore NOT
        applied — presumably intentional, but worth confirming.
        """
        if self.sampled_users:
            self.sampled_users = []
        self.query_duration = query_duration
        # reset the seed so that the same users are
        # generated every time this function is called
        self.population_model.reset_random_seed()
        for ui in xrange(self.num_users):
            A, P, V, L = self.population_model.generate_user_params()
            if self.fix_persistence:
                P = self.fix_persistence
            if self.fix_away_mean:
                A = self.fix_away_mean
            if self.fix_reading_mean:
                V = self.fix_reading_mean
            self.sampled_users.append(
                LognormalAwayRBPPersistenceUserModel(A, P, V, L))

    def _compute_user_MSU(self, user_instance, updates):
        """Compute (msu, pain) for one simulated user over a topic's updates.

        Generates the user's session trail, derives per-session reading
        windows, and delegates the actual gain/pain computation to the
        module-level helper _compute_push_ranked_user_MSU (defined
        elsewhere in this file).
        """
        # self.user_counter += 1
        # if self.user_counter % 1000 == 0:
        #     logger.warning('{0} users simulated'.format(self.user_counter))
        user_topic_msu = 0.0
        user_topic_pain = 0.0
        # generate user trail
        # - if in case a user reads till the start of the next session.
        # - new ranked updates are shown when the new session starts
        # - simulates that the user, though persisting, now wants new
        #   information (reload page because I am scraping the bottom here)
        #if self.user_counter == 23:
        #logger.setLevel(logging.DEBUG)
        #logger.warning('user {}'.format(user_instance))
        user_trail = self.generate_user_trail(
            user_instance, self.update_confidences, self.update_emit_times,
            self.query_duration, self.push_threshold, self.interaction_mode)
        # logger.debug('user_trail {}'.format(user_trail))
        # Each window starts window_size seconds before the session start,
        # clamped at 0.0; a window_size of -1 means "no window" (all windows
        # start at time 0.0, see override below).
        window_starts = array.array(
            'd',
            map(
                lambda x: x[0] - self.window_size
                if x[0] - self.window_size >= 0.0 else 0.0, user_trail))
        if self.window_size == -1:
            window_starts = array.array('d', [0.0] * len(user_trail))
        # logger.debug('window_starts {}'.format(str(window_starts)))
        # user_trail items unpack as (session_start, r, t) — only the start
        # time is needed here.
        ssn_starts = array.array('d', [s for s, r, t in user_trail])
        # logger.debug('ssn_starts {}'.format(str(ssn_starts)))
        num_sessions = len(user_trail)
        # logger.debug('----------- user {} {}-------------------'.format(self.user_counter, user_instance))
        user_topic_msu, user_topic_pain = _compute_push_ranked_user_MSU(
            user_trail, window_starts, ssn_starts, self.update_emit_times,
            self.update_confidences, self.update_lengths, updates,
            user_instance.V, user_instance.L, self.query_duration,
            self.ignore_verbosity)
        # logger.debug(' user {} done'.format(self.user_counter))
        return user_topic_msu, user_topic_pain
class MSURankedOrder(ModeledStreamUtility, RankedInterfaceMixin):
    """
    Simulates users reading updates in ranked order at every session.
    Users persist in reading updates at every session based on the RBP user
    model [Moffat, TOIS 2008].
    """

    def __init__(self, num_users, RBP_persistence_params,
                 population_time_away_mean, population_time_away_stddev,
                 pop_lateness_decay, window_size, fix_persistence,
                 fix_time_away_param):
        # self.seed is provided by the ModeledStreamUtility base initializer.
        super(MSURankedOrder, self).__init__(num_users)
        # Population model that generates (A, P, V, L) per-user parameters.
        self.population_model = LognormalAwayPersistenceSessionsPopulationModel(
            self.seed,
            population_time_away_mean,
            population_time_away_stddev,
            RBP_persistence_params,
            pop_lateness_decay, )
        # Per-topic state.
        self.sampled_users = []
        self.update_emit_times = []
        # Simulation parameters; the fix_* values, when truthy, override
        # sampled per-user parameters in sample_users_from_population().
        self.window_size = window_size
        self.fix_persistence = fix_persistence
        self.fix_time_away_param = fix_time_away_param
        self.user_counter = 0

    def sample_users_from_population(self, query_duration):
        """Draw self.num_users simulated users from the population model.

        The population RNG seed is reset first, so repeated calls generate
        the same users. A truthy fix_persistence replaces the sampled P;
        a truthy fix_time_away_param pins A to the population mean M_A.
        """
        if self.sampled_users:
            self.sampled_users = []
        self.query_duration = query_duration
        # reset the seed so that the same users are
        # generated every time this function is called
        self.population_model.reset_random_seed()
        for ui in xrange(self.num_users):
            A, P, V, L = self.population_model.generate_user_params()
            if self.fix_persistence:
                P = self.fix_persistence
            if self.fix_time_away_param:
                A = self.population_model.M_A
            self.sampled_users.append(
                LognormalAwayRBPPersistenceUserModel(A, P, V, L))

    def _compute_user_MSU(self, user_instance, updates):
        """Simulate one user's sessions over a topic and return (msu, pain).

        Walks forward through simulated time: at each session start it finds
        the updates currently available (optionally limited to the past
        window_size seconds), presents them in confidence-ranked order via
        the RankedInterfaceMixin heap, and lets the user keep reading with
        probability P per additional update (RBP persistence). Gain for a
        nugget decays as L**alpha, where alpha counts the sessions elapsed
        since the nugget appeared; an update with no nuggets adds 1 to pain.
        """
        self.user_counter += 1
        if self.user_counter % 1000 == 0:
            logger.warning('{0} users simulated'.format(self.user_counter))
        user_topic_msu = 0.0
        user_topic_pain = 0.0
        current_time = 0.0
        oldest_available_update_idx = 0
        updates_read = defaultdict(bool)
        # nugget id -> alpha (sessions of lateness) at first reading;
        # prevents re-crediting a nugget seen in an earlier update.
        already_seen_ngts = {}
        ssn_starts = [0.0]
        num_updates = len(updates)
        self.reset_interface()
        #logger.warning('user {0}'.format(str(user_instance)))
        logger.debug('num_updates {0}'.format(num_updates))
        current_time = 0
        while current_time < self.query_duration:
            logger.debug('current_time {0}, window start {1}'.format(
                current_time, current_time -
                (current_time if self.window_size == -1 else self.window_size)))
            # find available sentences to read at this user session
            # find latest update at current_time (session starts)
            latest_update_idx = bisect.bisect(self.update_emit_times,
                                              current_time)
            latest_update_idx -= 1
            window_lower_limit = 0
            if self.window_size != -1:
                window_lower_limit = current_time - self.window_size
                # consider updates from within past window_size seconds only
                oldest_available_update_idx = bisect.bisect(
                    self.update_emit_times, window_lower_limit)
                if oldest_available_update_idx == num_updates:
                    # no more updates to read
                    # no need to eval further sessions
                    logger.debug('looked at all updates')
                    break
                #oldest_available_update_idx = 0 if oldest_available_update_idx == 0 else oldest_available_update_idx - 1
            else:
                # consider all update from start of query_duration
                oldest_available_update_idx = 0
            logger.debug(
                'oldest_available_update_idx {0}, latest_update_idx {1}'.
                format(oldest_available_update_idx, latest_update_idx))
            logger.debug('available {0}'.format(
                str(updates[oldest_available_update_idx:latest_update_idx +
                            1])))
            self.add_updates_to_conf_heap(oldest_available_update_idx,
                                          latest_update_idx, updates)
            logger.debug('conf_heap {0}'.format(self.conf_heap))
            # read sentences until user persists
            is_first_update = True
            for upd_idx in self.update_presentation_order(
                    oldest_available_update_idx, latest_update_idx, updates):
                update = updates[upd_idx]
                logger.debug('update {0}'.format(str(update)))
                if update.time < window_lower_limit:
                    # this update is not to be considered for display to the
                    # user anymore
                    logger.debug("update is OUT OF WINDOW LIMIT")
                    self.remove_update_from_conf_heap(update.updid)
                    continue
                #logger.debug('upddate {0}'.format(str(update)))
                # will the user persist in reading this update?
                # (the first update of a session is always read)
                if not is_first_update and np.random.random_sample(
                ) > user_instance.P:
                    # the user will not read this update
                    logger.debug('USER DID NOT PERSIST')
                    break
                # note time elapsed for reading each update; increment
                # current_time (V is the user's reading speed, words/sec —
                # presumably; confirm against the user model)
                upd_time_to_read = (float(update.wlen) / user_instance.V)
                current_time += upd_time_to_read
                is_first_update = False
                updates_read[update.updid] = True
                # the user PERSISTED to read this update
                logger.debug('READ UPDATE')
                self.remove_update_from_conf_heap(update.updid)
                update_msu = 0.0
                # check for nuggets and update user msu
                for ngt in update.nuggets:
                    if ngt.ngtid in already_seen_ngts:
                        continue
                    # alpha = number of whole sessions elapsed since the
                    # nugget appeared; gain decays geometrically in alpha.
                    ngt_after = bisect.bisect(ssn_starts, ngt.time)
                    alpha = (len(ssn_starts) - 1) - ngt_after
                    already_seen_ngts[ngt.ngtid] = alpha
                    alpha = 0 if alpha < 0 else alpha
                    ngt_msu = (self.population_model.L**alpha)
                    update_msu += ngt_msu
                user_topic_msu += update_msu
                if not update.nuggets:
                    user_topic_pain += 1
            # increment current_time with time spent away
            time_away = user_instance.get_next_time_away_duration(
                current_time, self.query_duration)
            current_time += time_away
            ssn_starts.append(current_time)
        #logger.warning( str(user_instance) )
        #logger.warning( 'gain {}, pain {}'.format(user_topic_msu, user_topic_pain))
        return (user_topic_msu, user_topic_pain)