Code example #1
0
    def __init__(self, num_users, RBP_persistence_params,
                 population_time_away_mean, population_time_away_stddev,
                 lateness_decay,
                 window_size,
                 fix_persistence,
                 fix_away_mean,
                 fix_reading_mean,
                 push_threshold,
                 interaction_mode):
        """Configure the push-ranked MSU simulator.

        Builds the user population model and initializes the per-topic
        bookkeeping containers and simulation knobs.
        """
        super(MSUPushRankedOrder, self).__init__(num_users)

        # Population: lognormal time-away + RBP persistence user models.
        self.population_model = LognormalAwayPersistenceSessionsPopulationModel(
            self.seed,
            population_time_away_mean,
            population_time_away_stddev,
            RBP_persistence_params,
            lateness_decay,
        )

        # Per-topic state, (re)populated before each topic is simulated.
        self.sampled_users = []
        self.update_emit_times = []
        self.update_confidences = []
        self.update_lengths = []

        # Simulation parameters.
        self.window_size = window_size
        self.fix_persistence = fix_persistence
        self.fix_away_mean = fix_away_mean
        self.fix_reading_mean = fix_reading_mean
        self.push_threshold = push_threshold
        self.interaction_mode = interaction_mode

        self.user_counter = 0
        self.ignore_verbosity = False
    def __init__(self, num_users, RBP_persistence_params,
                 population_time_away_mean, population_time_away_stddev,
                 pop_lateness_decay, window_size, fix_persistence,
                 fix_time_away_param):
        """Configure the ranked-order MSU simulator.

        Builds the user population model and initializes per-topic
        containers and simulation knobs.
        """
        super(MSURankedOrder, self).__init__(num_users)

        # Population: lognormal time-away + RBP persistence user models.
        self.population_model = LognormalAwayPersistenceSessionsPopulationModel(
            self.seed,
            population_time_away_mean,
            population_time_away_stddev,
            RBP_persistence_params,
            pop_lateness_decay,
        )

        # Per-topic state, (re)populated before each topic is simulated.
        self.sampled_users = []
        self.update_emit_times = []

        # Simulation parameters.
        self.window_size = window_size
        self.fix_persistence = fix_persistence
        self.fix_time_away_param = fix_time_away_param

        self.user_counter = 0
Code example #3
0
class MSUPushRankedOrder(ModeledStreamUtility, PushRankedInterfaceMixin):
    """
    Simulates users reading updates in ranked order at every session.
    Users persist in reading updates at every session based on the RBP user model [Moffat, TOIS 2008]

    Push variant: each user's session trail is produced by
    generate_user_trail() (from the interface mixin) using the normalized
    update confidences, a push threshold and an interaction mode; the
    per-session gain/pain computation is delegated to the external helper
    _compute_push_ranked_user_MSU().
    """

    def __init__(self, num_users, RBP_persistence_params, \
        population_time_away_mean, population_time_away_stddev, \
        lateness_decay,
        window_size,
        fix_persistence,
        fix_away_mean,
        fix_reading_mean,
        push_threshold,
        interaction_mode):
        """
        num_users: number of users to simulate.
        RBP_persistence_params: parameters of the population's RBP
            persistence distribution.
        population_time_away_mean, population_time_away_stddev: lognormal
            parameters of the users' time-away-between-sessions model.
        lateness_decay: decay parameter forwarded to the population model
            (presumably the per-session latency discount base -- confirm).
        window_size: seconds of stream history visible at a session;
            -1 means the whole stream is always visible.
        fix_persistence, fix_away_mean, fix_reading_mean: when truthy,
            override the corresponding sampled per-user parameter.
        push_threshold, interaction_mode: forwarded to generate_user_trail().
        """

        super(MSUPushRankedOrder, self).__init__(num_users)

        self.population_model = LognormalAwayPersistenceSessionsPopulationModel(self.seed,
                                population_time_away_mean, \
                                population_time_away_stddev, \
                                RBP_persistence_params, \
                                lateness_decay, )

        # Per-topic parallel arrays; filled by initialize_structures_for_topic().
        self.sampled_users = []
        self.update_emit_times = []
        self.update_confidences = []
        self.update_lengths = []
        self.window_size = window_size
        self.fix_persistence = fix_persistence
        self.fix_reading_mean = fix_reading_mean
        self.fix_away_mean = fix_away_mean
        self.push_threshold = push_threshold
        self.interaction_mode = interaction_mode
        self.user_counter = 0
        self.ignore_verbosity = False

    def normalize_confidences(self):
        """Min-max normalize self.update_confidences into [0, 1] in place.

        If every confidence is identical, the range is forced to (0, 1) so
        the division is safe; in that case the raw values pass through
        unchanged.
        """
        #logger.warning(self.update_confidences)
        maxconf = max(self.update_confidences)
        minconf = min(self.update_confidences)
        if maxconf == minconf:
            maxconf = 1.0
            minconf = 0.0
        self.update_confidences = array.array(
            'd',
            map(lambda x: (x - minconf) / (maxconf - minconf),
                self.update_confidences))
        #logger.warning(self.update_confidences)

    def initialize_structures_for_topic(self, topic_updates):
        """Cache this topic's updates as parallel arrays of doubles.

        presort_updates() (from the interface mixin) prepares the update
        ordering; emit times, normalized confidences and word lengths are
        then stored for fast indexed access during simulation.
        """
        self.presort_updates(topic_updates)
        self.update_emit_times = array.array(
            'd', [upd.time for upd in topic_updates])
        self.update_confidences = array.array(
            'd', [upd.conf for upd in topic_updates])
        self.normalize_confidences()
        self.update_lengths = array.array('d',
                                          [upd.wlen for upd in topic_updates])

    def sample_users_from_population(self, query_duration):
        """Sample self.num_users user models for this run.

        The population RNG is re-seeded first so every call regenerates the
        exact same user sample.  Truthy fix_* attributes override the
        sampled values.  NOTE(review): an override value of 0 is ignored by
        these truthiness checks -- confirm that is intended.
        """
        if self.sampled_users:
            self.sampled_users = []

        self.query_duration = query_duration

        # reset the seed so that the same users are
        # generated every time this function is called
        self.population_model.reset_random_seed()

        for ui in xrange(self.num_users):
            # A: time away, P: persistence, V: reading speed,
            # L: presumably lateness decay (see the fix_* overrides below).
            A, P, V, L = self.population_model.generate_user_params()
            if self.fix_persistence:
                P = self.fix_persistence
            if self.fix_away_mean:
                A = self.fix_away_mean
            if self.fix_reading_mean:
                V = self.fix_reading_mean
            self.sampled_users.append(
                LognormalAwayRBPPersistenceUserModel(A, P, V, L))

    def _compute_user_MSU(self, user_instance, updates):
        """Simulate one user over the stream; return (msu, pain).

        Builds the user's session trail, derives each session's visibility
        window, then delegates the gain/pain computation to the external
        helper _compute_push_ranked_user_MSU().
        """

        # self.user_counter += 1
        # if self.user_counter % 1000 == 0:
        #     logger.warning('{0} users simulated'.format(self.user_counter))

        user_topic_msu = 0.0
        user_topic_pain = 0.0

        # generate user trail
        # - if in case a user reads till the start of the next session.
        #   - new ranked updates are shown when the new session starts
        #     - simulates that the user though persisting now wants new information (reload page because I am scraping the bottom here)

        #if self.user_counter == 23:
        #logger.setLevel(logging.DEBUG)
        #logger.warning('user {}'.format(user_instance))

        # Trail entries are 3-tuples whose first element is the session
        # start time (see the unpacking into ssn_starts below).
        user_trail = self.generate_user_trail(
            user_instance, self.update_confidences, self.update_emit_times,
            self.query_duration, self.push_threshold, self.interaction_mode)
        # logger.debug('user_trail {}'.format(user_trail))
        # Each session sees updates emitted in [start - window_size, start],
        # clamped at time 0.
        window_starts = array.array(
            'd',
            map(
                lambda x: x[0] - self.window_size
                if x[0] - self.window_size >= 0.0 else 0.0, user_trail))
        if self.window_size == -1:
            # Unlimited window: every session sees the stream from time 0.
            window_starts = array.array('d', [0.0] * len(user_trail))
        # logger.debug('window_starts {}'.format(str(window_starts)))
        ssn_starts = array.array('d', [s for s, r, t in user_trail])
        # logger.debug('ssn_starts {}'.format(str(ssn_starts)))
        num_sessions = len(user_trail)  # NOTE(review): unused in this method

        # logger.debug('----------- user {} {}-------------------'.format(self.user_counter, user_instance))

        user_topic_msu, user_topic_pain = _compute_push_ranked_user_MSU(
            user_trail, window_starts, ssn_starts, self.update_emit_times,
            self.update_confidences, self.update_lengths, updates,
            user_instance.V, user_instance.L, self.query_duration,
            self.ignore_verbosity)
        # logger.debug(' user {} done'.format(self.user_counter))

        return user_topic_msu, user_topic_pain
class MSURankedOrder(ModeledStreamUtility, RankedInterfaceMixin):
    """
    Simulates users reading updates in ranked order at every session.
    Users persist in reading updates at every session based on the RBP user model [Moffat, TOIS 2008]

    Pull variant: session start times come from the user's time-away model,
    and the updates visible at each session are presented via a confidence
    heap maintained through the RankedInterfaceMixin helpers.
    """

    def __init__(self, num_users, RBP_persistence_params, \
        population_time_away_mean, population_time_away_stddev, \
        pop_lateness_decay, window_size, fix_persistence, fix_time_away_param):
        """
        num_users: number of users to simulate.
        RBP_persistence_params: parameters of the population's RBP
            persistence distribution.
        population_time_away_mean, population_time_away_stddev: lognormal
            parameters of the users' time-away-between-sessions model.
        pop_lateness_decay: decay parameter forwarded to the population
            model (used as the latency discount base L below).
        window_size: seconds of stream history visible at a session;
            -1 means the whole stream is always visible.
        fix_persistence: when truthy, overrides each sampled persistence.
        fix_time_away_param: when truthy, pins each user's time-away
            parameter to the population mean (M_A).
        """

        super(MSURankedOrder, self).__init__(num_users)

        self.population_model = LognormalAwayPersistenceSessionsPopulationModel(self.seed,
                                population_time_away_mean, \
                                population_time_away_stddev, \
                                RBP_persistence_params, \
                                pop_lateness_decay, )

        # Per-topic state; update_emit_times is rebuilt per topic.
        self.sampled_users = []
        self.update_emit_times = []
        self.window_size = window_size
        self.fix_persistence = fix_persistence
        self.fix_time_away_param = fix_time_away_param
        self.user_counter = 0

    def sample_users_from_population(self, query_duration):
        """Sample self.num_users user models for this run.

        The population RNG is re-seeded first so every call regenerates the
        exact same user sample.  NOTE(review): the truthiness checks mean an
        override value of 0 is silently ignored -- confirm intended.
        """
        if self.sampled_users:
            self.sampled_users = []

        self.query_duration = query_duration

        # reset the seed so that the same users are
        # generated every time this function is called
        self.population_model.reset_random_seed()

        for ui in xrange(self.num_users):
            # A: time away, P: persistence, V: reading speed,
            # L: presumably lateness decay (per the user-model class name).
            A, P, V, L = self.population_model.generate_user_params()
            if self.fix_persistence:
                P = self.fix_persistence
            if self.fix_time_away_param:
                # Pin A to the population mean rather than a caller-supplied value.
                A = self.population_model.M_A
            self.sampled_users.append(
                LognormalAwayRBPPersistenceUserModel(A, P, V, L))

    def _compute_user_MSU(self, user_instance, updates):
        """Simulate one user reading the whole stream; return (msu, pain).

        msu accumulates latency-discounted gain for first-seen nuggets;
        pain counts nugget-less updates the user read.  Persistence is
        drawn from the global NumPy RNG per update, so results depend on
        the global RNG state.
        """

        self.user_counter += 1
        if self.user_counter % 1000 == 0:
            logger.warning('{0} users simulated'.format(self.user_counter))

        user_topic_msu = 0.0
        user_topic_pain = 0.0
        current_time = 0.0
        oldest_available_update_idx = 0

        updates_read = defaultdict(bool)  # NOTE(review): written but never read in this method
        already_seen_ngts = {}  # nugget id -> alpha at first sighting
        ssn_starts = [0.0]  # session start times; index == session number
        num_updates = len(updates)

        # Clear the mixin's confidence-heap state from any previous user.
        self.reset_interface()

        #logger.warning('user {0}'.format(str(user_instance)))
        logger.debug('num_updates {0}'.format(num_updates))

        current_time = 0
        while current_time < self.query_duration:

            logger.debug('current_time {0}, window start {1}'.format(
                current_time, current_time - (current_time if self.window_size
                                              == -1 else self.window_size)))

            # find available sentences to read at this user session

            # find latest update at current_time (session starts)
            # bisect returns the insertion point, so after the -1 this is
            # the index of the newest update emitted at or before now.
            latest_update_idx = bisect.bisect(self.update_emit_times,
                                              current_time)
            latest_update_idx -= 1

            window_lower_limit = 0
            if self.window_size != -1:
                window_lower_limit = current_time - self.window_size

                # consider updates from within past window_size seconds only
                oldest_available_update_idx = bisect.bisect(
                    self.update_emit_times, window_lower_limit)
                if oldest_available_update_idx == num_updates:
                    # no more updates to read
                    # no need to eval further sessions
                    logger.debug('looked at all updates')
                    break
                #oldest_available_update_idx = 0 if oldest_available_update_idx == 0 else oldest_available_update_idx - 1
            else:
                # consider all update from start of query_duration
                oldest_available_update_idx = 0

            logger.debug(
                'oldest_available_update_idx {0}, latest_update_idx {1}'.
                format(oldest_available_update_idx, latest_update_idx))
            logger.debug('available {0}'.format(
                str(updates[oldest_available_update_idx:latest_update_idx +
                            1])))

            # Make the window's updates presentable (ranked by confidence).
            self.add_updates_to_conf_heap(oldest_available_update_idx,
                                          latest_update_idx, updates)
            logger.debug('conf_heap {0}'.format(self.conf_heap))

            # read sentences until user persists
            is_first_update = True
            for upd_idx in self.update_presentation_order(
                    oldest_available_update_idx, latest_update_idx, updates):

                update = updates[upd_idx]
                logger.debug('update {0}'.format(str(update)))

                if update.time < window_lower_limit:
                    # this update is not to be considered for display to the user anymore
                    logger.debug("update is OUT OF WINDOW LIMIT")
                    self.remove_update_from_conf_heap(update.updid)
                    continue

                #logger.debug('upddate {0}'.format(str(update)))

                # will the user persist in reading this udpate
                # RBP: after the first update, continue with probability P.
                if not is_first_update and np.random.random_sample(
                ) > user_instance.P:
                    # the user will not read this update
                    logger.debug('USER DID NOT PERSIST')
                    break

                # note time elapsed for reading each update; increment current_time
                upd_time_to_read = (float(update.wlen) / user_instance.V)
                current_time += upd_time_to_read

                is_first_update = False

                updates_read[update.updid] = True
                # the user PERSISTED to read this update
                logger.debug('READ UPDATE')
                self.remove_update_from_conf_heap(update.updid)

                update_msu = 0.0
                # check for nuggets and update user msu
                for ngt in update.nuggets:
                    if ngt.ngtid in already_seen_ngts:
                        # only the first sighting of a nugget earns gain
                        continue
                    # alpha: how many sessions started after the nugget was
                    # emitted, i.e. how late (in sessions) the user found it.
                    ngt_after = bisect.bisect(ssn_starts, ngt.time)
                    alpha = (len(ssn_starts) - 1) - ngt_after
                    already_seen_ngts[ngt.ngtid] = alpha
                    alpha = 0 if alpha < 0 else alpha
                    # Latency discount L**alpha.
                    # NOTE(review): uses the population-level L, not
                    # user_instance.L (the push-ranked class passes the
                    # per-user L) -- confirm which is intended.
                    ngt_msu = (self.population_model.L**alpha)
                    update_msu += ngt_msu

                user_topic_msu += update_msu

                if not update.nuggets:
                    # reading a nugget-less update costs one unit of pain
                    user_topic_pain += 1

            # increment current_time with time spent away
            time_away = user_instance.get_next_time_away_duration(
                current_time, self.query_duration)
            current_time += time_away
            ssn_starts.append(current_time)

        #logger.warning( str(user_instance) )
        #logger.warning( 'gain {}, pain {}'.format(user_topic_msu, user_topic_pain))

        return (user_topic_msu, user_topic_pain)