def map_lr(self):
    """
    Gather every entry of ``self.lrs`` into one dict, scaled by a decay factor.

    Each value found in the dicts of ``self.lrs`` is multiplied by
    ``(1 - 2 * learnrate * regulizer) ** i`` and passed to ``add_dict`` as a
    one-element list under its original key. ``self.lrs`` is read while
    ``self.lrs_lock`` is held.

    :return: the dict populated through ``add_dict``
    """
    lr_values = dict()
    with self.lrs_lock:
        # The factor depends only on instance attributes, never on the entry
        # being visited, so it is computed once rather than once per value.
        decay = (1. - 2. * self.learnrate * self.regulizer) ** self.i
        for lr in self.lrs:
            for key, value in lr.items():
                add_dict(lr_values, key, [value * decay])
    return lr_values
def stat_dict_to_list(attr_stat_dict):
    """
    Convert a per-attribute-value statistics mapping into column-style lists.

    :param attr_stat_dict: {attribute_value: {stat_key: value}}; each inner
        dict is indexed with every key in STATS (presumably count / revenue /
        rate -- STATS is defined elsewhere, confirm there)
    :return: (attribute values in sorted order, dict filled through add_dict
        with one single-element list per attribute value and stat key)
    """
    ordered_values = sorted(attr_stat_dict.keys())
    columns = {}
    for value in ordered_values:
        for stat in STATS:
            cell = attr_stat_dict[value][stat]
            add_dict(columns, stat, [cell])
    return list(ordered_values), columns
def stat_dict_to_list(attr_stat_dict):
    """
    Reshape {attribute_value: {stat_key: value}} into parallel lists.

    The attribute values are visited in sorted order; for each of them, the
    value stored under every key of STATS is handed to add_dict wrapped in a
    one-element list.

    :param attr_stat_dict: mapping from attribute value to a dict that is
        indexed with each key in STATS (STATS is defined elsewhere)
    :return: tuple of (sorted attribute values, dict populated via add_dict)
    """
    sorted_keys = sorted(attr_stat_dict.keys())
    stat_columns = {}
    for attr_key in sorted_keys:
        for stat_name in STATS:
            entry = [attr_stat_dict[attr_key][stat_name]]
            add_dict(stat_columns, stat_name, entry)
    return list(sorted_keys), stat_columns
def lsr_update(t, y, x, weights, learnrate, default_value=None):
    """
    Update values using logistic streaming regression.

    The error ``t - y`` scaled by ``learnrate`` is passed to ``add_dict`` for
    the "intercept" entry, and the same quantity multiplied by ``x[key]`` is
    passed for every key of ``x``.

    :param t: target
    :param y: output
    :param x: input; iterated for its keys and indexed by each key
    :param weights: container handed to add_dict for every update
    :param learnrate: factor multiplying the error
    :param default_value: optional zero-argument callable; it is called once
        per add_dict call and its result is passed as the fourth argument.
        When omitted, None is passed instead.
    :return: the ``weights`` object that was passed in
    """
    def _none_factory():
        return None

    make_default = _none_factory if default_value is None else default_value
    scaled_error = learnrate * (t - y)
    add_dict(weights, "intercept", scaled_error, make_default())
    for feature in x:
        add_dict(weights, feature, scaled_error * x[feature], make_default())
    return weights
def average_param_reward(files):
    """
    Average the logged reward of saved runs, grouped by agent parameters.

    For every name in ``files`` the agent is loaded from ``agents/<name>`` and
    its log from ``log/<name>``. Logs without a 'reward' entry are skipped.
    Files whose name contains 'thomp(' are grouped by the agent's
    ``learnrate`` and then ``regulizer``; otherwise files whose name contains
    'greedy' are grouped under ``average[0.0]['greedy']``.

    :param files: iterable of file names present in both directories
    :return: (average, lengths) where ``average[k1][k2]`` is the mean reward
        and ``lengths[k1][k2]`` the number of rewards that were averaged
    """
    average = {}
    for file_name in files:
        # `.item()` followed by dict-style access means these files hold
        # pickled Python objects. NumPy >= 1.16.3 refuses to load those unless
        # allow_pickle=True is passed. Unpickling can execute arbitrary code,
        # so only point this at files produced by trusted runs.
        agent = np.load(os.path.join('agents', file_name),
                        allow_pickle=True).item()
        log = np.load(os.path.join('log', file_name),
                      allow_pickle=True).item()
        if 'reward' not in log:
            continue
        if 'thomp(' in file_name:
            learnrate = agent['learnrate']
            regulizer = agent['regulizer']
            create_dictionary(average, learnrate, {})
            add_dict(average[learnrate], regulizer, [log['reward']], [])
        elif 'greedy' in file_name:
            create_dictionary(average, 0.0, {})
            add_dict(average[0.0], 'greedy', [log['reward']], [])

    lengths = {}
    for k1, rewards_by_k2 in average.items():
        for k2 in rewards_by_k2:
            rewards = rewards_by_k2[k2]
            create_dictionary(lengths, k1, {})
            lengths[k1][k2] = len(rewards)
            # Overwrite the list of rewards with its mean; the key set is
            # unchanged, so this is safe while iterating.
            rewards_by_k2[k2] = sum(rewards) / len(rewards)
    return average, lengths
def average_param_reward(files):
    """
    Compute the mean logged reward per parameter combination.

    Each name in ``files`` is loaded twice: the agent from ``agents/<name>``
    and the log from ``log/<name>``. Only logs containing a 'reward' entry
    contribute. Names containing 'thomp(' are keyed by the agent's
    ``learnrate`` then ``regulizer``; otherwise names containing 'greedy' are
    keyed by ``0.0`` then ``'greedy'``.

    :param files: iterable of file names found in both directories
    :return: (average, lengths); ``average[k1][k2]`` holds the mean reward and
        ``lengths[k1][k2]`` how many rewards went into that mean
    """
    average = {}
    for file_name in files:
        agent_path = os.path.join('agents', file_name)
        log_path = os.path.join('log', file_name)
        # The `.item()` + dict access pattern implies pickled objects, which
        # NumPy >= 1.16.3 only loads with allow_pickle=True. Unpickling can
        # run arbitrary code: use this on trusted files only.
        agent = np.load(agent_path, allow_pickle=True).item()
        log = np.load(log_path, allow_pickle=True).item()
        if 'reward' in log:
            if 'thomp(' in file_name:
                learnrate = agent['learnrate']
                regulizer = agent['regulizer']
                reward = log['reward']
                create_dictionary(average, learnrate, {})
                add_dict(average[learnrate], regulizer, [reward], [])
            elif 'greedy' in file_name:
                create_dictionary(average, 0.0, {})
                add_dict(average[0.0], 'greedy', [log['reward']], [])

    lengths = {}
    for k1 in average:
        for k2 in average[k1]:
            collected = average[k1][k2]
            count = len(collected)
            create_dictionary(lengths, k1, {})
            lengths[k1][k2] = count
            # Replace the collected rewards with their mean (same key set, so
            # mutating during iteration is safe).
            average[k1][k2] = sum(collected) / count
    return average, lengths
 def run(self):
     """
     Drive the agent through every run id and record what happened.

     For each id in ``self.run_idx`` the agent handles ``MAX_I + 1``
     interactions: a context is fetched, the agent decides on an action, the
     action is proposed, and the result is fed back to the agent. Each
     interaction is stored in ``self.data`` under the run id and printed.
     After the interactions of a run id, the agent's ``cum_reward / i`` and
     ``cum_reward`` are added to ``self.data`` and ``self.save()`` is called.
     """
     for run_id in self.run_idx:
         for step in range(self.MAX_I + 1):
             context = get_context(run_id, step)
             action = self.agent.decide(context)
             result = propose_page(run_id, step, **action)
             self.agent.feedback(result)

             record = {'context': context, 'action': action, 'result': result}
             add_dict(self.data, run_id, [record], [])

             if result["effect"]["Success"]:
                 success = "Success!"
             else:
                 success = ""
             print(self.to_string(action, run_id, step, success))

         mean_reward = self.agent.cum_reward / self.agent.i
         add_dict(self.data, "reward", mean_reward)
         add_dict(self.data, "cum_reward", self.agent.cum_reward)
         self.save()
 def run(self):
     """
     Execute all configured runs, logging every interaction.

     Iterates over ``self.run_idx``; for each run id, performs
     ``self.MAX_I + 1`` rounds of: get context, let the agent decide, propose
     the chosen action, give the result back to the agent. Every round is
     appended to ``self.data`` under the run id and a summary line is
     printed. When the rounds of a run id are done, ``cum_reward / i`` and
     ``cum_reward`` of the agent are added to ``self.data`` and the state is
     saved through ``self.save()``.
     """
     for run_id in self.run_idx:
         for round_no in range(self.MAX_I + 1):
             context = get_context(run_id, round_no)
             action = self.agent.decide(context)
             result = propose_page(run_id, round_no, **action)
             self.agent.feedback(result)

             entry = dict(context=context, action=action, result=result)
             add_dict(self.data, run_id, [entry], [])

             success = ""
             if result["effect"]["Success"]:
                 success = "Success!"
             print(self.to_string(action, run_id, round_no, success))

         average_reward = self.agent.cum_reward / self.agent.i
         add_dict(self.data, "reward", average_reward)
         add_dict(self.data, "cum_reward", self.agent.cum_reward)
         self.save()