def map_lr(self):
    """Gather all learning-rate entries, scaled by the current decay factor.

    While holding ``self.lrs_lock``, every ``(key, value)`` pair of every
    mapping in ``self.lrs`` is handed to ``add_dict`` as a one-element list
    containing ``value * (1 - 2 * learnrate * regulizer) ** i``.

    :return: the dict populated through ``add_dict``
    """
    # NOTE(review): add_dict is defined elsewhere; presumably it extends the
    # list already stored under the key -- confirm against its definition.
    scaled = {}
    with self.lrs_lock:
        for entry in self.lrs:
            for name, rate in entry.items():
                # Read the attributes per item, exactly as the expression
                # was evaluated before.
                decay = (1. - 2. * self.learnrate * self.regulizer) ** self.i
                add_dict(scaled, name, [rate * decay])
    return scaled
def stat_dict_to_list(attr_stat_dict):
    """
    Split a per-attribute statistics mapping into a sorted key list and
    per-statistic value lists.

    :param attr_stat_dict: {attribute_value: {stat_key: value}}; each inner
        mapping is indexed with every key in STATS
    :return: (sorted attribute values, dict filled via add_dict with one
        single-element list per attribute value and stat key)
    """
    # NOTE(review): add_dict is defined elsewhere; presumably it concatenates
    # the one-element lists so each stat ends up aligned with the sorted
    # attribute values -- confirm against its definition.
    ordered_values = sorted(attr_stat_dict)
    stat_lists = {}
    for value in ordered_values:
        per_value = attr_stat_dict[value]
        for stat in STATS:
            add_dict(stat_lists, stat, [per_value[stat]])
    return ordered_values, stat_lists
def lsr_update(t, y, x, weights, learnrate, default_value=None):
    """
    Update values using logistic streaming regression.

    The error ``t - y`` scaled by ``learnrate`` is passed to ``add_dict`` for
    the ``"intercept"`` entry, and additionally multiplied by ``x[key]`` for
    every key of ``x``.

    :param t: target
    :param y: output
    :param x: input; iterated for its keys and indexed by them
    :param weights: mapping handed to ``add_dict`` for every update
    :param learnrate: factor applied to the error
    :param default_value: zero-argument callable whose result is passed to
        ``add_dict`` as the default; when None, a callable returning None is used
    :return: the same ``weights`` object
    """
    def _none_factory():
        return None

    make_default = _none_factory if default_value is None else default_value

    # Keep the (learnrate * error) grouping so the products match the
    # left-to-right evaluation of ``learnrate * error * x[key]``.
    step = learnrate * (t - y)

    add_dict(weights, "intercept", step, make_default())
    for key in x:
        delta = step * x[key]
        add_dict(weights, key, delta, make_default())
    return weights
def average_param_reward(files):
    """Average the logged reward per parameter setting.

    For each name in *files* the log saved under ``log/<name>`` is loaded.
    Logs without a ``'reward'`` entry are skipped. Names containing
    ``'thomp('`` are grouped by the ``learnrate`` and ``regulizer`` stored in
    ``agents/<name>``; names containing ``'greedy'`` are grouped under
    ``average[0.0]['greedy']``.

    :param files: iterable of file names present in both ``agents`` and ``log``
    :return: ``(average, lengths)`` where ``average[k1][k2]`` is the mean
        reward of the group and ``lengths[k1][k2]`` the number of rewards
        that went into it
    """
    # NOTE(review): create_dictionary and add_dict are defined elsewhere;
    # presumably they create a missing entry / append to it -- confirm.
    average = {}
    for file in files:
        # The saved objects are read back with .item() and used like dicts,
        # i.e. they are pickled object arrays. NumPy >= 1.16.3 refuses to
        # load those unless allow_pickle=True is given. Unpickling executes
        # arbitrary code, so only feed this function files the project
        # wrote itself.
        log = np.load(os.path.join('log', file), allow_pickle=True).item()
        if 'reward' not in log:
            continue

        if 'thomp(' in file:
            # The agent file is only needed for this branch, so it is not
            # read for greedy or unrecognised runs.
            agent = np.load(os.path.join('agents', file),
                            allow_pickle=True).item()
            learnrate = agent['learnrate']
            regulizer = agent['regulizer']
            create_dictionary(average, learnrate, {})
            add_dict(average[learnrate], regulizer, [log['reward']], [])
        elif 'greedy' in file:
            create_dictionary(average, 0.0, {})
            add_dict(average[0.0], 'greedy', [log['reward']], [])

    lengths = {}
    for k1 in average:
        for k2 in average[k1]:
            rewards = average[k1][k2]
            create_dictionary(lengths, k1, {})
            lengths[k1][k2] = len(rewards)
            # Replace the collected rewards by their mean.
            average[k1][k2] = sum(rewards) / len(rewards)
    return average, lengths
def run(self):
    """Play every run in ``self.run_idx`` against the agent and store the results.

    For each run id, ``self.MAX_I + 1`` interactions are performed: fetch the
    context, let the agent decide, propose the page, feed the result back to
    the agent, record the interaction under the run id in ``self.data`` and
    print a summary line. Afterwards the agent's mean and cumulative reward
    are recorded and ``self.save()`` is called.
    """
    for run_id in self.run_idx:
        for step in range(self.MAX_I + 1):
            ctx = get_context(run_id, step)
            chosen = self.agent.decide(ctx)
            outcome = propose_page(run_id, step, **chosen)
            self.agent.feedback(outcome)

            record = {'context': ctx, 'action': chosen, 'result': outcome}
            add_dict(self.data, run_id, [record], [])

            if outcome["effect"]["Success"]:
                marker = "Success!"
            else:
                marker = ""
            print(self.to_string(chosen, run_id, step, marker))

    # NOTE(review): this division raises ZeroDivisionError when
    # self.agent.i is 0 -- confirm that cannot happen (e.g. empty run_idx).
    mean_reward = self.agent.cum_reward / self.agent.i
    add_dict(self.data, "reward", mean_reward)
    add_dict(self.data, "cum_reward", self.agent.cum_reward)
    self.save()
def run(self):
    """Drive the agent through all runs in ``self.run_idx`` and persist the data.

    Each run id gets ``self.MAX_I + 1`` steps. A step obtains the context,
    asks the agent for an action, proposes the page with that action, passes
    the result to ``self.agent.feedback``, logs the interaction under the run
    id in ``self.data`` and prints a status line. After all runs, the mean
    and cumulative reward of the agent are logged and ``self.save()`` is
    invoked.
    """
    for run_id in self.run_idx:
        step_count = self.MAX_I + 1
        for j in range(step_count):
            context = get_context(run_id, j)
            action = self.agent.decide(context)
            result = propose_page(run_id, j, **action)
            self.agent.feedback(result)

            interaction = dict(context=context, action=action, result=result)
            add_dict(self.data, run_id, [interaction], [])

            succeeded = result["effect"]["Success"]
            label = "Success!" if succeeded else ""
            line = self.to_string(action, run_id, j, label)
            print(line)

    # NOTE(review): dividing by self.agent.i raises ZeroDivisionError if it
    # is 0 -- confirm the agent always receives at least one feedback.
    add_dict(self.data, "reward", self.agent.cum_reward / self.agent.i)
    add_dict(self.data, "cum_reward", self.agent.cum_reward)
    self.save()