def feedback(self, result):
     """Forward the outcome to the parent class, then train a random subset of models.

     The design vector is rebuilt from ``self.last_context`` and
     ``self.last_action``. Each entry of ``self.lrs`` is then independently
     selected with a coin flip; a selected entry is replaced by the result of
     ``lsr_update`` while ``self.lrs_lock`` is held.

     Args:
         result: Outcome of the last decision; passed unchanged to the
             parent's ``feedback``.

     Reads ``self.last_success``, which is presumably set by the parent's
     ``feedback`` -- verify against the base class.
     """
     super().feedback(result)
     features = ThompsonLogisticAgent.design(self.last_context, self.last_action)
     for idx, model in enumerate(self.lrs):
         # One independent draw per model: roughly half of them are skipped
         # for this observation.
         if random.random() <= .5:
             continue
         predicted = lr_predict(features, model, self.i, self.learnrate, self.regulizer, regression.random_coef)
         with self.lrs_lock:
             # NOTE(review): the update is applied twice using the same
             # pre-update prediction. Together with the coin flip above this
             # looks like a weight-2 ("double-or-nothing" bootstrap) step --
             # confirm it is intentional and not a duplicated line.
             for _ in range(2):
                 self.lrs[idx] = lsr_update(self.last_success, predicted, features, self.lrs[idx], self.learnrate, regression.random_coef)
 def decide(self, context):
     """Choose the sampled action with the highest ``prediction * price``.

     One model is drawn at random from ``self.lrs`` and ``self.action_n``
     candidate actions are drawn (without replacement) from ``self.actions``.
     Every candidate is scored as the model's prediction for its design
     vector multiplied by the candidate's ``'price'`` entry.

     Args:
         context: Mapping whose ``"context"`` entry is stored in
             ``self.last_context`` and used to build the design vectors.

     Returns:
         The best-scoring candidate, which is also stored in
         ``self.last_action``. ``None`` if no candidate scores above the
         ``-1e100`` starting threshold (e.g. when no candidates are drawn).
     """
     self.last_context = context["context"]
     (model,) = random.sample(self.lrs, 1)
     # Reset before sampling candidates so a stale action never survives.
     self.last_action = None
     top_score = -1e100
     candidates = random.sample(self.actions, self.action_n)
     for candidate in candidates:
         features = ThompsonLogisticAgent.design(self.last_context, candidate)
         prob = lr_predict(features, model, self.i, self.learnrate, self.regulizer, regression.random_coef)
         score = prob * candidate['price']
         # Strict comparison: on ties the earliest candidate is kept.
         if score > top_score:
             top_score = score
             self.last_action = candidate
     return self.last_action