def teach(self, num_timesteps=2000):
    for t in range(num_timesteps):
        # Estimate a learning-progress slope for each task from the history of
        # per-task score changes.
        if len(self.dscores) > 0:
            if isinstance(self.policy, ThompsonPolicy):
                # Thompson sampling: draw one past score change per task.
                slopes = [np.random.choice(drs) for drs in np.array(self.dscores).T]
            else:
                slopes = np.mean(self.dscores, axis=0)
        else:
            slopes = np.ones(self.env.num_actions)

        p = self.policy(np.abs(slopes) if self.abs else slopes)
        r, train_done, val_done = self.env.step(p)
        if val_done:
            return self.env.model.epochs

        # Record the change in per-task scores since the previous step.
        dr = r - self.prevr
        self.prevr = r
        self.dscores.append(dr)

        if self.writer:
            for i in range(self.env.num_actions):
                add_summary(self.writer, "slopes/task_%d" % (i + 1), slopes[i], self.env.model.epochs)
                add_summary(self.writer, "probabilities/task_%d" % (i + 1), p[i], self.env.model.epochs)
    return self.env.model.epochs

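
# A minimal sketch (assumption, not part of this excerpt) of the policy interface
# used above: the policy is called with one slope estimate per task and returns a
# probability distribution over tasks. The epsilon-greedy variant below is only
# illustrative; ThompsonPolicy and the other policies in the codebase may differ.
# numpy is assumed to be imported as np at module level, as elsewhere in this file.
class EgreedyPolicy:
    def __init__(self, epsilon=0.1):
        self.epsilon = epsilon

    def __call__(self, values):
        values = np.asarray(values, dtype=np.float64)
        # Put most of the probability mass on the task with the largest value,
        # keeping epsilon spread uniformly over all tasks for exploration.
        p = np.full(len(values), self.epsilon / len(values))
        p[np.argmax(values)] += 1.0 - self.epsilon
        return p
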
def teach(self, num_timesteps=2000):
    curriculum_step = 0
    for t in range(num_timesteps):
        p = self.curriculum[curriculum_step]
        r, train_done, val_done = self.env.step(p)
        if train_done and curriculum_step < len(self.curriculum) - 1:
            curriculum_step += 1
        if val_done:
            return self.env.model.epochs
        if self.writer:
            for i in range(self.env.num_actions):
                add_summary(self.writer, "probabilities/task_%d" % (i + 1), p[i], self.env.model.epochs)
    return self.env.model.epochs

def step(self, train_dist): print("Training on", train_dist) train_data = self.model.generate_data(train_dist, self.train_size) history = self.model.train_epoch(train_data, self.val_data) # train_accs = self.model.accuracy_per_length(*train_data) val_accs = self.model.accuracy_per_length(*self.val_data) train_done = history['full_number_accuracy'][-1] > 0.99 val_done = history['val_full_number_accuracy'][-1] > 0.99 if self.writer: for k, v in history.items(): add_summary(self.writer, "model/" + k, v[-1], self.model.epochs) for i in range(self.num_actions): # add_summary(self.writer, "train_accuracies/task_%d" % (i + 1), train_accs[i], self.model.epochs) add_summary(self.writer, "valid_accuracies/task_%d" % (i + 1), val_accs[i], self.model.epochs) return val_accs, train_done, val_done
def teach(self, num_timesteps=2000):
    for t in range(num_timesteps // self.window_size):
        p = self.policy(np.abs(self.Q) if self.abs else self.Q)
        # Collect per-task scores over a window of steps under the same distribution.
        scores = [[] for _ in range(len(self.Q))]
        for i in range(self.window_size):
            r, train_done, val_done = self.env.step(p)
            if val_done:
                return self.env.model.epochs
            for a, score in enumerate(r):
                if not np.isnan(score):
                    scores[a].append(score)
        # Estimate the learning-progress slope of each task within the window;
        # default to 1 when a task has fewer than two observations.
        s = [estimate_slope(list(range(len(sc))), sc) if len(sc) > 1 else 1
             for sc in scores]
        self.Q += self.lr * (s - self.Q)
        if self.writer:
            for i in range(self.env.num_actions):
                add_summary(self.writer, "Q_values/task_%d" % (i + 1), self.Q[i], self.env.model.epochs)
                add_summary(self.writer, "slopes/task_%d" % (i + 1), s[i], self.env.model.epochs)
                add_summary(self.writer, "probabilities/task_%d" % (i + 1), p[i], self.env.model.epochs)
    return self.env.model.epochs

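
# Sketch of the estimate_slope helper used above and below (assumption: it fits an
# ordinary least-squares line through the recorded scores and returns its slope,
# i.e. the recent rate of improvement on a task; the actual implementation may differ).
def estimate_slope(x, y):
    # np.polyfit returns coefficients highest degree first, so index 0 is the slope.
    return np.polyfit(x, y, 1)[0]
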
def teach(self, num_timesteps=2000):
    for t in range(num_timesteps):
        # Slope of each task's full score history; default to 1 until a task has
        # at least two observations.
        slopes = [estimate_slope(timesteps, scores) if len(scores) > 1 else 1
                  for timesteps, scores in zip(self.timesteps, self.scores)]
        p = self.policy(np.abs(slopes) if self.abs else slopes)
        r, train_done, val_done = self.env.step(p)
        if val_done:
            return self.env.model.epochs
        for a, s in enumerate(r):
            if not np.isnan(s):
                self.scores[a].append(s)
                self.timesteps[a].append(t)
        if self.writer:
            for i in range(self.env.num_actions):
                add_summary(self.writer, "slopes/task_%d" % (i + 1), slopes[i], self.env.model.epochs)
                add_summary(self.writer, "probabilities/task_%d" % (i + 1), p[i], self.env.model.epochs)
    return self.env.model.epochs

def teach(self, num_timesteps=2000):
    for t in range(num_timesteps):
        p = self.policy(np.abs(self.Q) if self.abs else self.Q)
        r, train_done, val_done = self.env.step(p)
        if val_done:
            return self.env.model.epochs
        s = r - self.prevr
        # safeguard against not sampling a particular action at all
        s = np.nan_to_num(s)
        self.Q += self.lr * (s - self.Q)
        self.prevr = r
        if self.writer:
            for i in range(self.env.num_actions):
                add_summary(self.writer, "Q_values/task_%d" % (i + 1), self.Q[i], self.env.model.epochs)
                add_summary(self.writer, "slopes/task_%d" % (i + 1), s[i], self.env.model.epochs)
                add_summary(self.writer, "probabilities/task_%d" % (i + 1), p[i], self.env.model.epochs)
    return self.env.model.epochs
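
# The update self.Q += self.lr * (s - self.Q) above is an exponential moving
# average of per-task score changes, so tasks whose scores keep improving retain a
# high Q. A tiny illustrative check (not part of the original code):
if __name__ == "__main__":
    Q = np.zeros(3)
    lr = 0.1
    for delta in ([0.2, 0.0, -0.1], [0.3, 0.1, -0.2]):
        Q += lr * (np.array(delta) - Q)
    print(Q)  # task 0, with consistently positive deltas, ends up with the largest Q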