Example #1
    def teach(self, num_timesteps=2000):
        for t in range(num_timesteps):
            # estimate each task's learning progress (slope) from logged deltas
            if len(self.dscores) > 0:
                if isinstance(self.policy, ThompsonPolicy):
                    # Thompson sampling: draw one random past delta per task
                    slopes = [
                        np.random.choice(drs)
                        for drs in np.array(self.dscores).T
                    ]
                else:
                    slopes = np.mean(self.dscores, axis=0)
            else:
                # no history yet: treat all tasks as equally promising
                slopes = np.ones(self.env.num_actions)

            p = self.policy(np.abs(slopes) if self.abs else slopes)
            r, train_done, val_done = self.env.step(p)
            if val_done:
                return self.env.model.epochs

            # log delta score
            dr = r - self.prevr
            self.prevr = r
            self.dscores.append(dr)

            if self.writer:
                for i in range(self.env.num_actions):
                    add_summary(self.writer, "slopes/task_%d" % (i + 1),
                                slopes[i], self.env.model.epochs)
                    add_summary(self.writer, "probabilities/task_%d" % (i + 1),
                                p[i], self.env.model.epochs)

        return self.env.model.epochs
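
In all of these examples, self.policy is a callable that maps per-task scores (here, slope estimates) to a probability distribution over tasks. The sketch below shows one plausible form, a Boltzmann (softmax) policy; the class name and temperature parameter are illustrative assumptions, not the original implementation.

import numpy as np

class BoltzmannPolicy:
    """Hypothetical sketch: softmax over scores; higher temperature = flatter."""

    def __init__(self, temperature=1.0):
        self.temperature = temperature

    def __call__(self, scores):
        scores = np.asarray(scores, dtype=np.float64)
        # subtract the max before exponentiating for numerical stability
        e = np.exp((scores - scores.max()) / self.temperature)
        return e / e.sum()
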
Example #2
    def teach(self, num_timesteps=2000):
        curriculum_step = 0
        for t in range(num_timesteps):
            # train on the current stage of the hand-specified curriculum
            p = self.curriculum[curriculum_step]
            r, train_done, val_done = self.env.step(p)
            # advance to the next stage once the current one is mastered
            if train_done and curriculum_step < len(self.curriculum) - 1:
                curriculum_step += 1
            if val_done:
                return self.env.model.epochs

            if self.writer:
                for i in range(self.env.num_actions):
                    add_summary(self.writer, "probabilities/task_%d" % (i + 1),
                                p[i], self.env.model.epochs)

        return self.env.model.epochs
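
Example #2 assumes self.curriculum is a sequence of task distributions, one per stage, stepped through as training progresses. A hand-written curriculum for four tasks might look like the sketch below; the exact stages are an assumption for illustration.

import numpy as np

# Hypothetical 4-task curriculum: start on the easiest task, then spread
# probability mass toward harder tasks as each stage is mastered.
curriculum = [
    np.array([1.0, 0.0, 0.0, 0.0]),
    np.array([0.5, 0.5, 0.0, 0.0]),
    np.array([0.25, 0.25, 0.25, 0.25]),
]
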
Example #3
    def step(self, train_dist):
        print("Training on", train_dist)
        train_data = self.model.generate_data(train_dist, self.train_size)
        history = self.model.train_epoch(train_data, self.val_data)
        # train_accs = self.model.accuracy_per_length(*train_data)
        val_accs = self.model.accuracy_per_length(*self.val_data)

        # consider training/validation "done" once full-number accuracy exceeds 99%
        train_done = history['full_number_accuracy'][-1] > 0.99
        val_done = history['val_full_number_accuracy'][-1] > 0.99

        if self.writer:
            for k, v in history.items():
                add_summary(self.writer, "model/" + k, v[-1],
                            self.model.epochs)
            for i in range(self.num_actions):
                # add_summary(self.writer, "train_accuracies/task_%d" % (i + 1), train_accs[i], self.model.epochs)
                add_summary(self.writer, "valid_accuracies/task_%d" % (i + 1),
                            val_accs[i], self.model.epochs)

        return val_accs, train_done, val_done
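
Every example logs via an add_summary helper that is not shown. A minimal sketch consistent with the call sites above, assuming a TF1-style tf.summary.FileWriter is passed in as the writer:

import tensorflow as tf

def add_summary(writer, tag, value, step):
    # Hypothetical helper: write one scalar value to TensorBoard.
    summary = tf.Summary(value=[tf.Summary.Value(tag=tag,
                                                 simple_value=float(value))])
    writer.add_summary(summary, step)
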
Example #4
    def teach(self, num_timesteps=2000):
        for t in range(num_timesteps // self.window_size):
            p = self.policy(np.abs(self.Q) if self.abs else self.Q)
            # collect per-task scores over a window of environment steps
            scores = [[] for _ in range(len(self.Q))]
            for i in range(self.window_size):
                r, train_done, val_done = self.env.step(p)
                if val_done:
                    return self.env.model.epochs
                for a, score in enumerate(r):
                    if not np.isnan(score):
                        scores[a].append(score)
            # slope of each task's score curve over the window = learning progress
            s = [
                estimate_slope(list(range(len(sc))), sc) if len(sc) > 1 else 1
                for sc in scores
            ]
            # exponentially weighted moving average of the slope estimates
            self.Q += self.lr * (s - self.Q)

            if self.writer:
                for i in range(self.env.num_actions):
                    add_summary(self.writer, "Q_values/task_%d" % (i + 1),
                                self.Q[i], self.env.model.epochs)
                    add_summary(self.writer, "slopes/task_%d" % (i + 1), s[i],
                                self.env.model.epochs)
                    add_summary(self.writer, "probabilities/task_%d" % (i + 1),
                                p[i], self.env.model.epochs)

        return self.env.model.epochs
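
Examples #4 and #5 both call an estimate_slope(x, y) helper that is not shown. A minimal sketch consistent with those call sites, assuming an ordinary least-squares line fit whose slope serves as the learning-progress signal:

import numpy as np

def estimate_slope(x, y):
    # Fit y = c*x + b by least squares and return the slope c.
    assert len(x) == len(y)
    A = np.vstack([x, np.ones(len(x))]).T
    c, _ = np.linalg.lstsq(A, y, rcond=None)[0]
    return c
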
Example #5
    def teach(self, num_timesteps=2000):
        for t in range(num_timesteps):
            # estimate each task's learning progress from its full score history
            slopes = [
                estimate_slope(timesteps, scores) if len(scores) > 1 else 1
                for timesteps, scores in zip(self.timesteps, self.scores)
            ]
            p = self.policy(np.abs(slopes) if self.abs else slopes)
            r, train_done, val_done = self.env.step(p)
            if val_done:
                return self.env.model.epochs
            # record scores only for tasks that were actually sampled this step
            for a, s in enumerate(r):
                if not np.isnan(s):
                    self.scores[a].append(s)
                    self.timesteps[a].append(t)

            if self.writer:
                for i in range(self.env.num_actions):
                    add_summary(self.writer, "slopes/task_%d" % (i + 1),
                                slopes[i], self.env.model.epochs)
                    add_summary(self.writer, "probabilities/task_%d" % (i + 1),
                                p[i], self.env.model.epochs)

        return self.env.model.epochs
Example #6
    def teach(self, num_timesteps=2000):
        for t in range(num_timesteps):
            p = self.policy(np.abs(self.Q) if self.abs else self.Q)
            r, train_done, val_done = self.env.step(p)
            if val_done:
                return self.env.model.epochs
            # reward signal is the change in score since the previous step
            s = r - self.prevr

            # safeguard against not sampling a particular action at all
            s = np.nan_to_num(s)
            # exponentially weighted moving average of the score deltas
            self.Q += self.lr * (s - self.Q)
            self.prevr = r

            if self.writer:
                for i in range(self.env.num_actions):
                    add_summary(self.writer, "Q_values/task_%d" % (i + 1),
                                self.Q[i], self.env.model.epochs)
                    add_summary(self.writer, "slopes/task_%d" % (i + 1), s[i],
                                self.env.model.epochs)
                    add_summary(self.writer, "probabilities/task_%d" % (i + 1),
                                p[i], self.env.model.epochs)

        return self.env.model.epochs
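
Tying the pieces together, any of these teachers would be driven in roughly the same way: construct an environment wrapping the model, pick a policy, and call teach(). The class names and constructor arguments below are assumptions for illustration; the actual constructors are not shown in these examples.

# Hypothetical wiring of the components above.
env = AdditionEnv(model, train_size=4096)    # must expose step(p), num_actions, model
policy = BoltzmannPolicy(temperature=0.001)  # e.g. the sketch after Example #1
teacher = Teacher(env, policy, lr=0.1)       # any of the teach() variants above
epochs = teacher.teach(num_timesteps=2000)
print("validation solved after", epochs, "epochs")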