Example #1 (score: 0) — file: save_load.py, project: ymd-h/cpprb
    def test_load_to_filled_buffer(self):
        """
        Load to already filled buffer

        Add to transitions
        """
        capacity = 10
        env_dict = {"a": {}}

        src = ReplayBuffer(capacity, env_dict)
        dst = ReplayBuffer(capacity, env_dict)
        dst_v1 = ReplayBuffer(capacity, env_dict)

        saved = [1, 2, 3, 4]
        preexisting = [5, 6]

        src.add(a=saved)
        dst.add(a=preexisting)
        dst_v1.add(a=preexisting)

        fname = "filled.npz"
        src.save_transitions(fname)
        dst.load_transitions(fname)
        dst_v1.load_transitions(v(1, fname))

        expected = src.get_all_transitions()
        got = dst.get_all_transitions()
        got_v1 = dst_v1.get_all_transitions()

        # Loaded transitions are appended after the pre-existing ones.
        np.testing.assert_allclose(expected["a"], got["a"][len(preexisting):])
        np.testing.assert_allclose(expected["a"], got_v1["a"][len(preexisting):])
Example #2 (score: 0) — file: save_load.py, project: ymd-h/cpprb
    def test_smaller_buffer(self):
        """
        Load to smaller buffer

        Loaded buffer only stored last buffer_size transitions
        """
        buffer_size1 = 10
        buffer_size2 = 4
        env_dict = {"a": {}}

        rb1 = ReplayBuffer(buffer_size1, env_dict)
        rb2 = ReplayBuffer(buffer_size2, env_dict)
        rb3 = ReplayBuffer(buffer_size2, env_dict)

        a = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

        # Bug fix: `a` was defined but never added, so the save/load round
        # trip operated on empty buffers and the final assertion passed
        # vacuously (comparing empty arrays).
        rb1.add(a=a)

        fname = "smaller.npz"
        rb1.save_transitions(fname)
        rb2.load_transitions(fname)
        rb3.load_transitions(v(1,fname))

        t1 = rb1.get_all_transitions()
        t2 = rb2.get_all_transitions()
        t3 = rb3.get_all_transitions()

        # Only the last `buffer_size2` transitions fit into the smaller buffers.
        np.testing.assert_allclose(t1["a"][-buffer_size2:],t2["a"])
        # Bug fix: t3 was computed but never checked.
        np.testing.assert_allclose(t1["a"][-buffer_size2:], t3["a"])
Example #3 (score: 0) — file: save_load.py, project: ymd-h/cpprb
    def test_unsafe_next_of_stack_compress(self):
        """
        Load next_of and stack_compress transitions
        """
        capacity = 10
        env_dict = {"a": {"shape": 3}}

        kwargs = {"next_of": "a", "stack_compress": "a"}
        src = ReplayBuffer(capacity, env_dict, **kwargs)
        dst = ReplayBuffer(capacity, env_dict, **kwargs)
        dst_v1 = ReplayBuffer(capacity, env_dict, **kwargs)

        # Overlapping 3-frames: [1,2,3], [2,3,4], ..., [6,7,8]
        frames = [[i, i + 1, i + 2] for i in range(1, 7)]

        src.add(a=frames[:-1], next_a=frames[1:])

        fname = "unsafe_next_of_stack_compress.npz"
        src.save_transitions(fname, safe=False)
        dst.load_transitions(fname)
        dst_v1.load_transitions(v(1, fname))

        expected = src.get_all_transitions()
        for loaded in (dst.get_all_transitions(), dst_v1.get_all_transitions()):
            np.testing.assert_allclose(expected["a"], loaded["a"])
            np.testing.assert_allclose(expected["next_a"], loaded["next_a"])
Example #4 (score: 0) — file: save_load.py, project: ymd-h/cpprb
    def test_stack_compress(self):
        """
        Load stack_compress transitions
        """
        capacity = 10
        env_dict = {"a": {"shape": 3}}

        src = ReplayBuffer(capacity, env_dict, stack_compress="a")
        dst = ReplayBuffer(capacity, env_dict, stack_compress="a")
        dst_v1 = ReplayBuffer(capacity, env_dict, stack_compress="a")

        # Overlapping 3-frames: [1,2,3], [2,3,4], [3,4,5], [4,5,6]
        frames = [[i, i + 1, i + 2] for i in range(1, 5)]

        src.add(a=frames)

        fname = "stack_compress.npz"
        src.save_transitions(fname)
        dst.load_transitions(fname)
        dst_v1.load_transitions(v(1, fname))

        expected = src.get_all_transitions()["a"]
        np.testing.assert_allclose(expected, dst.get_all_transitions()["a"])
        np.testing.assert_allclose(expected, dst_v1.get_all_transitions()["a"])
Example #5 (score: 0) — file: save_load.py, project: ymd-h/cpprb
    def test_incompatible_unsafe_stack_compress(self):
        """
        Load incompatible stack_compress transitions with unsafe mode
        """
        buffer_size = 10
        env_dict = {"a": {"shape": 3}}

        rb1 = ReplayBuffer(buffer_size, env_dict, stack_compress="a")
        rb2 = ReplayBuffer(buffer_size, env_dict)
        rb3 = ReplayBuffer(buffer_size, env_dict)

        a = [[1, 2, 3],
             [2, 3, 4],
             [3, 4, 5],
             [4, 5, 6]]

        rb1.add(a=a)

        fname="incompatible_unsafe_stack_compress.npz"
        rb1.save_transitions(fname, safe=False)
        rb2.load_transitions(fname)
        # Consistency fix: every sibling test exercises the versioned-file
        # path by loading v(1, fname) into rb3; this test accidentally
        # re-loaded the plain file, leaving that path untested here.
        rb3.load_transitions(v(1, fname))

        t1 = rb1.get_all_transitions()
        t2 = rb2.get_all_transitions()
        t3 = rb3.get_all_transitions()

        np.testing.assert_allclose(t1["a"], t2["a"])
        np.testing.assert_allclose(t1["a"], t3["a"])
Example #6 (score: 0) — file: save_load.py, project: ymd-h/cpprb
    def test_incompatible_unsafe_next_of(self):
        """
        Load incompatible next_of transitions with unsafe mode
        """
        capacity = 10
        src_dict = {"a": {}}
        dst_dict = {"a": {}, "next_a": {}}

        src = ReplayBuffer(capacity, src_dict, next_of="a")
        dst = ReplayBuffer(capacity, dst_dict)
        dst_v1 = ReplayBuffer(capacity, dst_dict)

        seq = [1, 2, 3, 4, 5, 6]

        src.add(a=seq[:-1], next_a=seq[1:])

        fname = "unsafe_incompatible_next_of.npz"
        src.save_transitions(fname, safe=False)
        dst.load_transitions(fname)
        dst_v1.load_transitions(v(1, fname))

        expected = src.get_all_transitions()
        for loaded in (dst.get_all_transitions(), dst_v1.get_all_transitions()):
            np.testing.assert_allclose(expected["a"], loaded["a"])
            np.testing.assert_allclose(expected["next_a"], loaded["next_a"])
Example #7 (score: 0) — file: save_load.py, project: ymd-h/cpprb
    def test_fulled_unsafe_next_of(self):
        """
        Load with already fulled buffer
        """
        capacity = 10
        env_dict = {"a": {}}

        src = ReplayBuffer(capacity, env_dict, next_of="a")
        dst = ReplayBuffer(capacity, env_dict, next_of="a")
        dst_v1 = ReplayBuffer(capacity, env_dict, next_of="a")

        # More transitions than capacity, so the source buffer wraps around.
        seq = list(range(1, 14))

        src.add(a=seq[:-1], next_a=seq[1:])

        fname = "fulled_unsafe_next_of.npz"
        src.save_transitions(fname, safe=False)
        dst.load_transitions(fname)
        dst_v1.load_transitions(v(1, fname))

        expected = src.get_all_transitions()
        for loaded in (dst.get_all_transitions(), dst_v1.get_all_transitions()):
            np.testing.assert_allclose(expected["a"], loaded["a"])
            np.testing.assert_allclose(expected["next_a"], loaded["next_a"])
Example #8 (score: 0) — file: save_load.py, project: ymd-h/cpprb
    def test_unsafe_next_of_already_filled(self):
        """
        Load unsafe next_of transitions with already filled buffer
        """
        capacity = 10
        env_dict = {"a": {}}

        src = ReplayBuffer(capacity, env_dict, next_of="a")
        dst = ReplayBuffer(capacity, env_dict, next_of="a")
        dst_v1 = ReplayBuffer(capacity, env_dict, next_of="a")

        saved_seq = [1, 2, 3, 4, 5, 6]
        pre_seq = [7, 8, 9]

        src.add(a=saved_seq[:-1], next_a=saved_seq[1:])
        dst.add(a=pre_seq[:-1], next_a=pre_seq[1:])
        dst_v1.add(a=pre_seq[:-1], next_a=pre_seq[1:])

        fname = "unsafe_next_of_already.npz"
        src.save_transitions(fname, safe=False)
        dst.load_transitions(fname)
        dst_v1.load_transitions(v(1, fname))

        # len(pre_seq) - 1 pre-existing transitions stay in front of the
        # freshly loaded ones.
        n_pre = len(pre_seq) - 1
        self.assertEqual(src.get_stored_size() + n_pre, dst.get_stored_size())
        self.assertEqual(src.get_stored_size() + n_pre, dst_v1.get_stored_size())

        expected = src.get_all_transitions()
        for loaded in (dst.get_all_transitions(), dst_v1.get_all_transitions()):
            np.testing.assert_allclose(expected["a"], loaded["a"][n_pre:])
            np.testing.assert_allclose(expected["next_a"], loaded["next_a"][n_pre:])
Example #9 (score: 0) — file: save_load.py, project: ymd-h/cpprb
    def test_next_of(self):
        """
        Load next_of transitions with safe mode

        For safe mode, next_of is not necessary at loaded buffer.
        """
        capacity = 10
        src_dict = {"a": {}}
        dst_dict = {"a": {}, "next_a": {}}

        src = ReplayBuffer(capacity, src_dict, next_of="a")
        dst = ReplayBuffer(capacity, dst_dict)
        dst_v1 = ReplayBuffer(capacity, dst_dict)

        seq = [1, 2, 3, 4, 5, 6]

        src.add(a=seq[:-1], next_a=seq[1:])

        fname = "next_of.npz"
        src.save_transitions(fname)
        dst.load_transitions(fname)
        dst_v1.load_transitions(v(1, fname))

        expected = src.get_all_transitions()
        for loaded in (dst.get_all_transitions(), dst_v1.get_all_transitions()):
            np.testing.assert_allclose(expected["a"], loaded["a"])
            np.testing.assert_allclose(expected["next_a"], loaded["next_a"])
Example #10 (score: 0) — file: save_load.py, project: ymd-h/cpprb
    def test_Nstep_incompatibility(self):
        """
        Raise ValueError when Nstep incompatibility
        """
        capacity = 10
        env_dict = {"done": {}}
        nstep_cfg = {"size": 3, "gamma": 0.99}

        src = ReplayBuffer(capacity, env_dict, Nstep=nstep_cfg)
        dst = ReplayBuffer(capacity, env_dict)
        dst_v1 = ReplayBuffer(capacity, env_dict)

        src.add(done=[0, 0, 0, 0, 1])
        src.on_episode_end()

        fname = "Nstep_raise.npz"
        src.save_transitions(fname)

        # Loading Nstep transitions into non-Nstep buffers must fail.
        with self.assertRaises(ValueError):
            dst.load_transitions(fname)

        with self.assertRaises(ValueError):
            dst_v1.load_transitions(v(1, fname))
Example #11 (score: 0) — file: save_load.py, project: ymd-h/cpprb
    def test_basic(self):
        """
        Basic Test Case

        Loaded buffer have same transitions with saved one.
        """
        capacity = 4
        env_dict = {"a": {}}

        src = ReplayBuffer(capacity, env_dict)
        dst = ReplayBuffer(capacity, env_dict)
        dst_v1 = ReplayBuffer(capacity, env_dict)

        src.add(a=[1, 2, 3, 4])

        fname = "basic.npz"
        src.save_transitions(fname)
        dst.load_transitions(fname)
        dst_v1.load_transitions(v(1, fname))

        expected = src.get_all_transitions()["a"]
        np.testing.assert_allclose(expected, dst.get_all_transitions()["a"])
        np.testing.assert_allclose(expected, dst_v1.get_all_transitions()["a"])
Example #12 (score: 0) — file: save_load.py, project: ymd-h/cpprb
    def test_load_Nstep(self):
        """
        Load Nstep transitions
        """
        capacity = 10
        env_dict = {"done": {}}
        nstep_cfg = {"size": 3, "gamma": 0.99}

        src = ReplayBuffer(capacity, env_dict, Nstep=nstep_cfg)
        dst = ReplayBuffer(capacity, env_dict, Nstep=nstep_cfg)
        dst_v1 = ReplayBuffer(capacity, env_dict, Nstep=nstep_cfg)

        src.add(done=[0, 0, 0, 0, 1])
        src.on_episode_end()

        fname = "Nstep.npz"
        src.save_transitions(fname)
        dst.load_transitions(fname)
        dst_v1.load_transitions(v(1, fname))

        expected = src.get_all_transitions()["done"]
        np.testing.assert_allclose(expected, dst.get_all_transitions()["done"])
        np.testing.assert_allclose(expected, dst_v1.get_all_transitions()["done"])
Example #13 (score: 0)
class LearnerBase(tf.Module):
    """Learn a dynamics model from replay-buffer transitions.

    Wraps a cpprb ``ReplayBuffer`` of ``(obs, act, next_obs)`` samples and
    provides full-batch / mini-batch training, k-fold validation, a simple
    hyper-parameter grid search, and optional TensorBoard logging.
    """

    # PUBLIC
    def __init__(self,
                 model,
                 filename=None,
                 bufferSize=264,
                 numEpochs=100,
                 batchSize=30,
                 log=False,
                 logPath=None):
        # model: must expose get_state_dim(), get_action_dim(),
        #   prepare_training_data(), _predict_nn(), weights(),
        #   get_weights()/update_weights(), build_step_graph(), save_params().
        # filename: optional .npz transition file to preload into the buffer.
        # logPath: required when log=True (joined into the log directory).
        # NOTE(review): bufferSize=264 looks like it may have been intended
        # as 256 -- confirm with the original author.
        self.model = model
        self.sDim = model.get_state_dim()
        self.aDim = model.get_action_dim()
        # NOTE(review): learning_rate=0.5 is unusually large for Adam; it is
        # replaced by train_all()/k_fold_validation() before any training.
        self.optimizer = tf.optimizers.Adam(learning_rate=0.5)
        # Replay buffer stores column vectors: obs/next_obs are (sDim, 1),
        # actions are (aDim, 1).
        self.rb = ReplayBuffer(bufferSize,
                               env_dict={
                                   "obs": {
                                       "shape": (self.sDim, 1)
                                   },
                                   "act": {
                                       "shape": (self.aDim, 1)
                                   },
                                   "next_obs": {
                                       "shape": (self.sDim, 1)
                                   }
                               })
        self.numEpochs = numEpochs
        self.batchSize = batchSize

        if filename is not None:
            self.load_rb(filename)

        self.log = log
        self.step = 0  # global step counter for TensorBoard scalars

        if self.log:
            stamp = datetime.now().strftime("%Y.%m.%d-%H:%M:%S")
            self.logdir = os.path.join(logPath, "learner", stamp)
            self.writer = tf.summary.create_file_writer(self.logdir)
            self._save_graph()

    def load_rb(self, filename):
        """Load previously saved transitions into the replay buffer."""
        self.rb.load_transitions(filename)

    def add_rb(self, x, u, xNext):
        """Store one (state, action, next state) transition."""
        self.rb.add(obs=x, act=u, next_obs=xNext)

    def train(self, X, y, batchSize=-1, epoch=1, learninRate=0.1, kfold=None):
        """Run `epoch` passes of gradient steps over (X, y).

        batchSize == -1 performs one full-batch step per epoch; otherwise
        mini-batches of `batchSize` rows are used.  `kfold` (fold index) and
        `learninRate` (sic -- the typo is part of the public signature and
        callers use it by keyword) are only used here to build TensorBoard
        scope names; the optimizer's actual learning rate is set elsewhere.
        """
        for e in range(epoch):
            if batchSize == -1:
                batchLoss = self._train_step(X, y)
                if self.log:
                    with self.writer.as_default():
                        if kfold is not None:
                            scope = "epoch{}/batch{}/lr{}/loss".format(
                                epoch, batchSize, learninRate)
                        else:
                            scope = "Loss"
                        tf.summary.scalar(scope, batchLoss, self.step)
                        self.step += 1
                pass
            # NOTE(review): when batchSize == -1 the range below has step -1
            # and is empty, so this loop is skipped; `continue` above (in
            # place of `pass`) would state that intent more clearly.
            for i in range(0, X.shape[0], batchSize):
                batchLoss = self._train_step(X[i:i + batchSize],
                                             y[i:i + batchSize])
                if self.log:
                    with self.writer.as_default():
                        if kfold is not None:
                            scope = "epoch{}/batch{}/lr{}/loss_fold{}".format(
                                epoch, batchSize, learninRate, kfold)
                        else:
                            scope = "Loss"
                        tf.summary.scalar(scope, batchLoss, self.step)
                        self.step += 1

    def rb_trans(self):
        """Return a copy of every transition currently stored in the buffer."""
        return self.rb.get_all_transitions().copy()

    def save_rb(self, filename):
        """Save all buffered transitions to `filename` (.npz)."""
        self.rb.save_transitions(filename)

    def save_params(self, step):
        """Persist model parameters under the TensorBoard log directory."""
        self.model.save_params(self.logdir, step)

    def grid_search(self, trajs, actionSeqs):
        """Exhaustive search over (learning rate, batch size, epochs).

        For every combination: run k-fold validation, then retrain on all
        data and report the rollout validation error against `trajs`.
        Model weights are restored after each combination.
        """
        init_weights = self.model.get_weights()

        learningRate = np.linspace(0.0001, 0.1, 10)
        batchSize = np.array([-1])  # full-batch training only
        epoch = np.array([100, 500, 1000])

        mean = []
        for lr in learningRate:
            for bs in batchSize:
                for e in epoch:
                    fold = self.k_fold_validation(learningRate=lr,
                                                  batchSize=bs,
                                                  epoch=e,
                                                  k=10)
                    mean.append(np.mean(fold))
                    print("*" * 5, " Grid ", 5 * "*")
                    print("lr: ", lr)
                    print("bs: ", bs)
                    print("e: ", e)
                    print("fold: ", fold)
                    print("mean: ", np.mean(fold))

                    self.train_all(learningRate=lr, batchSize=bs, epoch=e)
                    err = self.validate(actionSeqs, trajs)
                    print("validation error: ", err.numpy())
                    self.model.update_weights(init_weights, msg=False)
        # NOTE(review): "Best" is reported as the LARGEST mean fold value;
        # since k_fold_validation returns losses, np.min looks intended --
        # confirm.
        print("Best mean:", np.max(mean))

    def train_all(self, learningRate=0.1, batchSize=32, epoch=100):
        """Train the model on every transition stored in the buffer."""
        # A fresh optimizer picks up the requested learning rate.
        self.optimizer = tf.optimizers.Adam(learning_rate=learningRate)
        data = self.rb_trans()
        (X, y) = self.model.prepare_training_data(data['obs'],
                                                  data['next_obs'],
                                                  data['act'])
        self.step = 0
        self.train(X,
                   y,
                   batchSize=batchSize,
                   epoch=epoch,
                   learninRate=learningRate)

    def k_fold_validation(self,
                          k=10,
                          learningRate=0.1,
                          batchSize=32,
                          epoch=100):
        """Return a list of per-fold evaluation losses (k-fold CV)."""
        # First get all the data
        self.optimizer = tf.optimizers.Adam(learning_rate=learningRate)
        data = self.rb_trans()
        (X, y) = self.model.prepare_training_data(data['obs'],
                                                  data['next_obs'],
                                                  data['act'])
        kfold = KFold(n_splits=k, shuffle=True)

        init_weights = self.model.get_weights()
        fold = []
        X = X.numpy()
        y = y.numpy()
        i = 0
        for train, test in kfold.split(X, y):
            self.step = 0
            self.train(X[train],
                       y[train],
                       batchSize=batchSize,
                       epoch=epoch,
                       learninRate=learningRate,
                       kfold=i)
            # NOTE(review): weights are reset to init BEFORE evaluate(), so
            # lossFold measures the re-initialized model rather than the
            # fold-trained one; evaluating first looks intended -- confirm.
            self.model.update_weights(init_weights, msg=False)
            lossFold = self.evaluate(X[test], y[test])
            fold.append(lossFold.numpy())
            i += 1

        self.model.update_weights(init_weights, msg=False)
        return fold

    def evaluate(self, X, y):
        """Mean squared error of the model's prediction on (X, y)."""
        pred = self.model._predict_nn("Eval", np.squeeze(X, axis=-1))
        loss = tf.reduce_mean(tf.math.squared_difference(pred, y), name="loss")
        return loss

    def plot_seq(self, traj, gtTraj):
        """Plot predicted vs ground-truth state components side by side.

        Row 0: position (cols 0-2) and quaternion (cols 3-6); row 1 is
        labeled linear/angular velocity.
        NOTE(review): the row-1 plots reuse columns 0-6 of the SAME arrays
        as row 0; presumably velocity column offsets were intended --
        confirm against the 13-dim state layout.
        """
        fig, axs = plt.subplots(figsize=(20, 10), nrows=2, ncols=8)
        # Position
        axs[0, 0].plot(traj[:, 0])
        axs[0, 0].plot(gtTraj[:, 0])

        axs[0, 1].plot(traj[:, 1])
        axs[0, 1].plot(gtTraj[:, 1])

        axs[0, 2].plot(traj[:, 2])
        axs[0, 2].plot(gtTraj[:, 2])

        # Quaternion
        axs[0, 3].plot(traj[:, 3])
        axs[0, 3].plot(gtTraj[:, 3])

        axs[0, 4].plot(traj[:, 4])
        axs[0, 4].plot(gtTraj[:, 4])

        axs[0, 5].plot(traj[:, 5])
        axs[0, 5].plot(gtTraj[:, 5])

        axs[0, 6].plot(traj[:, 6])
        axs[0, 6].plot(gtTraj[:, 6])

        # Lin Vel
        axs[1, 0].plot(traj[:, 0])
        axs[1, 0].plot(gtTraj[:, 0])

        axs[1, 1].plot(traj[:, 1])
        axs[1, 1].plot(gtTraj[:, 1])

        axs[1, 2].plot(traj[:, 2])
        axs[1, 2].plot(gtTraj[:, 2])

        # Ang vel
        axs[1, 3].plot(traj[:, 3])
        axs[1, 3].plot(gtTraj[:, 3])

        axs[1, 4].plot(traj[:, 4])
        axs[1, 4].plot(gtTraj[:, 4])

        axs[1, 5].plot(traj[:, 5])
        axs[1, 5].plot(gtTraj[:, 5])
        plt.show()

    def validate(self, actionSeqs, gtTrajs):
        '''
            computes the error of the model for a number of trajectories with
            the matching action sequences.

            - input:
            --------
                - actionSeqs: Tensor of the action sequences.
                    Shape [k, tau, 6, 1]

                - gtTrajs: Tensor of the ground truth trajectories.
                    Shape [k, tau, 13, 1]

            - output:
            ---------
                - L(nn(actionSeqs), trajs), the loss between the predicted trajectory
                and the ground truth trajectory.
        '''
        tau = actionSeqs.shape[1]
        k = actionSeqs.shape[0]
        # Roll the model forward from each trajectory's first state.
        state = np.expand_dims(gtTrajs[:, 0], axis=-1)
        trajs = [np.expand_dims(state, axis=1)]
        # PAY ATTENTION TO THE FOR LOOPS WITH @tf.function.
        for i in range(tau - 1):
            with tf.name_scope("Rollout_" + str(i)):
                with tf.name_scope("Prepare_data_" + str(i)) as pd:
                    # make the action a [1, 6, 1] tensor
                    action = np.expand_dims(actionSeqs[:, i], axis=-1)
                with tf.name_scope("Step_" + str(i)) as s:
                    nextState = self.model.build_step_graph(s, state, action)
            state = nextState
            trajs.append(np.expand_dims(state, axis=1))

        # Frobenius norm of the whole prediction error, averaged over the
        # k trajectories.
        trajs = np.squeeze(np.concatenate(trajs, axis=1), axis=-1)
        err = tf.linalg.norm(tf.subtract(trajs, gtTrajs)) / k

        self.plot_seq(trajs[0], gtTrajs[0])
        return err

    # PRIVATE
    def _train_step(self, X, y):
        """Single gradient step on (X, y); returns the batch loss tensor."""
        # If batchSize = -1, feed in the entire batch
        with tf.GradientTape() as tape:
            pred = self.model._predict_nn("train", np.squeeze(X, axis=-1))
            loss = tf.reduce_mean(tf.math.squared_difference(pred, y),
                                  name="loss")
            # NOTE(review): gradient/apply/return sit inside the tape context;
            # this works (the tape is used once) but they are conventionally
            # placed outside the `with` block.
            grads = tape.gradient(loss, self.model.weights())
            self.optimizer.apply_gradients(zip(grads, self.model.weights()))
            return loss

    def _save_graph(self):
        """Trace the model's step graph once and write it to TensorBoard."""
        # Dummy zero state/action tensors just to trace the graph shape.
        state = tf.zeros((1, self.model.get_state_dim(), 1), dtype=tf.float64)
        action = tf.zeros((1, self.model.get_action_dim(), 1),
                          dtype=tf.float64)
        with self.writer.as_default():
            graph = tf.function(
                self.model.build_step_graph).get_concrete_function(
                    "graph", state, action).graph
            # visualize
            summary_ops_v2.graph(graph.as_graph_def())