# Assumed imports for these excerpts (not shown in the original snippets):
#   import datetime, pickle, time
#   import numpy as np
#   import tensorflow as tf
#   from collections import deque
#   import losses, models

nb_epochs = 100

    with tf.Session(config=tf_config) as sess:
        sess_start = time.time()
        datetime_str = datetime.datetime.now().strftime("%Y-%m-%d-%H:%M:%S")
        data_log = open('logs/' + datetime_str + '_log.txt', 'w+')

        # generic data gathering

        X_data = []
        y_data = []

        # Forward-kinematics regressor: 4 joint angles in, 3-D end-effector position out
        x = tf.placeholder(dtype=tf.float32, shape=[None, 4])
        real_y = tf.placeholder(dtype=tf.float32, shape=[None, 3])
        y = fk_learner(x) * pos_bounds
        loss = losses.loss_p(real_y, y)
        train_step = tf.train.AdamOptimizer(1e-5).minimize(loss)
        sess.run(tf.global_variables_initializer())

        saver = tf.train.Saver()

        if load is None:
            print('Loading Data...')
            data_file = 'real_widowx_train_10hz_100K_default_processed.pkl'
            train_data = pickle.load(open(data_file, 'rb+'))
            for episode in train_data:
                # all but the last 3 entries are the (scaled) joint angles,
                # the last 3 are the end-effector position
                X_data.append(episode[0][:-3] * angle_bounds)
                y_data.append(episode[0][-3:])
            x_batches = batch(X_data, 32)
            y_batches = batch(y_data, 32)
            print('Training...')
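
# The `batch` helper used above is not defined in this excerpt. A minimal sketch,
# assuming it simply chunks a list into fixed-size mini-batches (the real helper
# may differ):
def batch(data, batch_size):
    return [np.array(data[i:i + batch_size])
            for i in range(0, len(data), batch_size)]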
Example 2
    def __init__(self, env_in):
        EnvLearner.__init__(self, env_in)
        # Initialization
        self.buff_len = 10
        self.seq_len = 1
        self.max_seq_len = 5
        self.last_r = np.array([0.0]).flatten()
        self.buffer = deque(self.buff_init * self.buff_len, maxlen=self.buff_len)
        dropout_rate = 0.5
        self.lr_disc = 1e-5
        self.lr_gen = 1e-5
        print('General Stats: ')
        print('Drop Rate: ' + str(dropout_rate))
        print('Buffer Len: ' + str(self.buff_len))
        print('Start Sequence Len: ' + str(self.seq_len))
        print('End Sequence Len: ' + str(self.max_seq_len))
        print('gan_model:')
        print('Discriminator Learning Rate: ' + str(self.lr_disc))
        print('Generator Learning Rate: ' + str(self.lr_gen))

        """ State Prediction """
        self.x_seq = tf.placeholder(dtype=tf.float32, shape=([None, self.buff_init[0].size * self.buff_len]))
        self.y_seq = tf.placeholder(dtype=tf.float32, shape=([None, self.state_dim * self.max_seq_len]))
        self.a_seq = tf.placeholder(dtype=tf.float32, shape=([None, self.act_dim * self.max_seq_len]))

        a_seq_split = tf.split(self.a_seq, self.max_seq_len, 1)
        y_seq_split = tf.split(self.y_seq, self.max_seq_len, 1)

        input_tmp_seq = tf.split(self.x_seq, self.buff_len, 1)
        self.out_state_raw = models.generator_model(input_tmp_seq, self.state_dim, drop_rate=dropout_rate)

        self.out_state = self.out_state_raw*self.state_mul_const
        # loss_seq: prediction error against the ground-truth future states
        # loss_last: consistency between successive predictions (and the current state)
        self.loss_seq = 0.0
        self.loss_last = 0.0
        out_states = []
        out_states.append(self.out_state_raw)
        self.loss_seq += losses.loss_p(out_states[-1], y_seq_split[0])
        self.loss_last += losses.loss_p(out_states[-1], tf.slice(input_tmp_seq[-1], [0, 0], [-1, self.state_dim]))
        for i in range(1, self.seq_len):
            state_tmp = tf.slice(self.x_seq[:],
                                 [0, self.buff_init[0].size],
                                 [-1, -1])
            state_tmp = tf.concat([state_tmp, out_states[-1]], axis=1)
            input_tmp = tf.concat([state_tmp, a_seq_split[i]], axis=1)

            input_tmp_seq = tf.split(input_tmp, self.buff_len, 1)
            out_state_raw_tmp = models.generator_model(input_tmp_seq, self.state_dim, drop_rate=dropout_rate)
            out_states.append(out_state_raw_tmp)
            self.loss_seq += losses.loss_p(out_states[-1], y_seq_split[i])
            self.loss_last += losses.loss_p(out_states[-1], out_states[-2])

        self.out_state_seq = tf.concat(out_states, axis=1)

        self.loss_state = self.loss_seq

        self.train_step_state = tf.train.AdamOptimizer(self.lr_gen).minimize(self.loss_state)

        """ GAN Stuff """
        x_seq = []
        g_seq = []
        out_seq_split = tf.split(self.out_state_seq, self.seq_len, 1)
        for i in range(self.seq_len):
            x_seq.append(tf.concat([y_seq_split[i], a_seq_split[i]], axis=1))
            g_seq.append(tf.concat([out_seq_split[i], a_seq_split[i]], axis=1))

        x_in = x_seq
        g_in = g_seq
        self.Dx = models.discriminator_model(x_in, drop_rate=dropout_rate)
        self.Dg = models.discriminator_model(g_in, drop_rate=dropout_rate)
        var_d = tf.trainable_variables('discriminator')
        var_g = tf.trainable_variables('generator')
        self.g_lambda = 1.0  # weight on the adversarial generator loss
        self.p_lambda = 0.0  # weight on the sequence-prediction loss
        self.t_lambda = 0.0  # weight on the temporal-consistency loss

        """ Vanilla GAN """
        # self.n_d = 1
        # self.disc_loss = -tf.reduce_mean(tf.log(self.Dx) + tf.log(1-self.Dg))
        # self.g_loss = -tf.reduce_mean(tf.log(self.Dg))
        # self.gen_loss =  g_lambda*self.g_loss + p_lambda * self.loss_seq
        # self.train_step_disc = tf.train.AdamOptimizer(lr_disc).minimize(self.disc_loss, var_list=var_d)
        # self.train_step_gen = tf.train.AdamOptimizer(lr_gen).minimize(self.gen_loss, var_list=var_g)

        """ WGAN-GP """
        self.n_d = 5
        self.epsilon = 0.01
        self.gp_lambda = 10

        self.disc_loss = tf.reduce_mean(self.Dg) - tf.reduce_mean(self.Dx)
        self.g_loss = -tf.reduce_mean(self.Dg)
        self.gen_loss =  self.g_lambda*self.g_loss + \
                         self.p_lambda * self.loss_seq + \
                         self.t_lambda * self.loss_last
        # Gradient penalty: note that this interpolates the critic *outputs* and takes
        # gradients w.r.t. the critic weights (skipping the first two variables), rather
        # than interpolating real and generated inputs as in the original WGAN-GP paper.
        x_hat = self.epsilon*self.Dx + (1-self.epsilon)*self.Dg
        grad_list = tf.gradients(x_hat, var_d)[2:]
        GP = 0.0
        for layer in grad_list:
            GP += self.gp_lambda * (tf.sqrt(tf.reduce_sum(tf.square(layer))) - 1) ** 2
        self.disc_loss += GP
        self.train_step_disc = tf.train.AdamOptimizer(self.lr_disc, beta1=0, beta2=0.9).minimize(self.disc_loss, var_list=var_d)
        self.train_step_gen = tf.train.AdamOptimizer(self.lr_gen, beta1=0, beta2=0.9).minimize(self.gen_loss, var_list=var_g)
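
# For comparison, a minimal sketch of the canonical WGAN-GP gradient penalty
# (Gulrajani et al., 2017): sample a random interpolation between real and
# generated *inputs* and penalise the gradient of the critic output with respect
# to that interpolate. `models.discriminator_model` is assumed to accept a
# single-element sequence as above; this is not the implementation used here.
def wgan_gp_penalty_sketch(x_real, x_fake, gp_lambda=10.0):
    eps = tf.random_uniform([tf.shape(x_real)[0], 1], 0.0, 1.0)
    x_hat = eps * x_real + (1.0 - eps) * x_fake           # interpolated inputs
    d_hat = models.discriminator_model([x_hat], drop_rate=0.0)
    grads = tf.gradients(d_hat, [x_hat])[0]               # d D(x_hat) / d x_hat
    slopes = tf.sqrt(tf.reduce_sum(tf.square(grads), axis=1))
    return gp_lambda * tf.reduce_mean((slopes - 1.0) ** 2)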
Example 3
    def init_gan_losses(self):
        a_seq_split = tf.split(self.a_seq, self.max_seq_len, 1)
        y_seq_split = tf.split(self.y_seq, self.max_seq_len, 1)
        input_tmp_seq = tf.split(self.x_seq, self.buff_len, 1)
        self.loss_seq = 0.0
        self.loss_last = 0.0
        out_states = []
        out_states.append(self.out_state_raw)
        self.loss_seq += losses.loss_p(out_states[-1], y_seq_split[0])
        self.loss_last += losses.loss_p(out_states[-1], tf.slice(input_tmp_seq[-1], [0, 0], [-1, self.state_dim]))
        for i in range(1, self.seq_len):
            state_tmp = tf.slice(self.x_seq[:],
                                 [0, self.buff_init[0].size],
                                 [-1, -1]
                                 )
            state_tmp = tf.concat([state_tmp, out_states[-1]], axis=1)
            input_tmp = tf.concat([state_tmp, a_seq_split[i]], axis=1)

            input_tmp_seq = tf.split(input_tmp, self.buff_len, 1)
            out_state_raw_tmp = models.generator_model(input_tmp_seq, self.state_dim, drop_rate=0.5)
            out_states.append(out_state_raw_tmp)
            self.loss_seq += losses.loss_p(out_states[-1], y_seq_split[i])
            self.loss_last += losses.loss_p(out_states[-1], out_states[-2])

        self.out_state_seq = tf.concat(out_states, axis=1)

        self.loss_state = self.loss_seq

        """ GAN Stuff """
        x_seq = []
        g_seq = []
        out_seq_split = tf.split(self.out_state_seq, self.seq_len, 1)
        for i in range(self.seq_len):
            x_seq.append(tf.concat([y_seq_split[i], a_seq_split[i]], axis=1))
            g_seq.append(tf.concat([out_seq_split[i], a_seq_split[i]], axis=1))

        self.x_in = x_seq
        self.g_in = g_seq
        var_d = tf.trainable_variables('discriminator')
        var_g = tf.trainable_variables('generator')

        """ Vanilla GAN """
        # self.n_d = 1
        # self.disc_loss = -tf.reduce_mean(tf.log(self.Dx) + tf.log(1-self.Dg))
        # self.g_loss = -tf.reduce_mean(tf.log(self.Dg))
        # self.gen_loss =  g_lambda*self.g_loss + p_lambda * self.loss_seq
        # self.train_step_disc = tf.train.AdamOptimizer(lr_disc).minimize(self.disc_loss, var_list=var_d)
        # self.train_step_gen = tf.train.AdamOptimizer(lr_gen).minimize(self.gen_loss, var_list=var_g)

        """ WGAN-GP """
        self.disc_loss = tf.reduce_mean(self.Dg) - tf.reduce_mean(self.Dx)
        self.g_loss = -tf.reduce_mean(self.Dg)
        self.gen_loss = self.g_lambda * self.g_loss + \
                        self.p_lambda * self.loss_seq + \
                        self.t_lambda * self.loss_last
        x_hat = self.epsilon * self.Dx + (1 - self.epsilon) * self.Dg
        grad_list = tf.gradients(x_hat, var_d)[2:]
        GP = 0.0
        for layer in grad_list:
            GP += self.gp_lambda * (tf.sqrt(tf.reduce_sum(tf.square(layer))) - 1) ** 2
        self.disc_loss += GP
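
# A minimal sketch of how the ops built above would typically be driven in a TF1
# session, assuming feed dicts built elsewhere from the replay buffer; `batches`
# and `train_epoch_sketch` are hypothetical, not part of the original code.
# WGAN-GP normally runs n_d critic updates for every generator update.
def train_epoch_sketch(self, sess, batches):
    for x_batch, a_batch, y_batch in batches:
        feed = {self.x_seq: x_batch, self.a_seq: a_batch, self.y_seq: y_batch}
        for _ in range(self.n_d):                       # n_d critic steps
            sess.run(self.train_step_disc, feed_dict=feed)
        sess.run(self.train_step_gen, feed_dict=feed)   # one generator step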
Example 4
    def __init__(self, env_in):
        EnvLearner.__init__(self, env_in)
        # Initialization
        self.buff_len = 10
        self.seq_len = 5
        self.max_seq_len = 5
        self.last_r = np.array([0.0]).flatten()
        self.buffer = deque(self.buff_init * self.buff_len, maxlen=self.buff_len)
        dropout_rate = 0.5
        self.lr_disc = 1e-5
        self.lr_gen = 1e-5
        print('General Stats: ')
        print('Drop Rate: ' + str(dropout_rate))
        print('Buffer Len: ' + str(self.buff_len))
        print('Start Sequence Len: ' + str(self.seq_len))
        print('End Sequence Len: ' + str(self.max_seq_len))
        print('dnn_model:')
        print('Discriminator Learning Rate: ' + str(self.lr_disc))
        print('Generator Learning Rate: ' + str(self.lr_gen))

        discount = 1  # per-step discount on later prediction losses (1 = undiscounted)

        """ State Prediction """
        self.x_seq = tf.placeholder(dtype=tf.float32, shape=([None, self.buff_init[0].size * self.buff_len]))
        self.y_seq = tf.placeholder(dtype=tf.float32, shape=([None, self.state_dim * self.max_seq_len]))
        self.a_seq = tf.placeholder(dtype=tf.float32, shape=([None, self.act_dim * self.max_seq_len]))

        a_seq_split = tf.split(self.a_seq, self.max_seq_len, 1)
        y_seq_split = tf.split(self.y_seq, self.max_seq_len, 1)

        input_tmp_seq = tf.split(self.x_seq, self.buff_len, 1)
        self.out_state_raw = models.generator_model(input_tmp_seq, self.state_dim, drop_rate=dropout_rate)

        self.out_state = self.out_state_raw*self.state_mul_const
        self.loss_seq = 0.0
        self.loss_last = 0.0
        out_states = []
        out_states.append(self.out_state_raw)
        self.loss_seq += losses.loss_p(out_states[-1], y_seq_split[0])
        self.loss_last += losses.loss_p(out_states[-1], tf.slice(input_tmp_seq[-1], [0, 0], [-1, self.state_dim]))
        for i in range(1, self.seq_len):
            state_tmp = tf.slice(self.x_seq[:],
                                 [0, self.buff_init[0].size],
                                 [-1, -1])
            state_tmp = tf.concat([state_tmp, out_states[-1]], axis=1)
            input_tmp = tf.concat([state_tmp, a_seq_split[i]], axis=1)

            input_tmp_seq = tf.split(input_tmp, self.buff_len, 1)
            out_state_raw_tmp = models.generator_model(input_tmp_seq, self.state_dim, drop_rate=dropout_rate)
            out_states.append(out_state_raw_tmp)
            self.loss_seq += (discount**(i-1))*losses.loss_p(out_states[-1], y_seq_split[i])
            self.loss_last += losses.loss_p(out_states[-1], out_states[-2])

        self.out_state_seq = tf.concat(out_states, axis=1)

        self.loss_state = self.loss_seq

        self.train_step_state = tf.train.AdamOptimizer(self.lr_gen).minimize(self.loss_state)

        self.loss =  self.loss_seq
        self.train_step = tf.train.AdamOptimizer(self.lr_gen, beta1=0, beta2=0.9).minimize(self.loss)
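
# `losses.loss_p` is referenced throughout these excerpts but not shown. A plausible
# minimal stand-in is the mean Euclidean distance between prediction and target;
# the actual definition may differ.
def loss_p_sketch(y_a, y_b):
    return tf.reduce_mean(tf.sqrt(tf.reduce_sum(tf.square(y_a - y_b), axis=1)))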