nb_epochs = 100
with tf.Session(config=tf_config) as sess:
    sess_start = time.time()
    datetime_str = datetime.datetime.now().strftime("%Y-%m-%d-%H:%M:%S")
    data_log = open('logs/' + datetime_str + '_log.txt', 'w+')

    # generic data gathering
    X_data = []
    y_data = []

    x = tf.placeholder(dtype=tf.float32, shape=[None, 4])
    real_y = tf.placeholder(dtype=tf.float32, shape=[None, 3])
    y = fk_learner(x) * pos_bounds

    loss = losses.loss_p(real_y, y)
    train_step = tf.train.AdamOptimizer(1e-5).minimize(loss)

    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()

    if load is None:
        print('Loading Data...')
        data_file = 'real_widowx_train_10hz_100K_default_processed.pkl'
        train_data = pickle.load(open(data_file, 'rb+'))
        for episode in train_data:
            X_data.append(episode[0][:-3] * angle_bounds)
            y_data.append(episode[0][-3:])
        x_batches = batch(X_data, 32)
        y_batches = batch(y_data, 32)

        print('Training...')
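# The batch() helper used above is not defined in this listing. Below is a
# minimal sketch of what it is assumed to do (chunk a list of samples into
# consecutive fixed-size minibatches); the actual implementation may differ.
import numpy as np

def batch(data, batch_size):
    """Split a list of samples into consecutive minibatches of size batch_size."""
    return [np.array(data[i:i + batch_size]) for i in range(0, len(data), batch_size)]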
def __init__(self, env_in):
    EnvLearner.__init__(self, env_in)

    # Initialization
    self.buff_len = 10
    self.seq_len = 1
    self.max_seq_len = 5
    self.last_r = np.array([0.0]).flatten()
    self.buffer = deque(self.buff_init * self.buff_len, maxlen=self.buff_len)
    dropout_rate = 0.5
    self.lr_disc = 1e-5
    self.lr_gen = 1e-5
    print('General Stats: ')
    print('Drop Rate: ' + str(dropout_rate))
    print('Buffer Len: ' + str(self.buff_len))
    print('Start Sequence Len: ' + str(self.seq_len))
    print('End Sequence Len: ' + str(self.max_seq_len))
    print('gan_model:')
    print('Discriminator Learning Rate: ' + str(self.lr_disc))
    print('Generator Learning Rate: ' + str(self.lr_gen))

    """ State Prediction """
    self.x_seq = tf.placeholder(dtype=tf.float32, shape=[None, self.buff_init[0].size * self.buff_len])
    self.y_seq = tf.placeholder(dtype=tf.float32, shape=[None, self.state_dim * self.max_seq_len])
    self.a_seq = tf.placeholder(dtype=tf.float32, shape=[None, self.act_dim * self.max_seq_len])

    a_seq_split = tf.split(self.a_seq, self.max_seq_len, 1)
    y_seq_split = tf.split(self.y_seq, self.max_seq_len, 1)
    input_tmp_seq = tf.split(self.x_seq, self.buff_len, 1)

    self.out_state_raw = models.generator_model(input_tmp_seq, self.state_dim, drop_rate=dropout_rate)
    self.out_state = self.out_state_raw * self.state_mul_const

    self.loss_seq = 0.0
    self.loss_last = 0.0
    out_states = []
    out_states.append(self.out_state_raw)
    self.loss_seq += losses.loss_p(out_states[-1], y_seq_split[0])
    self.loss_last += losses.loss_p(out_states[-1],
                                    tf.slice(input_tmp_seq[-1], [0, 0], [-1, self.state_dim]))

    for i in range(1, self.seq_len):
        # Roll the buffer forward: drop the oldest frame, append the latest
        # predicted state, and condition on the next action in the sequence.
        state_tmp = tf.slice(self.x_seq, [0, self.buff_init[0].size], [-1, -1])
        state_tmp = tf.concat([state_tmp, out_states[-1]], axis=1)
        input_tmp = tf.concat([state_tmp, a_seq_split[i]], axis=1)
        input_tmp_seq = tf.split(input_tmp, self.buff_len, 1)
        out_state_raw_tmp = models.generator_model(input_tmp_seq, self.state_dim, drop_rate=dropout_rate)
        out_states.append(out_state_raw_tmp)
        self.loss_seq += losses.loss_p(out_states[-1], y_seq_split[i])
        self.loss_last += losses.loss_p(out_states[-1], out_states[-2])

    self.out_state_seq = tf.concat(out_states, axis=1)
    self.loss_state = self.loss_seq
    self.train_step_state = tf.train.AdamOptimizer(self.lr_gen).minimize(self.loss_state)

    """ GAN Stuff """
    x_seq = []
    g_seq = []
    out_seq_split = tf.split(self.out_state_seq, self.seq_len, 1)
    for i in range(self.seq_len):
        x_seq.append(tf.concat([y_seq_split[i], a_seq_split[i]], axis=1))
        g_seq.append(tf.concat([out_seq_split[i], a_seq_split[i]], axis=1))
    x_in = x_seq
    g_in = g_seq

    self.Dx = models.discriminator_model(x_in, drop_rate=dropout_rate)
    self.Dg = models.discriminator_model(g_in, drop_rate=dropout_rate)
    var_d = tf.trainable_variables('discriminator')
    var_g = tf.trainable_variables('generator')

    self.g_lambda = 1.0
    self.p_lambda = 0.0
    self.t_lambda = 0.0

    """ Vanilla GAN """
    # self.n_d = 1
    # self.disc_loss = -tf.reduce_mean(tf.log(self.Dx) + tf.log(1 - self.Dg))
    # self.g_loss = -tf.reduce_mean(tf.log(self.Dg))
    # self.gen_loss = g_lambda * self.g_loss + p_lambda * self.loss_seq
    # self.train_step_disc = tf.train.AdamOptimizer(lr_disc).minimize(self.disc_loss, var_list=var_d)
    # self.train_step_gen = tf.train.AdamOptimizer(lr_gen).minimize(self.gen_loss, var_list=var_g)

    """ WGAN-GP """
    self.n_d = 5
    self.epsilon = 0.01
    self.gp_lambda = 10

    self.disc_loss = tf.reduce_mean(self.Dg) - tf.reduce_mean(self.Dx)
    self.g_loss = -tf.reduce_mean(self.Dg)
    self.gen_loss = self.g_lambda * self.g_loss + \
                    self.p_lambda * self.loss_seq + \
                    self.t_lambda * self.loss_last

    # Gradient penalty: interpolate the critic outputs and penalise the norm of
    # the gradients taken with respect to the critic (discriminator) weights.
    x_hat = self.epsilon * self.Dx + (1 - self.epsilon) * self.Dg
    grad_list = tf.gradients(x_hat, var_d)[2:]
    GP = 0.0
    for layer in grad_list:
        GP += self.gp_lambda * (tf.sqrt(tf.reduce_sum(tf.square(layer))) - 1) ** 2
    self.disc_loss += GP

    self.train_step_disc = tf.train.AdamOptimizer(self.lr_disc, beta1=0, beta2=0.9).minimize(self.disc_loss, var_list=var_d)
    self.train_step_gen = tf.train.AdamOptimizer(self.lr_gen, beta1=0, beta2=0.9).minimize(self.gen_loss, var_list=var_g)
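# For reference, a minimal sketch of the textbook WGAN-GP penalty, which
# interpolates real and generated *inputs* and penalises the critic's gradient
# norm with respect to that interpolation (the listing above instead
# interpolates critic outputs and penalises gradients w.r.t. the critic
# weights). `discriminator_model`, `real_in`, and `fake_in` stand in for the
# corresponding tensors in this code base and are assumptions, not its API.
import tensorflow as tf

def gradient_penalty(discriminator_model, real_in, fake_in, gp_lambda=10.0):
    """Standard WGAN-GP term: lambda * E[(||grad D(x_hat)|| - 1)^2]."""
    eps = tf.random_uniform([tf.shape(real_in)[0], 1], 0.0, 1.0)
    x_hat = eps * real_in + (1.0 - eps) * fake_in
    d_hat = discriminator_model(x_hat)
    grads = tf.gradients(d_hat, [x_hat])[0]
    grad_norm = tf.sqrt(tf.reduce_sum(tf.square(grads), axis=1) + 1e-12)
    return gp_lambda * tf.reduce_mean((grad_norm - 1.0) ** 2)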
def init_gan_losses(self):
    a_seq_split = tf.split(self.a_seq, self.max_seq_len, 1)
    y_seq_split = tf.split(self.y_seq, self.max_seq_len, 1)
    input_tmp_seq = tf.split(self.x_seq, self.buff_len, 1)

    self.loss_seq = 0.0
    self.loss_last = 0.0
    out_states = []
    out_states.append(self.out_state_raw)
    self.loss_seq += losses.loss_p(out_states[-1], y_seq_split[0])
    self.loss_last += losses.loss_p(out_states[-1],
                                    tf.slice(input_tmp_seq[-1], [0, 0], [-1, self.state_dim]))

    for i in range(1, self.seq_len):
        state_tmp = tf.slice(self.x_seq, [0, self.buff_init[0].size], [-1, -1])
        state_tmp = tf.concat([state_tmp, out_states[-1]], axis=1)
        input_tmp = tf.concat([state_tmp, a_seq_split[i]], axis=1)
        input_tmp_seq = tf.split(input_tmp, self.buff_len, 1)
        out_state_raw_tmp = models.generator_model(input_tmp_seq, self.state_dim, drop_rate=0.5)
        out_states.append(out_state_raw_tmp)
        self.loss_seq += losses.loss_p(out_states[-1], y_seq_split[i])
        self.loss_last += losses.loss_p(out_states[-1], out_states[-2])

    self.out_state_seq = tf.concat(out_states, axis=1)
    self.loss_state = self.loss_seq

    """ GAN Stuff """
    x_seq = []
    g_seq = []
    out_seq_split = tf.split(self.out_state_seq, self.seq_len, 1)
    for i in range(self.seq_len):
        x_seq.append(tf.concat([y_seq_split[i], a_seq_split[i]], axis=1))
        g_seq.append(tf.concat([out_seq_split[i], a_seq_split[i]], axis=1))
    self.x_in = x_seq
    self.g_in = g_seq

    var_d = tf.trainable_variables('discriminator')
    var_g = tf.trainable_variables('generator')

    """ Vanilla GAN """
    # self.n_d = 1
    # self.disc_loss = -tf.reduce_mean(tf.log(self.Dx) + tf.log(1 - self.Dg))
    # self.g_loss = -tf.reduce_mean(tf.log(self.Dg))
    # self.gen_loss = g_lambda * self.g_loss + p_lambda * self.loss_seq
    # self.train_step_disc = tf.train.AdamOptimizer(lr_disc).minimize(self.disc_loss, var_list=var_d)
    # self.train_step_gen = tf.train.AdamOptimizer(lr_gen).minimize(self.gen_loss, var_list=var_g)

    """ WGAN-GP """
    self.disc_loss = tf.reduce_mean(self.Dg) - tf.reduce_mean(self.Dx)
    self.g_loss = -tf.reduce_mean(self.Dg)
    self.gen_loss = self.g_lambda * self.g_loss + \
                    self.p_lambda * self.loss_seq + \
                    self.t_lambda * self.loss_last

    x_hat = self.epsilon * self.Dx + (1 - self.epsilon) * self.Dg
    grad_list = tf.gradients(x_hat, var_d)[2:]
    GP = 0.0
    for layer in grad_list:
        GP += self.gp_lambda * (tf.sqrt(tf.reduce_sum(tf.square(layer))) - 1) ** 2
    self.disc_loss += GP
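# init_gan_losses() rebuilds loss_seq / disc_loss / gen_loss from the current
# value of self.seq_len. A plausible use, sketched below as an assumption (it
# is not shown in this listing), is a curriculum that lengthens the prediction
# horizon during training; maybe_grow_horizon and grow_every are hypothetical.
def maybe_grow_horizon(model, epoch, grow_every=10):
    """Hypothetical curriculum helper: lengthen the rollout and rebuild the losses."""
    if epoch > 0 and epoch % grow_every == 0 and model.seq_len < model.max_seq_len:
        model.seq_len += 1
        model.init_gan_losses()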
def __init__(self, env_in):
    EnvLearner.__init__(self, env_in)

    # Initialization
    self.buff_len = 10
    self.seq_len = 5
    self.max_seq_len = 5
    self.last_r = np.array([0.0]).flatten()
    self.buffer = deque(self.buff_init * self.buff_len, maxlen=self.buff_len)
    dropout_rate = 0.5
    self.lr_disc = 1e-5
    self.lr_gen = 1e-5
    print('General Stats: ')
    print('Drop Rate: ' + str(dropout_rate))
    print('Buffer Len: ' + str(self.buff_len))
    print('Start Sequence Len: ' + str(self.seq_len))
    print('End Sequence Len: ' + str(self.max_seq_len))
    print('dnn_model:')
    print('Discriminator Learning Rate: ' + str(self.lr_disc))
    print('Generator Learning Rate: ' + str(self.lr_gen))
    discount = 1

    """ State Prediction """
    self.x_seq = tf.placeholder(dtype=tf.float32, shape=[None, self.buff_init[0].size * self.buff_len])
    self.y_seq = tf.placeholder(dtype=tf.float32, shape=[None, self.state_dim * self.max_seq_len])
    self.a_seq = tf.placeholder(dtype=tf.float32, shape=[None, self.act_dim * self.max_seq_len])

    a_seq_split = tf.split(self.a_seq, self.max_seq_len, 1)
    y_seq_split = tf.split(self.y_seq, self.max_seq_len, 1)
    input_tmp_seq = tf.split(self.x_seq, self.buff_len, 1)

    self.out_state_raw = models.generator_model(input_tmp_seq, self.state_dim, drop_rate=dropout_rate)
    self.out_state = self.out_state_raw * self.state_mul_const

    self.loss_seq = 0.0
    self.loss_last = 0.0
    out_states = []
    out_states.append(self.out_state_raw)
    self.loss_seq += losses.loss_p(out_states[-1], y_seq_split[0])
    self.loss_last += losses.loss_p(out_states[-1],
                                    tf.slice(input_tmp_seq[-1], [0, 0], [-1, self.state_dim]))

    for i in range(1, self.seq_len):
        # Roll the buffer forward: drop the oldest frame, append the latest
        # predicted state, and condition on the next action in the sequence.
        state_tmp = tf.slice(self.x_seq, [0, self.buff_init[0].size], [-1, -1])
        state_tmp = tf.concat([state_tmp, out_states[-1]], axis=1)
        input_tmp = tf.concat([state_tmp, a_seq_split[i]], axis=1)
        input_tmp_seq = tf.split(input_tmp, self.buff_len, 1)
        out_state_raw_tmp = models.generator_model(input_tmp_seq, self.state_dim, drop_rate=dropout_rate)
        out_states.append(out_state_raw_tmp)
        self.loss_seq += (discount ** (i - 1)) * losses.loss_p(out_states[-1], y_seq_split[i])
        self.loss_last += losses.loss_p(out_states[-1], out_states[-2])

    self.out_state_seq = tf.concat(out_states, axis=1)
    self.loss_state = self.loss_seq
    self.train_step_state = tf.train.AdamOptimizer(self.lr_gen).minimize(self.loss_state)

    self.loss = self.loss_seq
    self.train_step = tf.train.AdamOptimizer(self.lr_gen, beta1=0, beta2=0.9).minimize(self.loss)
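# Illustrative only: the multi-step objective built above is, in effect,
#     loss_seq = loss_p(s_hat_0, s_0) + sum_{i=1}^{seq_len-1} discount^(i-1) * loss_p(s_hat_i, s_i)
# with discount = 1 here, i.e. every horizon step weighted equally. A small
# numpy sketch of the same weighting, assuming loss_p reduces a per-step
# prediction error to a scalar (the actual losses.loss_p may differ):
import numpy as np

def discounted_seq_loss(pred_states, true_states, discount=1.0):
    """Sum per-step errors over a rollout, weighting step i >= 1 by discount**(i-1)."""
    total = np.mean(np.abs(pred_states[0] - true_states[0]))
    for i in range(1, len(pred_states)):
        total += (discount ** (i - 1)) * np.mean(np.abs(pred_states[i] - true_states[i]))
    return total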