def policy_objective(self, batch):
    states, advantages, old_log_prob, true_speed, true_similarity = batch
    policy = self.network.policy(states, training=True)

    log_prob = policy['old_log_prob']
    entropy = tf.reduce_mean(policy['entropy'])
    speed = policy['speed']
    similarity = policy['similarity']

    # Entropy
    entropy_penalty = self.entropy_strength() * entropy

    # Compute the probability ratio between the current and old policy
    ratio = tf.math.exp(log_prob - old_log_prob)
    ratio = tf.reduce_mean(ratio, axis=1)  # mean over per-action ratio

    # Compute the clipped ratio times advantage
    clip_value = self.clip_ratio()
    min_adv = tf.where(advantages > 0.0,
                       x=(1.0 + clip_value) * advantages,
                       y=(1.0 - clip_value) * advantages)

    # aux losses
    speed_loss = 0.5 * tf.reduce_mean(losses.MSE(y_true=true_speed, y_pred=speed))
    similarity_loss = 0.5 * tf.reduce_mean(losses.MSE(y_true=true_similarity, y_pred=similarity))

    # total loss
    policy_loss = -tf.reduce_mean(tf.minimum(ratio * advantages, min_adv))
    total_loss = policy_loss - entropy_penalty + speed_loss + similarity_loss

    # Log stuff
    self.log(ratio=tf.reduce_mean(ratio), log_prob=tf.reduce_mean(log_prob), entropy=entropy,
             entropy_coeff=self.entropy_strength.value, ratio_clip=clip_value,
             loss_speed_policy=speed_loss, loss_policy=policy_loss, loss_entropy=entropy_penalty,
             speed_pi=tf.reduce_mean(speed), loss_similarity_policy=similarity_loss,
             similarity_pi=tf.reduce_mean(similarity))

    return total_loss
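# Added sketch: the `tf.where`-based `min_adv` above is equivalent to the more common PPO
# clipped-surrogate form min(r * A, clip(r, 1 - eps, 1 + eps) * A). A minimal, self-contained
# version of that equivalent formulation (the helper name and the `eps` default are
# illustrative, not taken from the original code):
import tensorflow as tf


def clipped_surrogate_loss(ratio, advantages, eps=0.2):
    # Elementwise minimum of the unclipped and clipped terms, negated and averaged so that
    # minimizing this value maximizes the PPO surrogate objective.
    clipped_ratio = tf.clip_by_value(ratio, 1.0 - eps, 1.0 + eps)
    return -tf.reduce_mean(tf.minimum(ratio * advantages, clipped_ratio * advantages))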
def optimize(self):
    # Main network optimization routine.
    # Pull the collected transitions out of the buffer and convert them to tensors.
    state = tf.constant([t.state for t in self.buffer], dtype=tf.float32)
    action = tf.constant([t.action for t in self.buffer], dtype=tf.int32)
    action = tf.reshape(action, [-1, 1])
    reward = [t.reward for t in self.buffer]
    old_action_log_prob = tf.constant([t.a_log_prob for t in self.buffer], dtype=tf.float32)
    old_action_log_prob = tf.reshape(old_action_log_prob, [-1, 1])

    # Compute the Monte-Carlo return R(s_t) by iterating backwards over the rewards.
    R = 0
    Rs = []
    for r in reward[::-1]:
        R = r + gamma * R
        Rs.insert(0, R)
    Rs = tf.constant(Rs, dtype=tf.float32)

    # Iterate over the buffered data roughly 10 times.
    for _ in range(round(10 * len(self.buffer) / batch_size)):
        # Randomly sample `batch_size` transitions from the buffer.
        index = np.random.choice(np.arange(len(self.buffer)), batch_size, replace=False)

        # Record gradients for the actor and the critic separately.
        with tf.GradientTape() as tape1, tf.GradientTape() as tape2:
            # Gather R(s_t), shape [b, 1].
            v_target = tf.expand_dims(tf.gather(Rs, index, axis=0), axis=1)
            # Predict v(s), i.e. the baseline.
            v = self.critic(tf.gather(state, index, axis=0))
            delta = v_target - v                 # advantage estimate
            advantage = tf.stop_gradient(delta)  # do not propagate gradients through the advantage

            # TF's gather_nd works differently from PyTorch's gather, so we build the
            # [b, 2] coordinate pairs it expects; in PyTorch this would simply be
            # pi_a = pi.gather(1, a).
            a = tf.gather(action, index, axis=0)  # batch actions a_t
            # Action distribution pi(a|s_t) for the batch.
            pi = self.actor(tf.gather(state, index, axis=0))
            indices = tf.expand_dims(tf.range(a.shape[0]), axis=1)
            indices = tf.concat([indices, a], axis=1)
            pi_a = tf.gather_nd(pi, indices)      # action probability pi(a_t|s_t), [b]
            pi_a = tf.expand_dims(pi_a, axis=1)   # [b] => [b, 1]

            # Importance-sampling ratio (the stored `a_log_prob` is treated here as the
            # old action probability).
            ratio = (pi_a / tf.gather(old_action_log_prob, index, axis=0))
            surr1 = ratio * advantage
            surr2 = tf.clip_by_value(ratio, 1 - epsilon, 1 + epsilon) * advantage
            # PPO clipped surrogate loss.
            policy_loss = -tf.reduce_mean(tf.minimum(surr1, surr2))
            # The value estimate should match the MC return R(s_t) as closely as possible.
            value_loss = losses.MSE(v_target, v)

        # Update the policy (actor) network.
        grads = tape1.gradient(policy_loss, self.actor.trainable_variables)
        self.actor_optimizer.apply_gradients(
            zip(grads, self.actor.trainable_variables))
        # Update the value (critic) network.
        grads = tape2.gradient(value_loss, self.critic.trainable_variables)
        self.critic_optimizer.apply_gradients(
            zip(grads, self.critic.trainable_variables))

    self.buffer = []  # clear the trained data
def train_net(model, optimizer, gamma, epsilon, lmd, k_epoch):
    s, a, r, s_next, a_prob, done_flag = model.package_trans()
    for epo_i in range(k_epoch):
        td_target = r + gamma * model.get_critic(s_next) * done_flag
        td_error = td_target - model.get_critic(s)
        td_error = td_error.numpy()

        # GAE: accumulate discounted TD errors backwards in time.
        advantage_ls = []
        advantage = 0.
        for error in td_error[::-1]:
            advantage = gamma * lmd * advantage + error[0]
            advantage_ls.append(advantage)
        advantage_ls.reverse()

        with tf.GradientTape() as tape:
            # Shape [b, 1] so it broadcasts correctly against the [b, 1] ratio below.
            advantage = tf.expand_dims(tf.constant(advantage_ls, dtype=tf.float32), axis=1)
            policy = model.get_policy(s, 1)
            index = tf.expand_dims(tf.range(a.shape[0]), 1)
            # print(index.shape, a.shape)
            a_index = tf.concat([index, a], axis=1)
            policy = tf.gather_nd(policy, a_index)
            policy = tf.expand_dims(policy, 1)
            ratio = tf.exp(tf.math.log(policy) - tf.math.log(a_prob))
            surr1 = ratio * advantage
            surr2 = tf.clip_by_value(ratio, 1 - epsilon, 1 + epsilon) * advantage
            loss = -tf.minimum(surr1, surr2) + losses.MSE(
                model.get_critic(s), td_target)
        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
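# Added sketch: the reversed loop above is the GAE(lambda) recursion
# A_t = delta_t + gamma * lambda * A_{t+1}. A self-contained helper doing the same thing over
# a 1-D array of TD errors (function and argument names are illustrative, not from the original):
import numpy as np


def gae_advantages(td_errors, gamma, lmd):
    # Iterate backwards in time, accumulating the discounted sum of TD errors.
    advantages = np.zeros(len(td_errors), dtype=np.float32)
    running = 0.0
    for t in reversed(range(len(td_errors))):
        running = td_errors[t] + gamma * lmd * running
        advantages[t] = running
    return advantages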
def value_objective(self, batch):
    states, returns, true_speed, true_similarity = batch
    prediction = self.value_predict(states)
    values, speed, similarity = prediction['value'], prediction['speed'], prediction['similarity']

    # compute normalized `value loss`:
    base_loss = tf.reduce_mean(losses.MSE(y_true=returns[:, 0], y_pred=values[:, 0]))
    exp_loss = tf.reduce_mean(losses.MSE(y_true=returns[:, 1], y_pred=values[:, 1]))
    value_loss = (0.25 * base_loss) + (exp_loss / (self.network.exp_scale ** 2))

    # auxiliary losses:
    speed_loss = tf.reduce_mean(losses.MSE(y_true=true_speed, y_pred=speed))
    similarity_loss = tf.reduce_mean(losses.MSE(y_true=true_similarity, y_pred=similarity))

    self.log(speed_v=tf.reduce_mean(speed), similarity_v=tf.reduce_mean(similarity),
             loss_v=value_loss, loss_speed_value=speed_loss,
             loss_similarity_value=similarity_loss)

    return (value_loss + speed_loss + similarity_loss) * 0.25
def _update_qvalue(self, batch):
    obs1, acts, rews, obs2 = batch
    targets = (rews + self.gamma * self.qvalue_targ(obs2, self.policy_targ(obs2)))
    self.qvalue_opt.minimize(
        lambda: kls.MSE(targets, self.qvalue(obs1, acts)),
        self.qvalue.variables)
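# Added sketch: `qvalue_targ` and `policy_targ` above are target networks; a common companion
# step (not shown in this snippet) is a Polyak soft update that slowly tracks the online
# networks. A minimal version, assuming `target` and `source` are Keras models (or tf.Modules)
# with matching variable lists:
def soft_update(target, source, tau=0.005):
    # target <- (1 - tau) * target + tau * source, variable by variable
    for t_var, s_var in zip(target.variables, source.variables):
        t_var.assign((1.0 - tau) * t_var + tau * s_var)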
def trainOp(model, optimizer, dLoader, it, train=True):
    data = dLoader.__getitem__(it)
    with tf.GradientTape() as tape:
        y_true, y_pred = model(data, training=train)
        loss = losses.MSE(y_true, y_pred)
    if train:
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    return loss
def _update_qvalues(self, batch):
    obs1, acts, rews, obs2, done = batch
    done = tf.cast(done, tf.float32)

    # Target-policy smoothing: perturb the target actions with clipped Gaussian noise.
    targ_acts = self.policy_targ(obs2)
    noise = tf.random.normal(shape=targ_acts.shape, mean=0.0,
                             stddev=self.targ_act_noise)
    noise = tf.clip_by_value(noise, -self.targ_act_clip, self.targ_act_clip)
    targ_acts = targ_acts + noise

    # Clipped double-Q target: bootstrap from the smaller of the two target critics.
    targets = rews + (1. - done) * self.gamma * tf.minimum(
        self.qvalue1_targ(obs2, targ_acts),
        self.qvalue2_targ(obs2, targ_acts))

    self.qvalue_opt.minimize(
        lambda: kls.MSE(targets, self.qvalue1(obs1, acts)) + kls.MSE(
            targets, self.qvalue2(obs1, acts)),
        self.qvalue1.variables + self.qvalue2.variables)
def total_loss(boxes_gt, masks_gt, input_p, box_pred, mask_pred, rel_scores):
    y1 = K.flatten(boxes_gt)
    y2 = K.flatten(masks_gt)
    y1_pred = K.flatten(box_pred)
    y2_pred = K.flatten(mask_pred)
    input_p = K.expand_dims(input_p, axis=0)
    box_loss = losses.MSE(y1, y1_pred)
    mask_loss = losses.BinaryCrossentropy(from_logits=True)(y2, y2_pred)
    cos_sim = losses.CosineSimilarity()(boxes_gt, box_pred)
    loss_predicate = losses.categorical_crossentropy(input_p, rel_scores)
    return K.mean(box_loss * 1000 + mask_loss + cos_sim + loss_predicate)
def optimize(self):
    state = tf.constant([t.state for t in self.buffer], dtype=tf.float32)
    action = tf.constant([t.action for t in self.buffer], dtype=tf.int32)
    action = tf.reshape(action, [-1, 1])
    reward = [t.reward for t in self.buffer]
    old_action_log_prob = tf.constant([t.a_log_prob for t in self.buffer], dtype=tf.float32)
    old_action_log_prob = tf.reshape(old_action_log_prob, [-1, 1])

    R = 0
    Rs = []
    for r in reward[::-1]:
        R = r + gamma * R
        Rs.insert(0, R)
    Rs = tf.constant(Rs, dtype=tf.float32)

    for _ in range(round(10 * len(self.buffer) / batch_size)):
        index = np.random.choice(np.arange(len(self.buffer)), batch_size, replace=False)
        with tf.GradientTape() as tape1, tf.GradientTape() as tape2:
            v_target = tf.expand_dims(tf.gather(Rs, index, axis=0), axis=1)
            v = self.critic(tf.gather(state, index, axis=0))
            delta = v_target - v
            advantage = tf.stop_gradient(delta)

            a = tf.gather(action, index, axis=0)
            pi = self.actor(tf.gather(state, index, axis=0))
            indices = tf.expand_dims(tf.range(a.shape[0]), axis=1)
            indices = tf.concat([indices, a], axis=1)
            pi_a = tf.gather_nd(pi, indices)
            pi_a = tf.expand_dims(pi_a, axis=1)

            # Importance Sampling
            ratio = (pi_a / tf.gather(old_action_log_prob, index, axis=0))
            surr1 = ratio * advantage
            surr2 = tf.clip_by_value(ratio, 1 - epsilon, 1 + epsilon) * advantage
            policy_loss = -tf.reduce_mean(tf.minimum(surr1, surr2))
            value_loss = losses.MSE(v_target, v)

        grads = tape1.gradient(policy_loss, self.actor.trainable_variables)
        self.actor_optimizer.apply_gradients(zip(grads, self.actor.trainable_variables))
        grads = tape2.gradient(value_loss, self.critic.trainable_variables)
        self.critic_optimizer.apply_gradients(zip(grads, self.critic.trainable_variables))

    self.buffer = []
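# Added note: the ratio in `optimize` divides the current action probability by the stored
# `old_action_log_prob`, which is only a valid importance weight if that field actually holds
# probabilities rather than log-probabilities. If log-probabilities were stored instead, the
# equivalent ratio would be computed in log space, as in this minimal sketch (names are
# illustrative, not from the original code):
import tensorflow as tf


def importance_ratio(log_prob_new, log_prob_old):
    # pi_new(a|s) / pi_old(a|s), computed stably from log-probabilities
    return tf.exp(log_prob_new - log_prob_old)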
def total_loss(boxes_gt, masks_gt, input_p, box_pred, mask_pred, rel_scores, loss):
    y1 = K.flatten(boxes_gt)
    y2 = K.flatten(masks_gt)
    y1_pred = K.flatten(box_pred)
    y2_pred = K.flatten(mask_pred)
    input_p = K.expand_dims(input_p, axis=0)
    if loss == 'MSE':
        box_loss = losses.MSE(y1, y1_pred)
    else:
        box_loss = losses.MAE(y1, y1_pred)
    mask_loss = losses.BinaryCrossentropy(from_logits=True)(y2, y2_pred)
    cos_sim = losses.CosineSimilarity()(boxes_gt, box_pred)
    loss_predicate = losses.categorical_crossentropy(
        input_p, K.reshape(rel_scores, input_p.shape))
    return K.mean(box_loss * 10 + 0.01 * mask_loss + 0.001 * loss_predicate)
def train(model, train_db, optimizer, normed_test_data, test_labels):
    train_mae_losses = []
    test_mae_losses = []
    for epoch in range(200):
        for step, (x, y) in enumerate(train_db):
            with tf.GradientTape() as tape:
                out = model(x)
                loss = tf.reduce_mean(losses.MSE(y, out))
                mae_loss = tf.reduce_mean(losses.MAE(y, out))
            if step % 10 == 0:
                print(epoch, step, float(loss))
            grads = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))

        train_mae_losses.append(float(mae_loss))
        out = model(tf.constant(normed_test_data.values))
        test_mae_losses.append(tf.reduce_mean(losses.MAE(test_labels, out)))
    return train_mae_losses, test_mae_losses
train_db = train_db.shuffle(100).batch(512)

# # Sanity-check predictions before training
# example_batch = normed_train_data[:10]
# example_result = model.predict(example_batch)
# example_result

train_mae_losses = []
test_mae_losses = []
for epoch in range(200):
    for step, (x, y) in enumerate(train_db):
        with tf.GradientTape() as tape:
            out = model(x)
            # Mean Squared Error (MSE) training loss
            loss = tf.reduce_mean(losses.MSE(y, out))
            # Mean Absolute Error (MAE), tracked for reporting
            mae_loss = tf.reduce_mean(losses.MAE(y, out))
        if step % 10 == 0:
            print(epoch, step, float(loss))
        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

    train_mae_losses.append(float(mae_loss))
    # Run the test inputs through the model...
    out = model(tf.constant(normed_test_data.values))
    # ...and record the test MAE from that output.
    test_mae_losses.append(tf.reduce_mean(losses.MAE(test_labels, out)))
# This helps to stabilize the training of the model
advantage = normalize(advantage)

# Calculate the policy and value gradients for gradient descent
with tf.GradientTape() as policy_tape, tf.GradientTape() as value_tape:
    logits = tf.nn.log_softmax(policy_net(np.atleast_2d(np.array(states)).astype('float32')))
    # Since we selected only one action out of the available ones, we need to
    # identify that action using one-hot encoding.
    one_hot_values = tf.squeeze(tf.one_hot(np.array(actions), env.action_space.n))
    log_probs = tf.math.reduce_sum(logits * one_hot_values, axis=1)
    policy_loss = -tf.math.reduce_mean(advantage * log_probs)
    value_loss = kls.MSE(returns, tf.squeeze(value_net(np.atleast_2d(np.array(states)).astype('float32'))))

policy_variables = policy_net.trainable_variables
value_variables = value_net.trainable_variables
policy_gradients = policy_tape.gradient(policy_loss, policy_variables)
value_gradients = value_tape.gradient(value_loss, value_variables)

# Update the policy network weights using Adam
optimizer_policy_net.apply_gradients(zip(policy_gradients, policy_variables))

# Since we know the actual rewards that we got, the value loss is pretty high,
# so we perform multiple iterations of gradient descent to achieve good performance.
for iteration in range(train_value_iterations):
    optimizer_value_net.apply_gradients(zip(value_gradients, value_variables))
def train(self, batch_size=64, epochs=4):
    criterion = losses.mean_squared_error
    optimizer = optim.Adam(lr=0.001)
    loops = self.index // batch_size
    df = pd.read_csv(f'{self.directory}/steering.csv')
    for e in range(epochs):
        for i in range(loops):
            # Sample a random batch of frames and their steering labels.
            B = np.random.randint(0, self.index, size=batch_size)
            X = np.zeros((batch_size, 160, 320, 3))
            S = np.zeros((batch_size, 1))
            for b in range(batch_size):
                X[b] = Warp(
                    mpimg.imread(f'{self.directory}/img/{B[b]}.jpg'),
                    src, target) / 256
                X = np.array(X, dtype=np.float32)
                S[b] = df.iloc[B[b]].steering
                # Random horizontal flip as augmentation (the steering sign flips too).
                if np.random.choice([True, False]):
                    X[b] = np.flip(X[b], 1)
                    S[b] = -S[b]
                else:
                    pass

            if self.net.decode:
                # Step 1: update encoder + steering head on the MSE steering loss.
                with tf.GradientTape() as t:
                    output, dec = self.net(X)
                    loss1 = losses.MSE(S, output)
                grads = t.gradient(
                    loss1,
                    self.net.encoder.trainable_variables +
                    self.net.predict_conv.trainable_variables +
                    self.net.predict.trainable_variables)
                optimizer.apply_gradients(
                    zip(grads,
                        self.net.encoder.trainable_variables +
                        self.net.predict_conv.trainable_variables +
                        self.net.predict.trainable_variables))

                # Step 2: update encoder + decoder on the MAE reconstruction loss.
                with tf.GradientTape() as t:
                    output, dec = self.net(X)
                    loss2 = losses.MAE(X, dec)
                grads = t.gradient(
                    loss2,
                    self.net.encoder.trainable_variables +
                    self.net.decoder.trainable_variables)
                optimizer.apply_gradients(
                    zip(grads,
                        self.net.encoder.trainable_variables +
                        self.net.decoder.trainable_variables))
                """
                if e+i == 0:
                    self.net.compile(optimizer='adam', loss='MSE')
                    self.net.fit(X, S, batch_size=batch_size, shuffle=False)"""
                print(
                    f"epochs {self.net.epochs} | loss1 = {np.sum(loss1):.2f} | loss2 = {np.sum(loss2):.2f}\n"
                )
            else:
                with tf.GradientTape() as t:
                    output = self.net(X)
                    loss = losses.MSE(S, output)
                grads = t.gradient(loss, self.net.trainable_variables)
                optimizer.apply_gradients(
                    zip(grads, self.net.trainable_variables))
                """
                if e+i == 0:
                    self.net.compile(optimizer='adam', loss='MSE')
                    self.net.fit(X, S, batch_size=batch_size, shuffle=False)"""
                print(
                    f"epochs {self.net.epochs} | loss1 = {np.sum(loss):.2f}\n"
                )
        self.net.epochs += 1
    self.net.save_model('model_check_points')
def step_and_eval(step):
    ...


if __name__ == "__main__":
    gamma = 0.99
    p_lr = 0.01
    v_lr = 0.001
    lam = 0.97
    epochs = 50
    delta = 0.01
    damping_coeff = 0.1
    cg_iters = 10
    backtrack_iters = 10
    backtrack_coeff = 0.8
    train_value_iterations = 80
    num_episodes = 1000
    local_steps_per_epoch = 2000
    info_shapes = None  # ?? Still to be defined

    env = gym.make('CartPole-v0')
    agent = Agent(env.action_space.n)
    Experience = namedtuple('Experience', ['states', 'actions', 'rewards'])
    temp_Experience = namedtuple('Experience', ['states', 'actions', 'rewards', 'values'])
    policy_net = Model(len(env.observation_space.sample()), [64, 64], env.action_space.n, 'policy_net')
    value_net = Model(len(env.observation_space.sample()), [32], 0, 'value_net')
    memory = ReplayMemory(local_steps_per_epoch)
    temp_memory = ReplayMemory(local_steps_per_epoch)
    optimizer_policy_net = tf.optimizers.Adam(p_lr)
    optimizer_value_net = tf.optimizers.Adam(v_lr)

    # Why define the number of local iterations? We could also define the number of episodes
    # to run and then update the policy parameters.
    for epoch in range(epochs):
        state = env.reset()
        done = False
        ep_rewards = []
        returns = []
        advantage = []
        log_probs = []
        avg_rewards = []
        finished_rendering_this_epoch = False

        for t in range(local_steps_per_epoch):
            # To render the gym env once every epoch
            if (not finished_rendering_this_epoch):
                pass  # env.render()

            action = agent.select_action(state, policy_net)
            # log_probs = tf.math.reduce_sum(policy_net(np.atleast_2d(np.array(state.reshape(1,-1))).astype('float32')) * tf.one_hot(np.array(action), env.action_space.n), axis=1)
            value = tf.squeeze(value_net(np.atleast_2d(np.array(state.reshape(1, -1))).astype('float32')))
            next_state, reward, done, _ = env.step(action.numpy())
            state = next_state

            memory.push(Experience(state, action, reward))
            temp_memory.push(temp_Experience(state, action, reward, value))
            ep_rewards.append(reward)

            if done or (t + 1 == local_steps_per_epoch):
                returns += list(memory.return_func(ep_rewards, gamma))
                temp = temp_Experience(*zip(*temp_memory.memory))
                last_val = 0 if done else tf.squeeze(value_net(np.atleast_2d(np.array(state.reshape(1, -1)).astype('float32'))))
                temp_states, temp_actions, temp_rewards, temp_values = \
                    np.asarray(temp[0]), np.asarray(temp[1]), np.asarray(temp[2]), np.asarray(temp[3])
                temp_values = np.append(temp_values, last_val)
                delta = temp_rewards + gamma * temp_values[1:] - temp_values[:-1]
                advantage += list(memory.advantage_func(delta, gamma * lam))
                temp_memory.clear_memory()
                # If the trajectory ends and the episode does not, we should bootstrap for the remaining value
                # memory.update(last_val)
                avg_rewards.append(sum(ep_rewards))
                state, done, ep_rewards = env.reset(), False, []
                finished_rendering_this_epoch = True

        # Updating the policy and value function
        buf = Experience(*zip(*memory.memory))
        states, actions, rewards = np.asarray(buf[0]), np.asarray(buf[1]), np.asarray(buf[2])
        avg_rewards = np.mean(np.asarray(avg_rewards))
        advantage = normalize(advantage)

        for iteration in range(backtrack_iters):
            k_l, a_l_new = set_and_eval(backtrack_coeff ** iteration)
            if iteration == backtrack_iters - 1:
                k_l, a_l_new = set_and_eval(0.)
# Training the value function
with tf.GradientTape() as value_tape:
    value_loss = kls.MSE(returns, tf.squeeze(value_net(np.atleast_2d(np.array(states)).astype('float32'))))

value_variables = value_net.trainable_variables
value_gradients = value_tape.gradient(value_loss, value_variables)
for iteration in range(train_value_iterations):
    # Note: these are the same precomputed gradients applied repeatedly.
    optimizer_value_net.apply_gradients(zip(value_gradients, value_variables))

with summary_writer.as_default():
    tf.summary.scalar('Episode_returns', sum(returns), step=epoch)
    tf.summary.scalar('Running_avg_reward', avg_rewards, step=epoch)
    tf.summary.scalar('Losses', policy_loss, step=epoch)

if epoch % 1 == 0:
    print(f"Episode: {epoch} |Losses: {policy_loss: 0.2f}| Return: {sum(returns)}| Avg_reward: {avg_rewards: 0.2f}")
    sys.stdout.flush()

render_var = input("Do you want to render the env(Y/N) ?")
if render_var == 'Y' or render_var == 'y':
    n_render_iter = int(input("How many episodes? "))
    for i in range(n_render_iter):
        state = env.reset()
        while True:
            action = agent.select_action(state, policy_net)
            env.render()
            n_state, reward, done, _ = env.step(action.numpy())
            state = n_state
            if done:
                break
else:
    print("Thank you for using!")
env.close()
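# Added sketch: the value-update loop above applies the same precomputed gradients
# `train_value_iterations` times. If the intent is several genuine gradient-descent steps on
# the value loss, the gradients can be recomputed each iteration instead; a self-contained
# sketch under that assumption (all names illustrative, not from the original code):
import tensorflow as tf


def fit_value_net(value_net, optimizer, states, returns, iterations=80):
    for _ in range(iterations):
        with tf.GradientTape() as tape:
            values = tf.squeeze(value_net(states))
            loss = tf.reduce_mean(tf.keras.losses.MSE(returns, values))
        grads = tape.gradient(loss, value_net.trainable_variables)
        optimizer.apply_gradients(zip(grads, value_net.trainable_variables))
    return loss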
def imitation_objective(self, batch, validation=False):
    """Imitation learning objective with `concordance loss` (i.e. a loss that encourages the
       network to make consistent predictions among augmented and non-augmented batches of data)
    """
    states, aug_states, speed, similarity = batch
    true_actions = utils.to_float(states['action'])
    true_values = states['value']

    # prediction on NON-augmented and AUGMENTED states
    policy, value = self.network.imitation_predict(states)
    policy_aug, value_aug = self.network.imitation_predict(aug_states)

    # actions, values, speed, and similarities
    actions, actions_aug = utils.to_float(policy['actions']), utils.to_float(policy_aug['actions'])
    values, values_aug = value['value'], value_aug['value']
    pi_speed, pi_speed_aug = policy['speed'], policy_aug['speed']
    v_speed, v_speed_aug = value['speed'], value_aug['speed']
    pi_similarity, pi_similarity_aug = policy['similarity'], policy_aug['similarity']
    v_similarity, v_similarity_aug = value['similarity'], value_aug['similarity']

    if not validation:
        self.log_actions(actions_pred_imitation=actions, actions_pred_aug_imitation=actions_aug)
        self.log(values_pred_imitation=values, values_pred_aug_imitation=values_aug,
                 speed_pi=pi_speed, speed_pi_aug=pi_speed_aug,
                 speed_v=v_speed, speed_v_aug=v_speed_aug,
                 similarity_pi=pi_similarity, similarity_pi_aug=pi_similarity_aug,
                 similarity_v=v_similarity, similarity_v_aug=v_similarity_aug)

    # policy loss = sum of per-action MAE errors
    loss_policy = (tf.reduce_mean(tf.reduce_sum(tf.abs(true_actions - actions), axis=1)) +
                   tf.reduce_mean(tf.reduce_sum(tf.abs(true_actions - actions_aug), axis=1))) / 2.0

    loss_value = (tf.reduce_mean(losses.MSE(y_true=true_values, y_pred=values)) +
                  tf.reduce_mean(losses.MSE(y_true=true_values, y_pred=values_aug))) / 2.0

    loss_speed_policy = (tf.reduce_mean(losses.MSE(y_true=speed, y_pred=pi_speed)) +
                         tf.reduce_mean(losses.MSE(y_true=speed, y_pred=pi_speed_aug))) / 2.0

    loss_speed_value = (tf.reduce_mean(losses.MSE(y_true=speed, y_pred=v_speed)) +
                        tf.reduce_mean(losses.MSE(y_true=speed, y_pred=v_speed_aug))) / 2.0

    loss_similarity_policy = (tf.reduce_mean(losses.MSE(y_true=similarity, y_pred=pi_similarity)) +
                              tf.reduce_mean(losses.MSE(y_true=similarity, y_pred=pi_similarity_aug))) / 2.0

    loss_similarity_value = (tf.reduce_mean(losses.MSE(y_true=similarity, y_pred=v_similarity)) +
                             tf.reduce_mean(losses.MSE(y_true=similarity, y_pred=v_similarity_aug))) / 2.0

    # concordance loss: make both predictions as close as possible
    concordance_policy = (tf.reduce_mean(losses.MSE(actions, actions_aug)) +
                          tf.reduce_mean(losses.MSE(pi_speed, pi_speed_aug)) +
                          tf.reduce_mean(losses.MSE(pi_similarity, pi_similarity_aug))) / 3.0

    concordance_value = (tf.reduce_mean(losses.MSE(values, values_aug)) +
                         tf.reduce_mean(losses.MSE(v_speed, v_speed_aug)) +
                         tf.reduce_mean(losses.MSE(v_similarity, v_similarity_aug))) / 3.0

    # total loss
    total_loss_policy = \
        loss_policy + self.aux * (loss_speed_policy + loss_similarity_policy) + self.delta * concordance_policy
    total_loss_value = \
        loss_value + self.aux * (loss_speed_value + loss_similarity_value) + self.eta * concordance_value

    if not validation:
        self.log(loss_policy=loss_policy, loss_value=loss_value,
                 loss_speed_policy=loss_speed_policy, loss_similarity_policy=loss_similarity_policy,
                 loss_speed_value=loss_speed_value, loss_similarity_value=loss_similarity_value,
                 loss_concordance_policy=concordance_policy, loss_concordance_value=concordance_value,
                 # loss_steer=steer_penalty, loss_throttle=throttle_penalty, loss_entropy=entropy_penalty
                 )

    return total_loss_policy, total_loss_value
valList.append(np.mean(vLoss))

df = pd.DataFrame({"trainLoss": trainList, "valLoss": valList})
df.to_csv("RCC_DumbellXL_4x_MSE.csv", sep=",", header=True, encoding="UTF-8")

dL = dataOp.data_loader("C:/Datasets/MRI_Data/Recon_v4/Val", 1, 4, 10, False)
d = dL.__getitem__(200)
out = model.predict(d)
out = np.reshape(out, (256, 256, 2))
out = out[:, :, 0] + 1j * out[:, :, 1]

plt.figure()
plt.subplot(1, 2, 1)
plt.imshow(np.abs(out), cmap='gray')
plt.title('Magnitude')
plt.colorbar(orientation='horizontal', shrink=0.9)
plt.subplot(1, 2, 2)
plt.imshow(np.angle(out), cmap='gray')
plt.title('Phase')
plt.colorbar(orientation='horizontal', shrink=0.9)
plt.show()

y_pred = model.predict(d)
y_true = ifftConv(d[0])
loss = losses.MSE(y_true, y_pred)
print(y_pred.shape, y_true.shape, loss.shape)
def vae_loss(y_true, y_pred):
    xent_loss = losses.MSE(y_true, y_pred)
    kl_loss = -0.5 * K.mean(1 + z_log_sigma - K.square(z_mean) - K.exp(z_log_sigma))
    loss = xent_loss + kl_loss
    return loss
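# Added note: `losses.MSE` averages the squared error over the feature axis per sample, so for
# high-dimensional inputs the reconstruction term can be small relative to the KL term; VAE
# implementations often sum over features instead. A variant under that assumption, reusing the
# same closure variables `z_mean` and `z_log_sigma` as `vae_loss` above (the function name is
# illustrative):
def vae_loss_summed(y_true, y_pred):
    # reconstruction term summed over features, KL term summed over latent dimensions
    recon_loss = K.sum(K.square(y_true - y_pred), axis=-1)
    kl_loss = -0.5 * K.sum(1 + z_log_sigma - K.square(z_mean) - K.exp(z_log_sigma), axis=-1)
    return recon_loss + kl_loss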