def trainer(self):
    steps = 0
    loss_deque = deque(maxlen=100)
    train_loss = []
    last_epoch = 0

    if self.opts.resume:
        last_epoch, loss = self.load_progress()

    for e in range(self.opts.epoch - last_epoch):
        '''Adaptive LR change'''
        for param_group in self.RNN_optim.param_groups:
            param_group['lr'] = util.linear_LR(e, self.opts)
            print('epoch: {}, RNN_LR: {:.4}'.format(e, param_group['lr']))

        if self.opts.save_progress:
            '''Save the progress before starting to adjust the LR'''
            if e == self.opts.const_epoch:
                self.save_progress(self.opts.const_epoch, np.mean(loss_deque))
            if e % self.opts.save_every == 0:
                self.save_progress(e, np.mean(loss_deque))

        for data, labels, lengths in self.data_loader:
            steps += 1
            # sort the batch by sequence length (required for packed sequences)
            data, labels, lengths = util.sort_batch(data, labels, lengths)

            self.RNN_optim.zero_grad()
            # the model computes and returns the loss directly
            loss = self.RNN(data.to(device), labels.to(device), lengths.to(device))
            loss.backward()
            self.RNN_optim.step()

            loss_deque.append(loss.cpu().item())
            train_loss.append(np.mean(loss_deque))

            if steps % self.opts.print_every == 0:
                print('Epoch: {}, Steps: {}, Loss: {:.4}'.format(e, steps, loss.item()))
                util.raw_score_plotter(train_loss)

    if self.opts.save_progress:
        '''Save the final model'''
        self.save_progress(-1, np.mean(loss_deque))

    util.raw_score_plotter(train_loss)
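# The RNN trainers in this section call a util.sort_batch helper that is not shown
# here. Below is a minimal sketch of what it might look like, under the assumption
# that it reorders a padded batch by descending sequence length so the data can
# later be packed with torch.nn.utils.rnn.pack_padded_sequence; the actual util
# implementation may differ.
import torch

def sort_batch(data, labels, lengths):
    """Sort a padded batch (and its labels) by descending sequence length."""
    lengths, sorted_idx = lengths.sort(descending=True)
    data = data[sorted_idx]
    labels = labels[sorted_idx]
    return data, labels, lengths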
def train(self):
    steps = 0
    loss_deque = deque(maxlen=100)
    train_loss = []

    for e in range(self.opts.epoch):
        running_loss = 0
        correct = 0
        total = 0

        for images, labels in self.trainloader:
            steps += 1

            output = self.model(images.to(device))

            self.optimizer.zero_grad()
            loss = self.criterion(output, labels.to(device).long().squeeze(1))
            loss.backward()
            self.optimizer.step()

            running_loss += loss.item()

            # count correct predictions for the running accuracy
            preds = torch.max(output, 1)[1]
            for pred, label in zip(preds, labels):
                if pred.cpu().item() == label.item():
                    correct += 1
                total += 1

            loss_deque.append(loss.cpu().item())
            train_loss.append(np.mean(loss_deque))

            if steps % self.opts.print_every == 0:
                print("Epoch: {}/{}...".format(e + 1, self.opts.epoch),
                      "Loss: {:.4f}".format(running_loss / self.opts.print_every),
                      "Running Accuracy: {:.4f}".format(correct / float(total)))
                running_loss = 0

    util.raw_score_plotter(train_loss)
def train(self, D, D_solver, criterion, dataloader):
    steps = 0
    loss_deque = deque(maxlen=100)
    train_loss = []

    for epoch in range(self.opts.epoch):
        running_loss = 0
        correct = 0
        total = 0

        for x, y in dataloader:
            # skip the last, smaller batch so the batch size stays constant
            if len(x) != self.opts.batch:
                continue
            steps += 1

            D_solver.zero_grad()

            '''Real Images'''
            output = D(x.to(device))  # returns logits for the real data
            loss = criterion(output, y.to(device))
            loss.backward()
            D_solver.step()  # one gradient-descent step on the loss

            running_loss += loss.item()

            correct_, total_ = util.prediction_accuracy(output, y.to(device))
            correct += correct_
            total += total_

            loss_deque.append(loss.cpu().item())
            train_loss.append(np.mean(loss_deque))

            if steps % self.opts.print_every == 0:
                print("Epoch: {}/{}...".format(epoch + 1, self.opts.epoch),
                      "Loss: {:.4f}".format(running_loss / self.opts.print_every),
                      "Running Accuracy: {:.4f}".format(correct / float(total)))
                running_loss = 0

    util.raw_score_plotter(train_loss)

    if self.opts.save_progress:
        print('\nSaving the model\n')
        torch.save(D.state_dict(), self.opts.model_path)
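# util.prediction_accuracy is called above but not defined in this section. A small
# sketch under the assumption that it takes logits and integer class labels and
# returns (#correct, #total) for the batch; the actual helper may differ.
import torch

def prediction_accuracy(output, labels):
    """Count how many argmax predictions match the labels in this batch."""
    preds = torch.max(output, 1)[1]           # argmax over the class dimension
    correct = (preds == labels.view(-1)).sum().item()
    return correct, labels.size(0)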
def trainer(opts, RNN, RNN_optim, criterion, data_loader):
    steps = 0
    loss_deque = deque(maxlen=100)
    train_loss = []

    for e in range(opts.epoch):
        for data, labels, lengths in data_loader:
            steps += 1
            # sort the batch by sequence length before packing
            data, labels, lengths = util.sort_batch(data, labels, lengths)

            RNN_optim.zero_grad()
            pred = RNN(data, lengths)
            loss = criterion(pred, labels.to(device))
            loss.backward()
            RNN_optim.step()

            loss_deque.append(loss.cpu().item())
            train_loss.append(np.mean(loss_deque))

            if steps % opts.print_every == 0:
                print('Epoch: {}, Steps: {}, Loss: {:.4}'.format(e, steps, loss.item()))
                util.raw_score_plotter(train_loss)
def trainer(opts, RNN, RNN_optim, criterion, loader):
    last_100_loss = deque(maxlen=100)
    last_100_g_loss = []
    iter_count = 0

    for epoch in range(opts.epoch):
        '''Adaptive LR change'''
        for param_group in RNN_optim.param_groups:
            param_group['lr'] = util.linear_LR(epoch, opts)
            print('Epoch: {}, RNN_LR: {:.4}'.format(epoch, param_group['lr']))

        for image, label in loader:
            '''Images'''
            # treat each 28x28 image as a sequence of 28 rows
            image = image.view(-1, 28, 28)
            image = image.to(device)
            label = label.to(device)

            '''Run the data through the RNN'''
            output = RNN(image)
            loss = criterion(output, label)

            '''Take a gradient step'''
            RNN_optim.zero_grad()
            loss.backward()
            RNN_optim.step()  # one gradient-descent step on the loss

            '''Track the running loss'''
            last_100_loss.append(loss.item())
            last_100_g_loss.append(np.mean(last_100_loss))

            iter_count += 1
            if iter_count % opts.print_every == 0:
                print('Epoch: {}, Iter: {}, Loss: {:.4}'.format(epoch, iter_count, loss.item()))
                util.raw_score_plotter(last_100_g_loss)
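# Several trainers above set the learning rate with util.linear_LR, which is not
# defined in this section. A minimal sketch, assuming it holds the learning rate
# constant for opts.const_epoch epochs and then decays it linearly to zero by
# opts.epoch, and that opts exposes lr, const_epoch and epoch; the real helper may
# differ.
def linear_LR(epoch, opts):
    """Constant LR for const_epoch epochs, then a linear decay to 0 at opts.epoch."""
    if epoch < opts.const_epoch:
        return opts.lr
    remaining = opts.epoch - epoch
    return opts.lr * remaining / float(opts.epoch - opts.const_epoch)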
def train_CycleGan(train_step, loss, reconstruction, show_every=opts.show_every,
                   print_every=opts.print_every, batch_size=128, num_epoch=10):
    """
    Trains the CycleGAN.
    :param train_step: ops that define what to do with the loss functions (minimize or maximize)
    :param loss: ops that define the loss functions to be minimized
    :param reconstruction: an op that defines how to reconstruct a target image
    :param show_every: how often to show an image to gauge training progress
    :param print_every: how often to print the loss
    :param batch_size: batch size of the training samples
    :param num_epoch: how many times to iterate over the training samples
    :return:
    """
    image_dir = '/home/youngwook/Downloads/edges2shoes'
    folder_names = get_folders(image_dir)

    train_folder = folder_names[2]
    val_folder = folder_names[1]

    train_data = AB_Combined_ImageLoader(train_folder, size=opts.resize,
                                         num_images=opts.num_images,
                                         randomcrop=opts.image_shape)
    train_loader = DataLoader(train_data, batch_size=opts.batch, shuffle=True, num_workers=12)

    step = 0

    target_pred_list = []
    input_pred_list = []
    input_true_list = []
    target_true_list = []

    last_100_loss_dq = deque(maxlen=100)
    last_100_loss = []

    # history pools of previously generated images for the discriminator replay
    # (assumed here; they are not defined in the original snippet)
    input_memory = deque(maxlen=50)
    target_memory = deque(maxlen=50)

    checkpoint_dir = './model'
    saver = tf.train.Saver()

    if opts.resume:
        # print('Loading Saved Checkpoint')
        tf_util.load_session(checkpoint_dir, saver, session, model_name=opts.model_name)

    for epoch in range(num_epoch):
        lr = util.linear_LR(epoch, opts)

        for (minibatch, minbatch_y) in train_loader:
            # translate a batch of images with the current generators
            target_pred, input_pred = session.run(
                [target_image_prediction, input_image_prediction],
                feed_dict={input_image: minibatch, target_image: minbatch_y,
                           adaptive_lr: lr})

            input_memory.append(input_pred)
            target_memory.append(target_pred)

            target_replay_images = np.vstack(target_memory)
            input_replay_images = np.vstack(input_memory)

            # train the generator
            _, G_loss_curr = session.run(
                [train_step[2], loss[1]],
                feed_dict={input_image: minibatch, target_image: minbatch_y,
                           input_replay: input_replay_images,
                           target_replay: target_replay_images,
                           input_image_pred: input_pred,
                           target_image_pred: target_pred,
                           adaptive_lr: lr})

            # train the discriminators
            _, D_loss_curr = session.run(
                [train_step[0], loss[0][0]],
                feed_dict={input_image: minibatch,
                           input_replay: input_replay_images,
                           adaptive_lr: lr})

            _, D_loss_curr = session.run(
                [train_step[1], loss[0][1]],
                feed_dict={target_image: minbatch_y,
                           target_replay: target_replay_images,
                           adaptive_lr: lr})

            last_100_loss_dq.append(G_loss_curr)
            last_100_loss.append(np.mean(last_100_loss_dq))

            step += 1
            if step % show_every == 0:
                '''every show_every steps, save the images translated during this iteration'''
                target_name = './img/target_pred_%s.png' % step
                input_name = './img/input_pred_%s.png' % step
                input_true_name = './img/true_input_%s.png' % step
                target_true_name = './img/true_target_%s.png' % step

                # translate the images
                target_pred, input_pred = session.run(
                    [target_image_prediction, input_image_prediction],
                    feed_dict={input_image: minibatch, target_image: minbatch_y})

                target_pred_list.append(target_name)
                input_pred_list.append(input_name)
                input_true_list.append(input_true_name)
                target_true_list.append(target_true_name)

                util.show_images(target_pred[:opts.batch], opts, target_name)
                util.plt.show()
                util.show_images(minbatch_y[:opts.batch], opts, target_true_name)
                util.plt.show()

                util.show_images(input_pred[:opts.batch], opts, input_name)
                util.plt.show()
                util.show_images(minibatch[:opts.batch], opts, input_true_name)
                util.plt.show()

            if step % print_every == 0:
                print('Epoch: {}, G loss: {:.4}'.format(epoch, G_loss_curr))
                util.raw_score_plotter(last_100_loss)

        # save the model after every epoch
        if opts.save_progress:
            tf_util.save_session(saver, session, checkpoint_dir, epoch,
                                 model_name=opts.model_name)

    util.raw_score_plotter(last_100_loss)

    image_to_gif('', target_pred_list, duration=0.5, gifname='target_pred')
    image_to_gif('', input_pred_list, duration=0.5, gifname='input_pred')
    image_to_gif('', input_true_list, duration=0.5, gifname='input_true')
    image_to_gif('', target_true_list, duration=0.5, gifname='target_true')
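# image_to_gif is used above but not defined in this section. A plausible sketch,
# assuming it stitches the saved PNG frames into a gif with imageio; the real helper
# may differ in its signature or styling.
import imageio

def image_to_gif(root, filenames, duration=0.5, gifname='output'):
    """Combine the images listed in `filenames` into `<gifname>.gif`."""
    frames = [imageio.imread(root + name) for name in filenames]
    imageio.mimsave(root + gifname + '.gif', frames, duration=duration)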
# Fragment: the per-episode bookkeeping at the end of the DQN() training loop,
# followed by the top-level driver code.

        # a deque object used to hold the scores of the last 100 episodes
        total_reward_window.append(rewards)
        # a list that holds the average score of the last 100 episodes
        avg_score_last_100.append(np.mean(total_reward_window))
        # total_reward holds all the rewards
        total_reward.append(rewards)
        # epsilon is decayed after every episode
        epsilon = max(min_epsilon, epsilon * decay)

        print('\rEpisode {}\tAverage Score: {:.3f}\tScore: {:.3f}'.format(
            episodes, avg_score_last_100[-1], rewards), end="")

        if episodes % PRINT_EVERY == 0:
            print('\rEpisode {}\tAverage Score: {:.3f}\tScore: {:.3f}'.format(
                episodes, avg_score_last_100[-1], rewards))

        if avg_score_last_100[-1] >= threshold:
            print('\nEnvironment solved in {:d} episodes!\tAverage Score: {:.3f}\tScore: {:.3f}'.format(
                episodes - 100, avg_score_last_100[-1], rewards))
            torch.save(agent.local_model.state_dict(), 'successful_model.pth')
            break

    return total_reward, avg_score_last_100


num_episodes = 10000
threshold = 13

scores, avg_last_100 = DQN(num_episodes, threshold)

raw_score_plotter(scores)
plotter(env_name, len(scores), avg_last_100, threshold)
def train(self):
    """
    Vanilla GAN trainer.

    Everything it needs lives on the instance: the discriminator self.D, its
    optimizer self.D_solver, the dataloader self.loader, and the options in
    self.opts (print_every, save_every, const_epoch, cls_lambda, ...).
    """
    last_100_loss = deque(maxlen=100)
    last_100_g_loss = []
    iter_count = 0
    last_epoch = 0

    if self.opts.resume:
        last_epoch, loss = self.load_progress()

    for epoch in range(self.opts.epoch - last_epoch):
        '''Adaptive LR change'''
        for param_group in self.D_solver.param_groups:
            param_group['lr'] = util.linear_LR(epoch, self.opts)
            print('epoch: {}, D_LR: {:.4}'.format(epoch, param_group['lr']))

        if self.opts.save_progress:
            '''Save the progress before starting to adjust the LR'''
            if epoch == self.opts.const_epoch:
                self.save_progress(self.opts.const_epoch, np.mean(last_100_loss))

        for image, label in self.loader:
            '''Real images'''
            image = image.to(device)

            '''One-hot encoded real labels'''
            label = label.float().to(device)

            '''Train the discriminator on the classification logits'''
            real_logits_cls = self.D(image)
            loss = self.opts.cls_lambda * F.binary_cross_entropy_with_logits(
                real_logits_cls, label, reduction='sum') / real_logits_cls.size(0)

            self.D_solver.zero_grad()
            loss.backward()
            self.D_solver.step()  # one gradient-descent step on the loss

            iter_count += 1
            last_100_loss.append(loss.cpu().item())
            last_100_g_loss.append(np.mean(last_100_loss))

            if iter_count % self.opts.print_every == 0:
                print('Epoch: {}, Iter: {}, D: {:.4}'.format(epoch, iter_count, loss.item()))
                util.raw_score_plotter(last_100_g_loss)

            if self.opts.save_progress:
                if iter_count % self.opts.save_every == 0:
                    self.save_progress(epoch, np.mean(last_100_loss))

    if self.opts.save_progress:
        '''Save the final model'''
        self.save_progress(-1, np.mean(last_100_loss))
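# Several trainers above call self.save_progress / self.load_progress, which are not
# shown in this section. The rough sketch below illustrates what such methods might
# look like as members of the trainer class, assuming the checkpoint stores the
# epoch, the running loss, and the model and optimizer state dicts, and assuming a
# hypothetical self.opts.checkpoint_path; the real methods may store more or use
# different paths.
import torch

def save_progress(self, epoch, loss):
    """Checkpoint the model, the optimizer and the training bookkeeping."""
    torch.save({
        'epoch': epoch,
        'loss': loss,
        'model_state': self.D.state_dict(),
        'optim_state': self.D_solver.state_dict(),
    }, self.opts.checkpoint_path)

def load_progress(self):
    """Restore a checkpoint and return (last_epoch, loss) so training can resume."""
    checkpoint = torch.load(self.opts.checkpoint_path)
    self.D.load_state_dict(checkpoint['model_state'])
    self.D_solver.load_state_dict(checkpoint['optim_state'])
    return checkpoint['epoch'], checkpoint['loss']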
def main():
    seeding()

    env = UnityEnvironment(file_name="Tennis.x86_64")
    env_name = 'Tennis'

    # get the default brain
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]
    env_info = env.reset(train_mode=True)[brain_name]

    # number of parallel agents
    num_agents = len(env_info.agents)

    # size of each action
    action_size = brain.vector_action_space_size

    # examine the state space
    states = env_info.vector_observations
    state_size = states.shape[-1]

    # number of training episodes; raise this (say to 30000) to experiment
    number_of_episodes = 10000
    episode_length = 10000
    batchsize = 128

    # amplitude of the OU noise; this slowly decreases to 0
    noise = 1
    noise_reduction = 0.9999

    log_path = os.getcwd() + "/log"
    model_dir = os.getcwd() + "/model_dir"
    os.makedirs(model_dir, exist_ok=True)

    # initialize the memory buffer
    buffer = ReplayBuffer(int(500000), batchsize, 0)

    # initialize the policy and critic
    maddpg = MADDPG(state_size, action_size, num_agents, seed=12345,
                    discount_factor=0.95, tau=0.02)

    # how often to update the MADDPG model
    episode_per_update = 2

    # training loop
    PRINT_EVERY = 5
    scores_deque = deque(maxlen=100)

    # holds raw scores
    scores = []
    # holds the average score of the last 100 episodes
    avg_last_100 = []

    threshold = 0.5

    for episode in range(number_of_episodes):
        env_info = env.reset(train_mode=True)[brain_name]  # reset the environment
        state = env_info.vector_observations               # current state (for each agent)

        episode_reward_agent0 = 0
        episode_reward_agent1 = 0

        for agent in maddpg.maddpg_agent:
            agent.noise.reset()

        for episode_t in range(episode_length):
            actions = maddpg.act(torch.tensor(state, dtype=torch.float), noise=noise)
            noise *= noise_reduction

            actions_array = torch.stack(actions).detach().numpy()

            env_info = env.step(actions_array)[brain_name]

            next_state = env_info.vector_observations
            reward = env_info.rewards
            done = env_info.local_done

            episode_reward_agent0 += reward[0]
            episode_reward_agent1 += reward[1]

            '''
            Add the data to the buffer. The two agents' states could be concatenated
            here or inside the MADDPG update function; doing it here is simpler, at
            the cost of the replay buffer also holding the concatenated copies of
            state and next_state.
            '''
            full_state = np.concatenate((state[0], state[1]))
            full_next_state = np.concatenate((next_state[0], next_state[1]))

            buffer.add(state, full_state, actions_array, reward, next_state,
                       full_next_state, done)

            state = next_state

            # update once every episode_per_update episodes
            if len(buffer) > batchsize and episode % episode_per_update == 0:
                for i in range(num_agents):
                    samples = buffer.sample()
                    maddpg.update(samples, i)
                # soft-update the target networks towards the actual networks
                maddpg.update_targets()

            if np.any(done):  # if any of the agents are done, end the episode
                break

        episode_reward = max(episode_reward_agent0, episode_reward_agent1)
        scores.append(episode_reward)
        scores_deque.append(episode_reward)
        avg_last_100.append(np.mean(scores_deque))

        print('\rEpisode {}\tAverage Score: {:.4f}\tScore: {:.4f}'.format(
            episode, avg_last_100[-1], episode_reward), end="")

        if episode % PRINT_EVERY == 0:
            print('\rEpisode {}\tAverage Score: {:.4f}'.format(episode, avg_last_100[-1]))

        # training ends, and the model is saved, once the threshold is reached
        if avg_last_100[-1] >= threshold:
            save_dict_list = []

            for i in range(num_agents):
                save_dict = {
                    'actor_params': maddpg.maddpg_agent[i].actor.state_dict(),
                    'actor_optim_params': maddpg.maddpg_agent[i].actor_optimizer.state_dict(),
                    'critic_params': maddpg.maddpg_agent[i].critic.state_dict(),
                    'critic_optim_params': maddpg.maddpg_agent[i].critic_optimizer.state_dict()
                }
                save_dict_list.append(save_dict)

            torch.save(save_dict_list,
                       os.path.join(model_dir, 'episode-{}.pt'.format(episode)))

            # plot the results
            raw_score_plotter(scores)
            plotter(env_name, len(scores), avg_last_100, threshold)
            break
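# ReplayBuffer is instantiated in main() above but not defined in this section. A
# compact sketch of the interface the training loop relies on (add, sample, __len__),
# under the assumption that it stores whole multi-agent transitions and samples them
# uniformly; the actual buffer may batch the per-agent fields differently.
import random
from collections import deque

class ReplayBuffer:
    def __init__(self, size, batch_size, seed):
        self.memory = deque(maxlen=size)
        self.batch_size = batch_size
        random.seed(seed)

    def add(self, state, full_state, action, reward, next_state, full_next_state, done):
        """Store one multi-agent transition."""
        self.memory.append((state, full_state, action, reward,
                            next_state, full_next_state, done))

    def sample(self):
        """Uniformly sample a batch of stored transitions."""
        return random.sample(self.memory, self.batch_size)

    def __len__(self):
        return len(self.memory)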
def trainer(self):
    steps = 0
    correct = 0
    total = 0
    loss_deque = deque(maxlen=100)
    train_loss = []
    last_epoch = 0

    if self.opts.resume:
        last_epoch, loss = self.load_progress()

    for e in range(self.opts.epoch - last_epoch):
        '''Adaptive LR change'''
        for param_group in self.RNN_optim.param_groups:
            param_group['lr'] = util.linear_LR(e, self.opts)
            print('epoch: {}, RNN_LR: {:.4}'.format(e, param_group['lr']))

        if self.opts.save_progress:
            '''Save the progress before starting to adjust the LR'''
            if e == self.opts.const_epoch:
                self.save_progress(self.opts.const_epoch, np.mean(loss_deque))
            if e % self.opts.save_every == 0:
                self.save_progress(e, np.mean(loss_deque))

        for data, labels, lengths in self.data_loader:
            steps += 1
            # sort the batch by sequence length before packing
            data, labels, lengths = util.sort_batch(data, labels, lengths)

            self.RNN_optim.zero_grad()
            pred = self.RNN(data, lengths)
            loss = self.criterion(pred, labels.to(device))
            loss.backward()
            self.RNN_optim.step()

            # pick the argmax as the prediction and count correct labels
            outputs = torch.max(pred, 1)[1]
            for output, label in zip(outputs, labels):
                if output.cpu().item() == label.item():
                    correct += 1
                total += 1

            loss_deque.append(loss.cpu().item())
            train_loss.append(np.mean(loss_deque))

            if steps % self.opts.print_every == 0:
                print('Epoch: {}, Steps: {}, Loss: {:.4}, Train Accuracy: {:.4}'.format(
                    e, steps, loss.item(), correct / float(total)))
                correct = 0
                total = 0
                util.raw_score_plotter(train_loss)

    if self.opts.save_progress:
        '''Save the final model'''
        self.save_progress(-1, np.mean(loss_deque))

    util.raw_score_plotter(train_loss)
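# Every trainer above reports progress through util.raw_score_plotter, which is not
# defined in this section. A minimal sketch, assuming it simply plots the running
# mean loss/score curve with matplotlib and writes it to disk; the real helper may
# label or style the plot differently, and the filename here is only illustrative.
import matplotlib.pyplot as plt

def raw_score_plotter(scores, filename='training_loss.png'):
    """Plot the running training loss/score curve and save it to a file."""
    plt.figure()
    plt.plot(scores)
    plt.xlabel('Step')
    plt.ylabel('Running mean loss')
    plt.title('Training progress')
    plt.savefig(filename)
    plt.close()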