def main():
    from tools import plot_data

    print()
    print()
    print("\t \t \033[1m Creating Plot for Antidunes \033[0m")
    print()
    print()
    # TODO: Implement choice as to whether density should be calculated or read from file
    # FIXME: afjdkajfa fjljk
    plot_data(save_fig=0, plot_density=1)
    print("\t\t\t \033[1m D O N E !\033[0m")
def visualisation(self, data):
    self.predict(data)
    lc1 = mc.LineCollection(self.lines1, color="r")
    lc2 = mc.LineCollection(self.lines2, color="b")
    fig, ax = plt.subplots()
    ax.add_collection(lc1)
    ax.add_collection(lc2)
    ax.autoscale()
    ax.margins(0.1)
    tools.plot_data(self.data, self.label)
    tools.plot_data(data, self.predict(data), dec=2)
    plt.show()
def begin_DIC(self):
    '''
    Begins DIC analysis, using the selected parameters.
    '''
    # Test initial guess:
    reply = self.test_init()
    if reply == QtGui.QMessageBox.No:
        return

    # If initial guess test is positive, begin analysis:
    self.progressBar.show()

    # Begin timing:
    beginning_time = time.time()

    if self.mode == 'integer':
        result = tools.get_integer_translation(
            self.mraw_path, self.roi_reference, self.roi_size,
            (self.N_images, self.h, self.w),
            progressBar=self.progressBar, n_im=10)
        inc = result[-1]  # Image sequence selection increment.
        n_iters = [1]
        errors = dict()
        # Unit calibration:
        if self.mm_px != 1:
            result[0][:, 0] = result[0][:, 0] * self.mm_px
            result[0][:, 1] = result[0][:, 1] * self.mm_px

    elif self.mode == 'translation':
        if self.debug:
            print('Model: SIMPLE TRANSLATION')
        # try:
        result = tools.get_simple_translation(
            self.mraw_path, self.roi_reference, self.roi_size,
            (self.N_images, self.h, self.w),
            progressBar=self.progressBar, increment=self.sequence_increment)
        # except:
        #     print('An error occurred. Try using a different method.')
        inc = result[-1]
        n_iters = [1]
        errors = dict()
        # Unit calibration:
        if self.mm_px != 1:
            result[0][:, 0] = result[0][:, 0] * self.mm_px
            result[0][:, 1] = result[0][:, 1] * self.mm_px

    elif self.mode == 'rigid':
        if self.debug:
            print('Model: RIGID')
            print('Interpolating (cropped?) ROI ({:d} px border).'.format(self.crop_px))
        try:
            result = tools.get_rigid_movement(
                self.mraw_path, self.roi_reference, self.roi_size,
                (self.N_images, self.h, self.w),
                progressBar=self.progressBar, tol=self.conv_tol,
                maxiter=self.max_iter, int_order=self.int_order,
                increment=self.sequence_increment, crop=self.crop_px)
        except ValueError:
            if self.debug:
                print('An error occurred attempting to use cropped ROI, continuing without cropping.')
            result = tools.get_rigid_movement(
                self.mraw_path, self.roi_reference, self.roi_size,
                (self.N_images, self.h, self.w),
                progressBar=self.progressBar, tol=self.conv_tol,
                maxiter=self.max_iter, int_order=self.int_order,
                increment=self.sequence_increment, crop=False)
        n_iters = result[-2]
        inc = result[-1]
        errors = result[1]
        # Unit calibration:
        if self.mm_px != 1:
            result[0][:, 0] = result[0][:, 0] * self.mm_px
            result[0][:, 1] = result[0][:, 1] * self.mm_px

    elif self.mode == 'deformations':
        if self.debug:
            print('Model: DEFORMABLE')
            print('Interpolating (cropped?) ROI ({:d} px border).'.format(self.crop_px))
        try:
            result = tools.get_affine_deformations(
                self.mraw_path, self.roi_reference, self.roi_size,
                (self.N_images, self.h, self.w),
                progressBar=self.progressBar, tol=self.conv_tol,
                maxiter=self.max_iter, int_order=self.int_order,
                increment=self.sequence_increment, crop=self.crop_px)
        except ValueError:
            if self.debug:
                print('An error occurred attempting to use cropped ROI, continuing without cropping.')
            result = tools.get_affine_deformations(
                self.mraw_path, self.roi_reference, self.roi_size,
                (self.N_images, self.h, self.w),
                progressBar=self.progressBar, tol=self.conv_tol,
                maxiter=self.max_iter, int_order=self.int_order,
                increment=self.sequence_increment, crop=False)
        n_iters = result[-2]
        inc = result[-1]
        errors = result[1]
        # Unit calibration:
        if self.mm_px != 1:
            result[0][:, 2] = result[0][:, 2] * self.mm_px
            result[0][:, 5] = result[0][:, 5] * self.mm_px

    # Hide ROI center marker:
    self.imw.CHroi.hide()
    self.ax.hide()
    self.xlabel.hide()
    self.ay.hide()
    self.ylabel.hide()

    # If maximum number of iterations was reached:
    if n_iters[-1] == 0:
        if self.debug:
            # Print optimization loop iteration numbers.
            print('\nMaximum iterations reached. Iteration numbers by image:\n{:}\n'.format(n_iters))
        niter_warning = QtGui.QMessageBox.warning(
            self, 'Warning!',
            'Maximum iterations reached in the optimization process ' +
            '(image {:}).\n(Iterations: mean: {:0.3f}, std: {:0.3f})\n'.format(
                n_iters[-2] + 1, np.mean(n_iters[:-2]), np.std(n_iters[:-2])) +
            'If this occurred early in the analysis process, the selected ' +
            'region of interest might be inappropriate.\n' +
            'Try moving the ROI or increasing its size.\n\n' +
            'Do you wish to proceed to the analysis results anyway?',
            QtGui.QMessageBox.Yes | QtGui.QMessageBox.No,
            QtGui.QMessageBox.No)
        if niter_warning == QtGui.QMessageBox.No:
            self.to_beginning()
            return

    # If warnings or errors were raised:
    if len(errors.keys()) != 0:
        if self.debug:
            print('\nErrors ({:d}) occurred during analysis. See log for more info.'.format(
                len(errors.keys())))
        matrices = [{key: item['warp_matrix']} for key, item in errors.items()]
        pickle.dump(matrices, open(self.save_path + '/warp_matrices.pkl', 'wb'))
        error_warning = QtGui.QMessageBox.warning(
            self, 'Warning!',
            'Errors occurred during the analysis ' +
            '(first at image {:d}).\n(Total: {:d} errors\n'.format(
                min(errors.keys()), len(errors.keys())) +
            'Iterations: mean: {:0.3f}, std: {:0.3f})\n'.format(
                np.mean(n_iters[:-2]), np.std(n_iters[:-2])) +
            'If this occurred early in the analysis process, the selected ' +
            'region of interest might be inappropriate.\n' +
            'Try moving the ROI or increasing its size.\n\n' +
            'Do you wish to proceed to the analysis results anyway?',
            QtGui.QMessageBox.Yes | QtGui.QMessageBox.No,
            QtGui.QMessageBox.No)
        if error_warning == QtGui.QMessageBox.No:
            self.to_beginning()
            return

    time_taken = time.time() - beginning_time
    self.kin = result[0]
    self.t = np.reshape(np.arange(len(self.kin)) * (inc / self.fps), (len(self.kin), 1))

    # Save the results:
    tkin_data = np.hstack((self.t, self.kin))
    timestamp = datetime.datetime.now().strftime('%d-%m-%H-%M-%S')
    print(self.timestampCheckbox.checkState())
    if self.timestampCheckbox.checkState():
        stamp = timestamp
    else:
        stamp = ''
    self.save_csv(data=tkin_data, stamp=stamp)
    self.pickledump(data=tkin_data, stamp=stamp)

    # Show black image - to close loaded memmap:
    self.imw.setImage(np.zeros((100, 100)))

    # End-of-analysis pop-up message:
    end_reply = QtGui.QMessageBox.question(
        self, 'Analysis ended!',
        '{:} images processed (in {:0.1f} s).\n'.format(len(self.kin), time_taken) +
        'Results saved to:\n{} ({}).\n\n'.format(
            self.save_path.replace('\\', '/'), timestamp) +
        'Do you wish to proceed to the analysis results?',
        QtGui.QMessageBox.Yes | QtGui.QMessageBox.No,
        QtGui.QMessageBox.No)

    # Result visualization:
    if end_reply == QtGui.QMessageBox.Yes:
        tools.plot_data(tkin_data, self.unit)
    self.to_beginning()

    # Delete temporary file:
    head, tail = os.path.split(self.mraw_path)
    if self.image_type in ['tif', 'tiff'] and tail == '_images.npy':
        delete_temp_reply = QtGui.QMessageBox.question(
            self, 'Delete temporary files',
            'A temporary file has been created from .tif images ' +
            '({:s}). Do you wish to remove it? '.format(self.mraw_path) +
            '(Select "No", if you plan to analyse the same image sequence again.)',
            QtGui.QMessageBox.Yes | QtGui.QMessageBox.No,
            QtGui.QMessageBox.Yes)
        if delete_temp_reply == QtGui.QMessageBox.Yes:
            if self.debug:
                print('Deleting temporary .npy file.')
            os.remove(self.mraw_path)
def train(net, data, optimizer, model_path, plot_dir, batch_size, epochs,
          cuda=False, grad_clip=None, target_net=None, env=None, low=0,
          high=0.05, target_test_episodes=1):
    """
    Train the QBN.

    :param net: given network
    :param data: given data to train the network on
    :param optimizer: optimizer method (Adam is preferred)
    :param model_path: path where the model is saved
    :param plot_dir: path where the plots are saved
    :param batch_size: batch size
    :param epochs: number of training epochs
    :param cuda: whether cuda is available
    :param grad_clip: max norm of the gradients
    :param target_net: function wrapping the trained QBN into the network evaluated on the environment
    :param env: environment
    :param low: lower bound of noise data
    :param high: upper bound of noise data
    :param target_test_episodes: number of episodes to test on
    :return: returns the trained model
    """
    mse_loss = nn.MSELoss().cuda() if cuda else nn.MSELoss()
    train_data, test_data = data
    min_loss_i, best_perf_i = None, None
    batch_loss_data, epoch_losses, test_losses, test_perf_data = [], [], [], []
    total_batches = math.ceil(len(train_data) / batch_size)

    for epoch in range(epochs):
        net.train()
        batch_losses = []
        random.shuffle(train_data)
        for b_i in range(total_batches):
            batch_input = train_data[(b_i * batch_size):(b_i * batch_size) + batch_size]
            batch_target = Variable(torch.FloatTensor(batch_input))
            batch_input = torch.FloatTensor(batch_input)
            batch_input = Variable(batch_input, requires_grad=True)
            if cuda:
                batch_input, batch_target = batch_input.cuda(), batch_target.cuda()
            batch_output, _ = net(batch_input)
            optimizer.zero_grad()
            loss = mse_loss(batch_output, batch_target)
            loss.backward()
            batch_losses.append(loss.item())
            if grad_clip is not None:
                torch.nn.utils.clip_grad_norm_(net.parameters(), grad_clip)
            optimizer.step()
            logger.info('epoch: %d batch: %d loss: %f' % (epoch, b_i, loss.item()))

        batch_loss_data += batch_losses
        epoch_losses.append(round(np.average(batch_losses), 5))
        test_losses.append(round(test(net, test_data, len(test_data), cuda=cuda), 5))
        test_perf = test_with_env(target_net(net), env, target_test_episodes, cuda=cuda)
        test_perf_data.append(test_perf)

        if (best_perf_i is None) or (test_perf_data[best_perf_i] <= test_perf_data[-1]) \
                or test_perf_data[-1] == env.spec.reward_threshold:
            torch.save(net.state_dict(), model_path)
            logger.info('Bottle Net Model Saved!')
        if (best_perf_i is None) or (test_perf_data[best_perf_i] < test_perf_data[-1]):
            best_perf_i = len(test_perf_data) - 1
            logger.info('Best Perf i updated')
        if (min_loss_i is None) or (test_losses[min_loss_i] > test_losses[-1]):
            min_loss_i = len(test_losses) - 1
            logger.info('min_loss_i updated')

        plot_data(verbose_data_dict(test_losses, epoch_losses, batch_loss_data, test_perf_data),
                  plot_dir)
        logger.info('epoch: %d test loss: %f best perf i: %d min loss i: %d' %
                    (epoch, test_losses[-1], best_perf_i, min_loss_i))

        if np.isnan(batch_losses[-1]):
            logger.info('Batch Loss: Nan')
            break
        if ((len(test_losses) - 1 - min_loss_i) > 50) or (test_losses[-1] == 0):
            logger.info('Test Loss hasn\'t improved in last 50 epochs'
                        if test_losses[-1] != 0 else 'Zero Test Loss!!')
            logger.info('Stopping!')
            break

    net.load_state_dict(torch.load(model_path))
    return net
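# A minimal, self-contained sketch of the per-batch update pattern used in the
# train() function above (MSE reconstruction loss plus optional gradient clipping).
# The tiny autoencoder, the random data, and the hyper-parameter values below are
# illustrative assumptions, not part of the original project.
import numpy as np
import torch
import torch.nn as nn
from torch.optim import Adam

torch.manual_seed(0)
toy_net = nn.Sequential(nn.Linear(8, 3), nn.Tanh(), nn.Linear(3, 8))  # stand-in for the QBN
toy_data = np.random.rand(64, 8).astype(np.float32)                   # stand-in training features
optimizer = Adam(toy_net.parameters(), lr=1e-3)
mse_loss = nn.MSELoss()
grad_clip = 5.0
batch_size = 16

for b_i in range(len(toy_data) // batch_size):
    batch = torch.from_numpy(toy_data[b_i * batch_size:(b_i + 1) * batch_size])
    optimizer.zero_grad()
    loss = mse_loss(toy_net(batch), batch)  # reconstruct the input
    loss.backward()
    torch.nn.utils.clip_grad_norm_(toy_net.parameters(), grad_clip)
    optimizer.step()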
    # (end of get_usps: stack the per-class subsets and shuffle them)
    tmp = list(zip(*[get_usps(i, datax, datay) for i in l]))
    tmpx, tmpy = np.vstack(tmp[0]), np.hstack(tmp[1])
    idx = np.random.permutation(range(len(tmpy)))
    return tmpx[idx, :], tmpy[idx]


def show_usps(data):
    plt.imshow(data.reshape((16, 16)), interpolation="nearest", cmap="gray")


### Artificial data
plt.ion()
xgentrain, ygentrain = gen_arti(data_type=0, sigma=0.5, nbex=1000, epsilon=0.1)
xgentest, ygentest = gen_arti(data_type=0, sigma=0.5, nbex=1000, epsilon=0.1)
plt.figure()
plot_data(xgentrain, ygentrain)

### Real data
plt.figure()
xuspstrain, yuspstrain = load_usps("USPS/USPS_train.txt")
xuspstest, yuspstest = load_usps("USPS/USPS_test.txt")
x06train, y06train = get_usps([0, 6], xuspstrain, yuspstrain)
x06test, y06test = get_usps([0, 6], xuspstest, yuspstest)
show_usps(x06train[0])


def f(X):
    return np.linalg.norm(X, axis=1)


#### For visualizing the costs
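# load_usps and gen_arti are not shown in this snippet. A plausible sketch of a
# loader, assuming a file format with a header line followed by one digit per
# row (the label, then the grey-level pixel values); the name load_usps_sketch
# and this exact format are assumptions for illustration only.
import numpy as np

def load_usps_sketch(filename):
    with open(filename, "r") as f:
        f.readline()  # skip the header line
        rows = [[float(x) for x in line.split()] for line in f if len(line.split()) > 2]
    data = np.array(rows)
    return data[:, 1:], data[:, 0].astype(int)  # (pixel values, labels)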
def train(self, net, env_fn, net_path, plots_dir, args):
    optimizer = Adam(net.parameters(), lr=args.lr)
    mse_loss = nn.MSELoss().cuda() if args.cuda else nn.MSELoss()
    test_perf_data = []
    train_perf_data = []
    best = None
    # n_trajectory_loss = []
    n_trajectory_info = []
    for episode in range(1, args.train_episodes + 1):
        net.train()
        env = env_fn()

        # Gather data for a single episode
        done = False
        total_reward = 0
        log_probs = []
        ep_rewards = []
        critic_info = []
        ep_obs = []
        obs = env.reset()
        while not done:
            ep_obs.append(obs)
            obs = Variable(torch.FloatTensor(obs.tolist())).unsqueeze(0)
            action_probs, critic = net(obs)
            m = Categorical(action_probs)
            action = m.sample()
            log_probs.append(m.log_prob(Variable(action.data)))
            action = int(action.data[0])
            obs, reward, done, info = env.step(action)
            ep_rewards.append(reward)
            critic_info.append(critic)
            total_reward += reward
        train_perf_data.append(total_reward)
        n_trajectory_info.append((ep_obs, ep_rewards, critic_info, log_probs))

        # Update the network after collecting n trajectories
        if episode % args.batch_size == 0:
            # Critic update:
            optimizer.zero_grad()
            critic_loss = 0
            for trajectory_info in n_trajectory_info:
                obs, _rewards, _critic_info, _log_probs = trajectory_info
                for i, r in enumerate(_rewards):
                    critic = _critic_info[i]
                    if i != len(_rewards) - 1:
                        target_critic = r + Variable(_critic_info[i + 1].data)
                    else:
                        target_critic = Variable(torch.Tensor([[r]]))
                    critic_loss += mse_loss(critic, target_critic)
            critic_loss = critic_loss / args.batch_size
            critic_loss.backward(retain_graph=True)
            optimizer.step()

            # Actor update:
            optimizer.zero_grad()
            actor_loss = 0
            for trajectory_info in n_trajectory_info:
                obs, _rewards, _critic_info, _log_probs = trajectory_info
                for i, r in enumerate(_rewards):
                    _, v_state = net(Variable(torch.FloatTensor(obs[i].tolist())).unsqueeze(0))
                    v_state = Variable(v_state.data)
                    if i != len(_rewards) - 1:
                        _, v_next_state = net(Variable(torch.FloatTensor(obs[i + 1].tolist())).unsqueeze(0))
                        v_next_state = Variable(v_next_state.data)
                    else:
                        v_next_state = 0
                    advantage = r + v_next_state - v_state
                    actor_loss -= _log_probs[i] * advantage
            actor_loss = actor_loss / args.batch_size
            actor_loss.backward()
            optimizer.step()
            n_trajectory_info = []

        print('Train=> Episode:{} Reward:{} Length:{}'.format(episode, total_reward, len(ep_rewards)))

        # Test and log
        if episode % 20 == 0:
            test_reward = self.test(net, env_fn, 10, log=True)
            test_perf_data.append(test_reward)
            print('Test Performance:', test_reward)
            if best is None or best <= test_reward:
                torch.save(net.state_dict(), net_path)
                best = test_reward
                print('Model Saved!')
            if best == env.reward_threshold:
                print('Optimal Performance achieved!!')
                break
        if episode % 10 == 0:
            plot_data(self.__get_plot_data_dict(train_perf_data, test_perf_data), plots_dir)

    return net
import tools


def prixMaison(taille):
    return taille * (10**4)


data = ([20, 2], [40, 4], [80, 8], [30, 2.5], [70, 5], [80, 6])
#data += ([150, 6.5], [200, 11], [90, 7.5])

a = (4 - 2) / (40 - 20)  # y = ax + b
b = 2 - a * 20

print('A 30 m² house costs ' + str(prixMaison(30)))
print('An 80 m² house costs ' + str(prixMaison(80)))
print('A 90 m² house costs ' + str(prixMaison(90)))

print(tools.LSE(data, [a, b]))

# Brute-force search over the slope a, keeping the intercept b fixed:
meilleur = tools.LSE(data, [a, b])
for i in range(-10000, 10000, 1):
    test = a + (i / 10000)
    if tools.LSE(data, [test, b]) < meilleur:
        meilleur = tools.LSE(data, [test, b])
        aOpti = test

print(aOpti)
print(tools.LSE(data, [aOpti, b]))
print(tools.reg_lin(data))
tools.plot_data(data, aOpti, b)
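# tools.LSE is not shown in this snippet. A plausible least-squares-error helper,
# consistent with how it is called above (points given as [x, y] pairs and
# parameters as [a, b] for the line y = a*x + b), might look like the sketch
# below; the name lse_sketch and this exact definition are assumptions.
def lse_sketch(points, params):
    a, b = params
    return sum((y - (a * x + b)) ** 2 for x, y in points)

# With the same toy data, lse_sketch(data, [a, b]) would then play the role of
# tools.LSE in the brute-force search over the slope a.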
def train(self, net, env_fn, net_path, plots_dir, args):
    optimizer = Adam(net.parameters(), lr=args.lr)
    mse_loss = nn.MSELoss().cuda() if args.cuda else nn.MSELoss()
    test_perf_data = []
    train_perf_data = []
    best = None
    # n_trajectory_loss = []
    n_trajectory_info = []
    for episode in range(1, args.train_episodes + 1):
        net.train()
        env = env_fn()

        # Gather data for a single episode
        done = False
        total_reward = 0
        log_probs = []
        ep_rewards = []
        critic_info = []
        ep_obs = []
        obs = env.reset()
        entropies = []
        while not done:
            ep_obs.append(obs)
            obs = Variable(torch.FloatTensor(obs.tolist())).unsqueeze(0)
            if args.cuda:
                obs = obs.cuda()
            logit, critic = net(obs)
            action_probs = F.softmax(logit, dim=1)
            action_log_prob = F.log_softmax(logit, dim=1)
            entropy = -(action_log_prob * action_probs).sum(1)
            entropies.append(entropy)
            m = Categorical(action_probs)
            action = m.sample()
            log_probs.append(m.log_prob(Variable(action.data)))
            action = int(action.data[0])
            obs, reward, done, info = env.step(action)
            ep_rewards.append(reward)
            critic_info.append(critic)
            total_reward += sum(reward)
        train_perf_data.append(total_reward)
        n_trajectory_info.append((ep_obs, ep_rewards, critic_info, log_probs, entropies))

        # Update the network after collecting n trajectories
        if episode % args.batch_size == 0:
            # Critic update
            # TODO: Optimize critic update by calculating MSE once for everything
            optimizer.zero_grad()
            critic_loss = 0
            for trajectory_info in n_trajectory_info:
                obs, _rewards, _critic_info, _log_probs, _ = trajectory_info
                for i in range(len(obs)):
                    critic = _critic_info[i]
                    target_critic = []
                    for r_i, r in enumerate(_rewards[i]):
                        if i != len(obs) - 1:
                            target_critic.append(
                                r + args.gamma * _critic_info[i + 1].data.cpu().numpy()[0][r_i])
                        else:
                            target_critic.append(r)
                    target_critic = Variable(torch.FloatTensor(target_critic)).unsqueeze(0)
                    if args.cuda:
                        target_critic = target_critic.cuda()
                    critic_loss += mse_loss(critic, target_critic)
            critic_loss = critic_loss / args.batch_size
            critic_loss.backward(retain_graph=True)
            optimizer.step()

            # Actor update
            optimizer.zero_grad()
            actor_loss = 0
            for trajectory_info in n_trajectory_info:
                obs, _rewards, _critic_info, _log_probs, _entropies = trajectory_info
                gae = [0 for _ in range(self.reward_types)]
                for i in range(len(obs)):
                    obs_i = Variable(torch.FloatTensor(obs[i].tolist())).unsqueeze(0)
                    if args.cuda:
                        obs_i = obs_i.cuda()
                    _, v_state = net(obs_i)
                    v_state = v_state.data.cpu().numpy()[0]
                    if i != len(_rewards) - 1:
                        obs_next = Variable(torch.FloatTensor(obs[i + 1].tolist())).unsqueeze(0)
                        if args.cuda:
                            obs_next = obs_next.cuda()
                        _, v_next_state = net(obs_next)
                        v_next_state = v_next_state.data.cpu().numpy()[0]
                    else:
                        v_next_state = [0 for _ in range(len(_rewards))]
                    advantage = 0
                    for r_i, r in enumerate(_rewards[i]):
                        advantage += r + args.gamma * v_next_state[r_i] - v_state[r_i]
                    actor_loss += -_log_probs[i] * advantage - args.beta * _entropies[i]

            # for trajectory_info in n_trajectory_info:
            #     obs, _rewards, _critic_info, _log_probs, _entropies = trajectory_info
            #     gae = [0 for _ in range(self.reward_types)]
            #     for i in range(len(obs) - 1, -1, -1):
            #         obs_i = Variable(torch.FloatTensor(obs[i].tolist())).unsqueeze(0)
            #         if args.cuda:
            #             obs_i = obs_i.cuda()
            #         _, v_state = net(obs_i)
            #         v_state = v_state.data.cpu().numpy()[0]
            #         if i != len(obs) - 1:
            #             obs_next = Variable(torch.FloatTensor(obs[i + 1].tolist())).unsqueeze(0)
            #             if args.cuda:
            #                 obs_next = obs_next.cuda()
            #             _, v_next_state = net(obs_next)
            #             v_next_state = v_next_state.data.cpu().numpy()[0]
            #         else:
            #             v_next_state = [0 for _ in range(len(_rewards))]
            #
            #         # advantage = 0
            #         # for r_i, r in enumerate(_rewards[i]):
            #         #     delta_t = r + args.gamma * v_next_state[r_i] - v_state[r_i]
            #         #     gae[r_i] = gae[r_i] * args.gamma * args.tau + delta_t
            #         #     # advantage += r + args.gamma * v_next_state[r_i] - v_state[r_i]
            #         # actor_loss -= _log_probs[i] * sum(gae) - args.beta * _entropies[i]
            #         for r_i, r in enumerate(_rewards[i]):
            #             delta_t = r + args.gamma * v_next_state[r_i] - v_state[r_i]
            #             gae[r_i] += (args.gamma * args.tau)
            #         actor_loss -= _log_probs[i] * sum(gae) - args.beta * _entropies[i]

            actor_loss = actor_loss / args.batch_size
            actor_loss.backward()
            optimizer.step()
            n_trajectory_info = []

        print('Train=> Episode:{} Reward:{} Length:{}'.format(episode, total_reward, len(ep_rewards)))

        # Test and log
        if episode % (args.batch_size * 5) == 0:
            test_reward = self.test(net, env_fn, 10, log=True, args=args)
            test_perf_data.append(test_reward)
            print('Test Performance:', test_reward)
            if best is None or best <= test_reward:
                torch.save(net.state_dict(), net_path)
                best = test_reward
                print('Model Saved!')
            if best == env.reward_threshold:
                print('Optimal Performance achieved!!')
                break
        if episode % (args.batch_size * 10) == 0:
            plot_data(self.__get_plot_data_dict(train_perf_data, test_perf_data), plots_dir)

    return net
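# A small, self-contained numerical illustration of the targets used above: for
# each step the critic target is r + gamma * V(s') (a bootstrapped TD(0) target,
# zero at the terminal step) and the actor advantage is r + gamma * V(s') - V(s),
# summed over the decomposed reward channels. The reward and value numbers below
# are made up purely for illustration.
gamma = 0.99
rewards = [[1.0, 0.0], [0.0, 0.5], [1.0, -1.0]]   # one decomposed reward vector per step
values = [[0.8, 0.2], [0.6, 0.4], [0.3, 0.1]]     # critic outputs V(s) per reward channel

for i, (r_vec, v_vec) in enumerate(zip(rewards, values)):
    v_next = values[i + 1] if i < len(rewards) - 1 else [0.0, 0.0]
    targets = [r + gamma * vn for r, vn in zip(r_vec, v_next)]
    advantage = sum(r + gamma * vn - v for r, vn, v in zip(r_vec, v_next, v_vec))
    print('step', i, 'critic targets:', targets, 'summed advantage:', round(advantage, 3))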
import tools

data = ([20, 2], [40, 4], [80, 8])
tools.plot_data(data)

data1 = ([2, 2], [3, 4], [5, 8])
tools.plot_data(data1)

data2 = ([0, 2], [0, 4], [1, 8])
tools.plot_data(data2)

data3 = ([4, 2], [10, 4], [15, 8])
tools.plot_data(data3)
def train(self, net, env_fn, net_path, plots_dir, args):
    optimizer = Adam(net.parameters(), lr=args.lr)
    test_perf_data = []
    test_steps_data = []
    train_perf_data = []
    best = None
    n_trajectory_loss = []
    loss_data = []
    for episode in range(args.train_episodes):
        net.train()
        env = env_fn()

        # Gather data for a single episode
        done = False
        total_reward = 0
        log_probs = []
        ep_rewards = []
        entropies = []
        obs = env.reset()
        while not done:
            obs = Variable(torch.FloatTensor(obs.tolist())).unsqueeze(0)
            action_probs = net(obs)
            m = Categorical(action_probs)
            action = m.sample()
            action_log_prob = m.log_prob(Variable(action.data))
            log_probs.append(action_log_prob)
            entropy = -(action_log_prob * action_probs).sum(1)
            entropies.append(entropy)
            action = int(action.data[0])
            obs, reward, done, info = env.step(action)
            ep_rewards.append(reward)
            total_reward += reward
        train_perf_data.append(total_reward)

        # Estimate the gradients
        R = 0
        discounted_returns = []
        for r in ep_rewards[::-1]:
            R = r + args.gamma * R
            discounted_returns.insert(0, R)
        discounted_returns = torch.FloatTensor(discounted_returns)
        discounted_returns = (discounted_returns - discounted_returns.mean()) / (
            discounted_returns.std() + np.finfo(np.float32).eps)

        policy_loss = []
        for log_prob, score, entropy in zip(log_probs, discounted_returns, entropies):
            policy_loss.append(-(log_prob * score - args.beta * entropy))
        n_trajectory_loss.append(policy_loss)  # collect n trajectories

        # Update the network after collecting n trajectories
        if episode % args.batch_size == 0:
            optimizer.zero_grad()
            sample_loss = 0
            for _loss in n_trajectory_loss:
                sample_loss += torch.cat(_loss).sum()
            sample_loss = sample_loss / args.batch_size
            loss_data.append(sample_loss.data[0])
            sample_loss.backward()
            optimizer.step()
            n_trajectory_loss = []

        print('Train=> Episode:{} Reward:{} Length:{}'.format(episode, total_reward, len(ep_rewards)))

        # Test and log
        if episode % args.batch_size == 0:
            test_reward, test_steps = self.test(net, env_fn, 10, log=True)
            test_perf_data.append(test_reward)
            test_steps_data.append(test_steps)
            print('Performance (Reward):', test_reward)
            print('Performance (Steps):', test_steps)
            if best is None or best <= test_reward:
                torch.save(net.state_dict(), net_path)
                best = test_reward
                print('Model Saved!')
            if best == env.reward_threshold:
                print('Optimal Performance achieved!!')
                break
        if episode % 10 == 0:
            plot_data(
                self.__get_plot_data_dict(train_perf_data, (test_perf_data, test_steps_data), loss_data),
                plots_dir)

    return net
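# A self-contained numerical illustration of the return computation used in the
# REINFORCE update above: rewards are accumulated backwards into discounted
# returns G_t = r_t + gamma * G_{t+1}, then normalized to zero mean and unit
# variance before weighting the log-probabilities. The reward values are made up.
import numpy as np
import torch

gamma = 0.99
ep_rewards = [0.0, 0.0, 1.0, 0.0, 1.0]

R = 0
discounted_returns = []
for r in ep_rewards[::-1]:
    R = r + gamma * R
    discounted_returns.insert(0, R)

returns = torch.FloatTensor(discounted_returns)
returns = (returns - returns.mean()) / (returns.std() + np.finfo(np.float32).eps)
print('raw returns:', [round(g, 3) for g in discounted_returns])
print('normalized :', [round(float(g), 3) for g in returns])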
def train(net, env, optimizer, model_path, plot_dir, train_data, batch_size, epochs,
          cuda=False, test_episodes=300, trunc_k=10):
    """
    Supervised learning to train the policy. Saves the model to the given path.

    :param net: Bottleneck GRU network
    :param env: environment
    :param optimizer: optimizer method (Adam is preferred)
    :param model_path: path where the model is saved
    :param plot_dir: path where the plots are saved
    :param train_data: given training data
    :param batch_size: batch size
    :param epochs: number of training epochs
    :param cuda: whether cuda is available
    :param test_episodes: number of episodes to test on
    :return: returns the trained model
    """
    batch_seeds = list(train_data.keys())
    test_seeds = [random.randint(1000000, 10000000) for _ in range(test_episodes)]
    best_i = None
    batch_loss_data = {'actor_mse': [], 'actor_ce': []}
    epoch_losses = {'actor_mse': [], 'actor_ce': []}
    perf_data = []

    logger.info('Padding Sequences ...')
    for batch_i, batch_seed in enumerate(batch_seeds):
        data_obs, data_actions, data_action_probs, data_len = train_data[batch_seed]
        _max, _min = max(data_len), min(data_len)
        obs_shape = data_obs[0][0].shape
        act_shape = np.array(data_actions[0][0]).shape
        act_prob_shape = np.array(data_action_probs[0][0]).shape
        if _max != _min:
            for i in range(len(data_obs)):
                data_obs[i] += [np.zeros(obs_shape)] * (_max - data_len[i])
                data_actions[i] += [np.zeros(act_shape)] * (_max - data_len[i])
                data_action_probs[i] += [np.zeros(act_prob_shape)] * (_max - data_len[i])

    for epoch in range(epochs):
        # Testing before training, as sometimes the combined model doesn't need to be trained
        test_perf = test(net, env, test_episodes, test_seeds=test_seeds, cuda=cuda,
                         log=False, render=True)
        perf_data.append(test_perf)
        logger.info('epoch %d Test Performance: %f' % (epoch, test_perf))
        if best_i is None or perf_data[best_i] <= perf_data[-1]:
            torch.save(net.state_dict(), model_path)
            logger.info('Binary GRU Model Saved!')
        best_i = len(perf_data) - 1 if best_i is None or perf_data[best_i] < perf_data[-1] else best_i

        _reward_threshold_check = perf_data[-1] >= env.spec.reward_threshold
        _epoch_loss_check = (len(epoch_losses['actor_mse']) > 0) and (epoch_losses['actor_mse'][-1] == 0)
        if _reward_threshold_check or _epoch_loss_check:
            logger.info('Optimal Performance achieved!!!')
            logger.info('Exiting!')
            break

        net.train()
        batch_losses = {'actor_mse': [], 'actor_ce': []}
        random.shuffle(batch_seeds)
        for batch_i, batch_seed in enumerate(batch_seeds):
            net, actor_mse_loss, actor_ce_loss = _train(net, optimizer, train_data[batch_seed],
                                                        batch_size, cuda=cuda, trunc_k=trunc_k)
            batch_losses['actor_mse'].append(actor_mse_loss)
            batch_losses['actor_ce'].append(actor_ce_loss)
            logger.info('epoch: {} batch: {} actor mse loss: {} actor ce loss: {}'.format(
                epoch, batch_i, actor_mse_loss, actor_ce_loss))

        batch_loss_data['actor_mse'] += batch_losses['actor_mse']
        batch_loss_data['actor_ce'] += batch_losses['actor_ce']
        epoch_losses['actor_mse'].append(np.average(batch_losses['actor_mse']))
        epoch_losses['actor_ce'].append(np.average(batch_losses['actor_ce']))

        plot_data(verbose_data_dict(perf_data, epoch_losses, batch_loss_data), plot_dir)

        if np.isnan(batch_loss_data['actor_mse'][-1]) or np.isnan(batch_loss_data['actor_ce'][-1]):
            logger.info('Actor Loss: Nan')
            break
        if (len(perf_data) - 1 - best_i) > 50:
            logger.info('Early Stopping!')
            break

    plot_data(verbose_data_dict(perf_data, epoch_losses, batch_loss_data), plot_dir)
    net.load_state_dict(torch.load(model_path))
    return net
def train(net, env, optimizer, model_path, plot_dir, train_data, batch_size, epochs,
          cuda=False, grad_clip=5, trunc_k=10, ep_check=True, rw_check=True):
    """
    Supervised learning to train the policy. Saves the model to the given path.

    :param net: Bottleneck GRU network
    :param env: environment
    :param optimizer: optimizer method (Adam is preferred)
    :param model_path: path where the model is saved
    :param plot_dir: path where the plots are saved
    :param train_data: given training data
    :param batch_size: batch size
    :param epochs: number of training epochs
    :param cuda: whether cuda is available
    :param grad_clip: max norm of the gradients
    :param ep_check: check for complete imitation (zero epoch loss)
    :param rw_check: check for consistent optimal reward
    :return: returns the trained model
    """
    batch_seeds = list(train_data.keys())
    test_env = copy.deepcopy(env)
    test_episodes = 300
    test_seeds = [random.randint(1000000, 10000000) for _ in range(test_episodes)]
    best_i = None
    batch_loss_data = {'actor': []}
    epoch_losses = {'actor': []}
    perf_data = []

    logger.info('Padding Sequences ...')
    for batch_i, batch_seed in enumerate(batch_seeds):
        data_obs, data_actions, _, data_len = train_data[batch_seed]
        _max, _min = max(data_len), min(data_len)
        _shape = data_obs[0][0].shape
        for i in range(len(data_obs)):
            data_obs[i] += [np.zeros(_shape)] * (_max - data_len[i])
            data_actions[i] += [-1] * (_max - data_len[i])

    for epoch in range(epochs):
        net.train()
        batch_losses = {'actor': []}
        random.shuffle(batch_seeds)
        for batch_i, batch_seed in enumerate(batch_seeds):
            net, actor_loss = _train(net, optimizer, train_data[batch_seed], batch_size,
                                     cuda, grad_clip, trunc_k)
            batch_losses['actor'].append(actor_loss)
            logger.info('epoch: {} batch: {} actor loss: {}'.format(epoch, batch_i, actor_loss))

        test_perf = test(net, test_env, test_episodes, test_seeds=test_seeds, cuda=cuda)
        batch_loss_data['actor'] += batch_losses['actor']
        epoch_losses['actor'].append(np.average(batch_losses['actor']))
        perf_data.append(test_perf)
        logger.info('epoch %d Test Performance: %f' % (epoch, test_perf))
        plot_data(verbose_data_dict(perf_data, epoch_losses, batch_loss_data), plot_dir)

        if best_i is None or perf_data[best_i] <= perf_data[-1]:
            torch.save(net.state_dict(), model_path)
            logger.info('GRU Model Saved!')
        best_i = len(perf_data) - 1 if best_i is None or perf_data[best_i] < perf_data[-1] else best_i

        if np.isnan(batch_loss_data['actor'][-1]):
            logger.info('Batch Loss : Nan')
            break
        if (len(perf_data) - 1 - best_i) > 100:
            logger.info('Early Stopping!')
            break

        _reward_threshold_check = ((env.spec.reward_threshold is not None) and len(perf_data) > 1) \
            and (np.average(perf_data[-10:]) == env.spec.reward_threshold)
        _epoch_loss_check = (len(epoch_losses['actor']) > 0) and (epoch_losses['actor'][-1] == 0)
        # We need to ensure complete imitation rather than just performance. Many times, optimal
        # performance can be achieved without complete imitation of the actor.
        if _epoch_loss_check and ep_check:
            logger.info('Complete Imitation of the Agent!!!')
            break
        if _reward_threshold_check and rw_check:
            logger.info('Consistent optimal performance achieved!!!')
            break

    net.load_state_dict(torch.load(model_path))
    return net
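# A small, self-contained illustration of the "Padding Sequences" step above:
# episodes of different lengths are padded to the longest one (zero observation
# vectors and a -1 action marker) so they can be stacked into a single batch.
# The observation size and episode lengths below are illustrative only.
import numpy as np

obs_dim = 4
episodes_obs = [[np.ones(obs_dim)] * 3, [np.ones(obs_dim)] * 5]   # lengths 3 and 5
episodes_act = [[0, 1, 0], [1, 1, 0, 1, 0]]
lengths = [len(ep) for ep in episodes_obs]
max_len = max(lengths)

for i in range(len(episodes_obs)):
    episodes_obs[i] += [np.zeros(obs_dim)] * (max_len - lengths[i])
    episodes_act[i] += [-1] * (max_len - lengths[i])

print(np.array(episodes_obs).shape)   # (2, 5, 4) -> padded batch of observations
print(episodes_act)                   # padded actions, -1 marks padding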
def train(self, net, env_fn, net_path, plots_dir, args):
    optimizer = Adam(net.parameters(), lr=args.lr)
    test_perf_data = []
    test_steps_data = []
    train_perf_data = []
    loss_data = []
    best = None
    n_trajectory_loss = []
    n_trajectory_type_loss = []
    for episode in range(args.train_episodes):
        episode_start_time = time.time()
        net.train()
        env = env_fn()

        # Gather data for a single episode
        done = False
        total_reward = 0
        log_probs = []
        entropies = []
        reward_type_log_probs = {i: [] for i in range(self.reward_types)}
        ep_decomposed_rewards = []
        obs = env.reset()
        while not done:
            obs = Variable(torch.Tensor(obs.tolist())).unsqueeze(0)
            action_logits, reward_type_action_probs = net(obs)
            action_probs = F.softmax(action_logits)
            action_log_prob = F.log_softmax(action_logits, dim=0)
            entropy = -(action_log_prob * action_probs).sum()
            entropies.append(entropy)
            m = Categorical(action_probs)
            action = m.sample()
            log_probs.append(m.log_prob(Variable(action.data)))
            for reward_type_i in range(self.reward_types):
                m = Categorical(F.softmax(reward_type_action_probs[reward_type_i]))
                log_prob = m.log_prob(Variable(action.data))
                if math.isnan(log_prob.data[0]):
                    print(reward_type_action_probs[reward_type_i])
                    import pdb
                    pdb.set_trace()
                reward_type_log_probs[reward_type_i].append(log_prob)
            action = int(action.data[0])
            obs, reward, done, info = env.step(action)
            ep_decomposed_rewards.append(reward)
            total_reward += sum(reward)
        train_perf_data.append(total_reward)

        # Estimate the gradients and update the network
        R_total = 0
        R_decomposed = {i: 0 for i in range(self.reward_types)}
        discounted_total_returns = []
        discounted_decomposed_returns = {i: [] for i in range(self.reward_types)}
        for r in ep_decomposed_rewards[::-1]:
            R_total = sum(r) + args.gamma * R_total
            discounted_total_returns.insert(0, R_total)
            for i, r_d in enumerate(r):
                R_decomposed[i] = r_d + args.gamma * R_decomposed[i]
                discounted_decomposed_returns[i].insert(0, R_decomposed[i])

        discounted_total_returns = torch.FloatTensor(discounted_total_returns)
        discounted_total_returns = (discounted_total_returns - discounted_total_returns.mean()) / (
            discounted_total_returns.std() + np.finfo(np.float32).eps)
        for i in discounted_decomposed_returns:
            discounted_decomposed_returns[i] = torch.FloatTensor(discounted_decomposed_returns[i])
            discounted_decomposed_returns[i] = (
                discounted_decomposed_returns[i] - discounted_decomposed_returns[i].mean()) / (
                discounted_decomposed_returns[i].std() + np.finfo(np.float32).eps)

        policy_loss = []
        policy_type_losses = {i: [] for i in range(self.reward_types)}
        for log_prob, score, entropy in zip(log_probs, discounted_total_returns, entropies):
            loss = -log_prob * score - args.beta * entropy
            policy_loss.append(loss)
        for type_i in range(self.reward_types):
            for log_prob, score in zip(reward_type_log_probs[type_i],
                                       discounted_decomposed_returns[type_i]):
                policy_type_losses[type_i].append(-log_prob * score)
        n_trajectory_loss.append(policy_loss)
        n_trajectory_type_loss.append(policy_type_losses)

        # Update the network after collecting n trajectories
        if episode % args.batch_size == 0:
            start_time = time.time()
            optimizer.zero_grad()
            sample_loss = 0
            for _loss in n_trajectory_loss:
                sample_loss += torch.cat(_loss).sum()
            for _loss in n_trajectory_type_loss:
                for type_i in range(self.reward_types):
                    sample_loss += torch.cat(_loss[type_i]).sum()
            end_time = time.time()
            print("Loss Time", end_time - start_time)

            sample_loss = sample_loss / args.batch_size
            loss_data.append(sample_loss.data[0])
            start_time = time.time()
            sample_loss.backward()
            optimizer.step()
            end_time = time.time()
            n_trajectory_loss = []
            n_trajectory_type_loss = []
            print("Update Network Time", end_time - start_time)

        episode_end_time = time.time()
        print('Episode:{} Reward:{} Length:{} Time:{}'.format(
            episode, total_reward, len(ep_decomposed_rewards),
            episode_end_time - episode_start_time))

        # Test and log
        if episode % 10 == 0:
            test_reward, test_steps = self.test(net, env_fn, 10, log=True, render=False)
            test_perf_data.append(test_reward)
            test_steps_data.append(test_steps)
            print('Performance (Reward):', test_reward)
            print('Performance (Steps):', test_steps)
            if best is None or best <= test_reward:
                torch.save(net.state_dict(), net_path)
                best = test_reward
                print('Model Saved!')
        if episode % 10 == 0:
            plot_data(
                self.__get_plot_data_dict(train_perf_data, (test_perf_data, test_steps_data), loss_data),
                plots_dir)

    return net