def run_training(model, run_name, epochs, plot=None):
    """Train *model* on the GPU for *epochs* epochs, plotting per-epoch metrics.

    Side effect: rebinds the module-level ``optimizer`` global to a fresh SGD
    optimizer over *model*'s parameters (lr/momentum taken from ``args``).

    Args:
        model: network to train; moved to CUDA before training starts.
        run_name: prefix used when a new PlotLearning instance is created.
        epochs: number of epochs to run.
        plot: optional PlotLearning to reuse; one is created when None.

    Returns:
        The PlotLearning instance that received the per-epoch metrics.
    """
    global optimizer
    model.cuda()
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
    if plot is None:
        plot = PlotLearning('./plots/cifar/', 10, prefix=run_name)

    for epoch in range(1, epochs + 1):
        train_acc, train_loss = train(epoch)
        test_acc, test_loss = test()
        # Keys follow the Keras-style naming the plotter expects.
        plot.plot({
            'acc': train_acc,
            'val_acc': test_acc,
            'loss': train_loss,
            'val_loss': test_loss,
        })

    return plot
def run_training(model, run_name, epochs, plot=None):
    """Train *model* for *epochs* epochs, plotting and recording metrics.

    Args:
        model: network to train; moved to CUDA before the first epoch.
        run_name: label stored in the returned log under 'model_type', also
            used as the plot name when a new PlotLearning is created.
        epochs: number of epochs to run.
        plot: optional PlotLearning to reuse; one is created when None.

    Returns:
        (plot, visdom_log) — the plot object and a history dict holding
        per-epoch accuracy/loss lists plus best-epoch placeholder records.
    """
    # Per-epoch history series, in the order they are appended each epoch.
    series = ('epoch', 'train_accuracy', 'test_accuracy',
              'train_loss', 'test_loss')
    visdom_log = {'model_type': run_name}
    for key in series:
        visdom_log[key] = []
    # Best-epoch placeholders; presumably filled in elsewhere — TODO confirm.
    visdom_log['top_train_data'] = {'epoch': 0, 'accuracy': 0.0, 'loss': 0.0}
    visdom_log['top_test_data'] = {'epoch': 0, 'accuracy': 0.0, 'loss': 0.0}

    model.cuda()
    if plot is None:
        plot = PlotLearning('./plots/cifar/', 10, plot_name=run_name)

    for epoch in range(1, epochs + 1):
        train_acc, train_loss = train(model, epoch)
        test_acc, test_loss = test(model)
        # Keras-style keys expected by the plotter.
        plot.plot({'acc': train_acc, 'val_acc': test_acc,
                   'loss': train_loss, 'val_loss': test_loss})
        for key, value in zip(series, (epoch, train_acc, test_acc,
                                       train_loss, test_loss)):
            visdom_log[key].append(value)

    return plot, visdom_log
best_test["test_accuracy"] = accu_test best_test["validation_accuarcy"] = accu_train # print "=========================================" # print "the epoch with best test accuracy is:", # print best_test # print "the accuracy reached with net2net in", # print acc # print "=========================================" return plot, best_test, log if __name__ == "__main__": logs = [] visdom_plot = PlotLearning('./plots/cifar/', 10, prefix='Net2Net Implementation', plot_name=args.plot_name + '_wider') print("\n\n > Teacher (Base Network) training ... ") model = Net() model.double() model.cuda() print model criterion = nn.NLLLoss() plot, _, log_base = run_training(model, 'Teacher', args.epochs, plot=visdom_plot, color='red') logs.append(log_base)
# NOTE(review): this first line closes an Agent(...) constructor call whose
# opening is outside this chunk (epsilon floor 0.01, 8-d observations,
# learning rate 1e-3 — presumably LunarLander; confirm against the caller).
                  eps_end=0.01, inp_dims=[8], lr=0.001)

scores, eps_history = [], []
n_games = 500

for i in range(n_games):
    score = 0
    done = False
    observation = env.reset()
    while not done:
        # Standard gym interaction loop: act, step, store, learn every step.
        action = agent.choose_action(observation)
        observation_, reward, done, info = env.step(action)
        score += reward
        agent.store_transition(observation, action, reward,
                               observation_, done)
        agent.learn()  # one learning update per environment step
        observation = observation_
    scores.append(score)
    eps_history.append(agent.epsilon)
    # Rolling mean over the last 100 episodes (fewer early in training).
    avg_score = np.mean(scores[-100:])
    print('episode ', i, 'score %.2f' % score,
          'average score %.2f' % avg_score,
          'epsilon %.2f' % agent.epsilon)

# Plot score and epsilon history against the episode index.
x = [i + 1 for i in range(n_games)]
filename = 'lunar_lander.png'
PlotLearning(x, scores, eps_history, filename)
# https://github.com/user432/DeepRL-in-PyTorch/
# Train a randomly-initialised wider teacher for comparison against Net2Net.
start_t = time.time()
print("\n\n > Wider teacher training ... ")
colors.append('green')
trace_names.extend(['Wider Random Train', 'Wider Random Test'])
model_ = ConvNet(net_dataset=CIFAR10)
del model  # release the previous network before binding the wider one
model = model_
model.define_wider(widening_factor=2)
model.cuda()
print(model)  # fixed: was Python-2 `print model`, a syntax error on Python 3
_, log_random_init = run_training(model, 'Wider_teacher_', args.epochs)
print(" >> Time taken {}".format(time.time() - start_t))
logs.append(log_random_init)

# # wider deeper teacher training
# print("\n\n > Wider+Deeper teacher training ... ")
# start_t = time.time()
# model_ = Net()
# # del model
# model = model_
# model.define_wider_deeper()
# run_training(model, 'Wider_Deeper_teacher_', args.epochs + 1)
# print(" >> Time taken {}".format(time.time() - start_t))

# Final summary figure comparing every collected training log.
visdom_plot_final = PlotLearning('./plots/cifar/', 10,
                                 plot_name=args.plot_name)
visdom_plot_final.plot_logs(logs, trace_names, colors)
# # visdom_plot = PlotLearning('./plots/cifar/', 10, prefix='Net2Net Implementation', # plot_name=args.plot_name + "_deeper") # visdom_plot.plot_logs(logs, args.plot_name) # For Wider + Deeper model training # # del logs[1:] # # # wider deeper model training from scratch # print("\n\n > Wider+Deeper Model training from scratch... ") # model_wider_deeper = Net() # model_wider_deeper.define_wider_deeper() # _, _, log = run_training(model_wider_deeper, 'Wider_Deeper_teacher_', args.epochs) # logs.append(log) # # # wider + deeper model training from Net2Net # print("\n\n > Wider+Deeper Student training with Net2Net... ") # model_wider_deeper_net2net = Net() # model_wider_deeper_net2net.net2net_wider() # model_wider_deeper_net2net = copy.deepcopy(model_wider_Net2Net) # model_wider_deeper_net2net.net2net_deeper() # _, _, log = run_training(model_wider_deeper_net2net, 'WiderDeeper_student_', args.epochs) # logs.append(log) visdom_plot = PlotLearning('./plots/cifar/', 10, prefix='Net2Net Implementation', plot_name=args.plot_name + "_deeper_wider") visdom_plot.plot_logs(logs, args.plot_name)
        # Push the newest metrics to the live visdom windows; *plot* may be
        # None when no plot name was supplied on the command line.
        if plot is not None:
            win_accuracy, win_loss = plot_live_data(plot, win_accuracy, win_loss,
                                                    run_name, **live_data)
    return visdom_log, win_accuracy, win_loss


if __name__ == "__main__":
    logs = []
    colors = []
    trace_names = []

    # Live visdom plotting is optional: enabled only when --plot_name is set.
    if args.plot_name is not None:
        visdom_live_plot = PlotLearning('./plots/cifar/', 10,
                                        plot_name=args.plot_name,
                                        env_name=args.env_name)
    else:
        visdom_live_plot = None

    # Train the teacher network; its log seeds the comparison traces.
    start_t = time.time()
    print("\n\n > Teacher training ... ")
    colors.append('orange')
    trace_names.extend(['Teacher Train', 'Teacher Test'])
    model = Net()
    model.cuda()
    criterion = nn.NLLLoss()
    log_base, win_accuracy, win_loss = run_training(model, 'Teacher_',
                                                    args.epochs, visdom_live_plot)
    logs.append(log_base)
if __name__ == '__main__':
    # Policy-gradient agent for LunarLander with two 128-unit hidden layers.
    agent = Agent(ALPHA=0.001, inp_dims=[8], GAMMA=0.99, n_actions=4,
                  l1_size=128, l2_size=128)
    env = gym.make('LunarLander-v2')
    score_history = []
    score = 0
    num_episodes = 2500

    # Record a video of every episode (video_callable accepts all ids).
    env = wrappers.Monitor(env, "tmp/lunar-lander",
                           video_callable=lambda episode_id: True, force=True)

    for episode in range(num_episodes):
        # NOTE: this prints the PREVIOUS episode's score — `score` is reset
        # and re-accumulated only after the print.
        print('episode: ', episode, 'score: ', score)
        done = False
        score = 0
        observation = env.reset()
        while not done:
            action = agent.choose_action(observation)
            next_observation, reward, done, info = env.step(action)
            agent.store_rewards(reward)
            observation = next_observation
            score += reward
        score_history.append(score)
        agent.learn()  # one policy update per finished episode (REINFORCE-style)

    filename = 'lunar-lander-alpha001-128x128fc-newG.png'
    PlotLearning(score_history, filename=filename, window=25)