def main():
    args = parse_arguments()
    agent_name = args.agent_name
    filename = args.filename
    num_epi = args.num_epi
    print agent_name, filename, num_epi

    pygame.init()
    pygame.display.set_mode([1, 1])

    task = MarioTask(initMarioMode=2)
    if agent_name == 'human':
        agent = HumanAgent(task.ACTION_MAPPING)
    else:
        agent = ForwardAgent()  # fixed casing: was Forwardagent()
    exp = EpisodicExperiment(task, agent)
    print 'Task Ready'

    exp.train(num_epi)
    print 'mm 2:', task.reward

    if agent_name == 'human':
        print exp.all_actions  # was the undefined name all_action; presumably the recorded actions
        with open('./expert_data/' + filename + '_demo.pckl', 'wb') as f:
            pickle.dump((exp.all_states, exp.all_actions), f)

    print "finished"
    pygame.quit()
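# `parse_arguments` above is not defined in this section; a minimal sketch of an
# argparse-based version exposing the three attributes the script reads
# (argument names and help text are assumptions, not the repo's actual CLI):
import argparse

def parse_arguments():
    parser = argparse.ArgumentParser(description='Run Mario episodes and record expert data.')
    parser.add_argument('agent_name', help="'human' for keyboard control, anything else for ForwardAgent")
    parser.add_argument('filename', help='basename for the pickled demo under ./expert_data/')
    parser.add_argument('num_epi', type=int, help='number of episodes to run')
    return parser.parse_args()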
def main():
    agent = MyAgent(None)
    # agent = AstarAgent()
    task = MarioTask(agent.name)
    task.env.initMarioMode = 2
    task.env.levelDifficulty = int(sys.argv[1]) if len(sys.argv) == 2 else 0
    experiment = EpisodicExperiment(task, agent)

    n_individuals = 10
    filename = "learned_individuals_{0}".format(task.env.levelDifficulty)
    if os.path.exists(filename):
        initial_individuals = load(filename)
    else:
        initial_individuals = [Individual(random=True) for i in range(n_individuals)]
    current_individuals = initial_individuals

    n_generations = 50
    for generation in range(n_generations):
        print("generation #{0} playing...".format(generation))
        # task.env.visualization = generation % 10 == 0
        task.env.visualization = generation % 50 == 0
        current_individuals = make_next_generation(experiment, current_individuals, generation)
        save(current_individuals, filename)
        savelog(log_list)  # log_list is assumed to be a module-level global
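# The `load`/`save` helpers above are not shown in this section; a minimal
# sketch of pickle-based implementations with these signatures (an assumption,
# not necessarily the repo's actual persistence code):
import pickle

def load(filename):
    with open(filename, 'rb') as f:
        return pickle.load(f)

def save(obj, filename):
    with open(filename, 'wb') as f:
        pickle.dump(obj, f)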
def main():
    f = open('try_3.txt', 'w')
    g = open('accs.txt', 'w')
    g.close()

    task = MarioTask("testbed", initMarioMode=2)
    task.env.initMarioMode = 2
    task.env.levelDifficulty = 1

    results = []
    names = []

    with open('type.txt', 'w') as f:  # note: rebinds f, leaving try_3.txt unclosed
        f.write('dt')

    iterations = 10
    rounds = 5
    learning_samples = 2
    eval_samples = 5

    agent = Supervise(IT, useKMM=False)
    exp = EpisodicExperiment(task, agent)
    E = Evaluator(agent, exp)
    if args['noisy']:
        prefix = 'dt-noisy-sup-eval'
    else:
        prefix = 'dt-sup-eval'
    sl_data, sup_data, acc, loss = E.eval(rounds=rounds, iterations=iterations,
                                          learning_samples=learning_samples,
                                          eval_samples=eval_samples, prefix=prefix)

    np.save('./data/' + prefix + '-sl_data.npy', sl_data)
    np.save('./data/' + prefix + '-acc.npy', acc)
    np.save('./data/' + prefix + '-loss.npy', loss)

    analysis = Analysis()
    analysis.get_perf(sl_data, range(iterations))
    analysis.plot(names=['Supervised Learning'], label='Reward',
                  filename='./results/' + prefix + '-return_plots.eps')  # , ylims=[0, 1600])

    acc_a = Analysis()
    acc_a.get_perf(acc, range(iterations))
    acc_a.plot(names=['Supervised Learning Acc.'], label='Accuracy',
               filename='./results/' + prefix + '-acc_plots.eps')

    loss_a = Analysis()
    loss_a.get_perf(loss, range(iterations))
    loss_a.plot(names=['Supervised Loss'], label='Loss',
                filename='./results/' + prefix + '-loss_plots.eps')

    # agent.saveModel()
    print "finished"
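# Several of these scripts index a module-level `args` dict (args['noisy'],
# args['linear']) that is not defined in this section; a plausible sketch using
# argparse (the flag names are assumptions, not the repo's actual CLI):
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--noisy', action='store_true', help='use the NoisySupervise agent and noisy data paths')
parser.add_argument('--linear', action='store_true', help='use a linear (SVC) learner instead of a decision tree')
args = vars(parser.parse_args())  # dict-style access, matching args['noisy']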
def main():
    f = open('try_3.txt', 'w')
    g = open('accs.txt', 'w')
    g.close()

    task = MarioTask("testbed", initMarioMode=2)
    task.env.initMarioMode = 2
    task.env.levelDifficulty = 1

    results = []
    names = []

    iterations = 50
    rounds = 15
    agent = Supervise(IT, useKMM=False)
    exp = EpisodicExperiment(task, agent)
    T = Tester(agent, exp)
    sl_data, sup_data, acc = T.test(rounds=rounds, iterations=iterations)

    np.save('./data/sup_data.npy', sup_data)
    np.save('./data/sl_data.npy', sl_data)
    np.save('./data/acc.npy', acc)

    IPython.embed()

    analysis = Analysis()
    analysis.get_perf(sup_data, range(iterations))
    analysis.get_perf(sl_data, range(iterations))
    analysis.plot(names=['Supervisor', 'Supervised Learning'], label='Reward',
                  filename='./results/return_plots.eps')  # , ylims=[0, 1600])

    acc_a = Analysis()
    acc_a.get_perf(acc, range(iterations))
    acc_a.plot(names=['Supervised Learning Acc.'], label='Accuracy',
               filename='./results/acc_plots.eps')

    print "finished"
def main():
    f = open('try_3.txt', 'w')
    g = open('accs.txt', 'w')
    g.close()

    task = MarioTask("testbed", initMarioMode=2)
    task.env.initMarioMode = 2
    task.env.levelDifficulty = 1
    task.env.BASE_LEVEL = 500000

    results = []
    names = []

    iterations = 20
    rounds = 30
    learning_samples = 33
    eval_samples = 10
    # Smaller settings for a quick smoke test:
    # iterations = 5
    # rounds = 2
    # learning_samples = 3
    # eval_samples = 2

    if args['noisy']:
        agent = NoisySupervise(IT, useKMM=False)
        dire = './training_data_noisy/'
    else:
        agent = Supervise(IT, useKMM=False)
        dire = './training_data/'

    exp = EpisodicExperiment(task, agent)
    C = Collector(agent, exp)
    C.collect(rounds=rounds, iterations=iterations,
              learning_samples=learning_samples, eval_samples=eval_samples,
              directory=dire)
    print "finished"
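# The save paths used throughout (./data/, ./results/, ./training_data/,
# ./training_data_noisy/, ./expert_data/) must exist before np.save and the
# collector write to them; a minimal guard (an addition, not in the repo):
import os

for d in ('./data', './results', './training_data',
          './training_data_noisy', './expert_data'):
    if not os.path.exists(d):
        os.makedirs(d)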
def main():
    filename = "learned_individuals_{0}".format(0)
    rand_individual = Individual(data=filename, random=True)
    agent = MyAgent(rand_individual)
    task = MarioTask(agent.name, initMarioMode=2)
    exp = EpisodicExperiment(task, agent)
    print('Task Ready')

    exp.doEpisodes(2)
    print('mm 2:', task.reward)

    task.env.initMarioMode = 1
    exp.doEpisodes(1)
    print('mm 1:', task.reward)

    task.env.initMarioMode = 0
    exp.doEpisodes(1)
    print('mm 0:', task.reward)

    task.env.initMarioMode = 0
    exp.doEpisodes(1)
    print('mm 0:', task.reward)

    task.env.initMarioMode = 0
    task.env.levelDifficulty = 5
    exp.doEpisodes(1)
    print('mm 0, ld 5: ', task.reward)

    task.env.initMarioMode = 1
    task.env.levelDifficulty = 5
    exp.doEpisodes(1)
    print('mm 1, ld 5: ', task.reward)

    task.env.initMarioMode = 2
    task.env.levelDifficulty = 5
    exp.doEpisodes(1)
    print('mm 2, ld 5: ', task.reward)

    print("finished")
def main():
    f = open('try_3.txt', 'w')
    g = open('accs.txt', 'w')
    g.close()

    task = MarioTask("testbed", initMarioMode=2)
    task.env.initMarioMode = 2
    task.env.levelDifficulty = 1

    results = []
    names = []

    with open('type.txt', 'w') as f:
        f.write('ent')

    # test dagger
    # iterations = 1
    # rounds = 1
    iterations = 50
    rounds = 15

    # agent = Dagger(IT, useKMM=False)
    # exp = EpisodicExperiment(task, agent)
    # T = Tester(agent, exp)
    # dagger_results = T.test(rounds=rounds, iterations=iterations)
    # dagger_data = dagger_results[-1]
    # dagger_results = dagger_results[:-1]
    # results.append(dagger_results)
    # names.append('dagger')
    # pickle.dump(results, open('results.p', 'wb'))

    # agent = Dagger(IT, useKMM=False)
    # exp = EpisodicExperiment(task, agent)
    # T = Tester(agent, exp)
    # dagger_data, _, acc = T.test(rounds=rounds, iterations=iterations)

    agent = Supervise(IT, useKMM=False)
    exp = EpisodicExperiment(task, agent)
    T = Tester(agent, exp)
    prefix = 'dt-noisy-sup-change-entropy'
    sl_data, sup_data, acc = T.test(rounds=rounds, iterations=iterations, prefix=prefix)

    np.save('./data/' + prefix + '-sup_data.npy', sup_data)
    np.save('./data/' + prefix + '-sl_data.npy', sl_data)
    np.save('./data/' + prefix + '-acc.npy', acc)
    # IPython.embed()

    analysis = Analysis()
    analysis.get_perf(sup_data, range(iterations))
    analysis.get_perf(sl_data, range(iterations))
    analysis.plot(names=['Supervisor', 'Supervised Learning'], label='Reward',
                  filename='./results/' + prefix + '-return_plots.eps')  # , ylims=[0, 1600])

    acc_a = Analysis()
    acc_a.get_perf(acc, range(iterations))
    acc_a.plot(names=['Supervised Learning Acc.'], label='Accuracy',
               filename='./results/' + prefix + '-acc_plots.eps')

    """
    agent = Dagger(IT, useKMM=False)
    exp = EpisodicExperiment(task, agent)
    T = Tester(agent, exp)
    dagger_data, _, acc = T.test(rounds=rounds, iterations=iterations)

    np.save('./data/dagger_data.npy', dagger_data)
    np.save('./data/acc.npy', acc)
    IPython.embed()

    analysis = Analysis()
    analysis.get_perf(dagger_data, range(iterations))
    analysis.plot(names=['DAgger'], label='Reward', filename='./results/return_plots.eps')

    acc_a = Analysis()
    acc_a.get_perf(acc, range(iterations))
    acc_a.plot(names=['DAgger Acc.'], label='Accuracy', filename='./results/acc_plots.eps')
    """

    # agent = Supervise(IT, useKMM=False)
    # exp = EpisodicExperiment(task, agent)
    # T = Tester(agent, exp)
    # supervise_results = T.test(rounds=rounds, iterations=iterations)
    # supervise_data = supervise_results[-1]
    # supervise_results = supervise_results[:-1]
    # results.append(supervise_results)
    # names.append('supervise')
    # pickle.dump(results, open('results.p', 'wb'))
    # IPython.embed()
    # analysis = Analysis()
    # analysis.get_perf(supervise_data, results[1][5])
    # analysis.get_perf(dagger_data, results[0][5])
    # analysis.plot(names=['Supervise', 'DAgger'], label='Reward', filename='./return_plot.eps')  # , ylims=[-1, 0])

    # agent = Sheath(IT, useKMM=False, sigma=1.0)
    # exp = EpisodicExperiment(task, agent)
    # T = Tester(agent, exp)
    # dagger_results = T.test(rounds=10, iterations=35)
    # results.append(dagger_results)
    # names.append('sheath_1')
    # pickle.dump(results, open('results.p', 'wb'))

    # agent = Sheath(IT, useKMM=False, sigma=1e-1)
    # exp = EpisodicExperiment(task, agent)
    # T = Tester(agent, exp)
    # dagger_results = T.test(rounds=10, iterations=35)
    # results.append(dagger_results)
    # names.append('sheath_1')
    # pickle.dump(results, open('results.p', 'wb'))

    # agent = Sheath(IT, useKMM=False, sigma=0.5)
    # exp = EpisodicExperiment(task, agent)
    # T = Tester(agent, exp)
    # dagger_results = T.test(rounds=10, iterations=35)
    # results.append(dagger_results)
    # names.append('sheath_1')
    # pickle.dump(results, open('results.p', 'wb'))

    # agent = Sheath(IT, useKMM=False, sigma=1e-1)
    # exp = EpisodicExperiment(task, agent)
    # T = Tester(agent, exp)
    # dagger_results = T.test(rounds=4, iterations=35)
    # results.append(dagger_results)
    # names.append('sheath_1')

    # agent = Sheath(IT, useKMM=False, sigma=1e-2)
    # exp = EpisodicExperiment(task, agent)
    # T = Tester(agent, exp)
    # dagger_results = T.test(rounds=4, iterations=35)
    # results.append(dagger_results)
    # names.append('sheath_1')

    # test big ahude
    # agent = Ahude(IT, f, gamma=1e-2, labelState=True, useKMM=True)
    # exp = EpisodicExperiment(task, agent)
    # T = Tester(agent, exp)
    # ahude_big_results = T.test(rounds=3)
    # results.append(ahude_big_results)
    # names.append('ahude_1e-1')
    # pickle.dump(results, open('results.p', 'wb'))

    # test med ahude
    # agent = Ahude(IT, f, gamma=1e-2, labelState=False, useKMM=True)
    # exp = EpisodicExperiment(task, agent)
    # T = Tester(agent, exp)
    # ahude_med_results = T.test(rounds=3)
    # results.append(ahude_med_results)
    # names.append('ahude_1e-2')

    # test small ahude
    # agent = Ahude(IT, f, gamma=1e-3)
    # exp = EpisodicExperiment(task, agent)
    # T = Tester(agent, exp)
    # ahude_small_results = T.test()
    # results.append(ahude_small_results)
    # names.append('ahude_1e-3')
    # pickle.dump(results, open('results.p', 'wb'))

    # plt.figure(1)
    # for i in range(len(results)):
    #     plt.plot(results[i][5], results[i][1])
    # plt.legend(names, loc='upper left')

    # plt.figure(2)
    # for i in range(len(results)):
    #     plt.plot(results[i][0])
    # plt.legend(names, loc='upper left')

    # plt.figure(3)
    # for i in range(0, len(results)):
    #     plt.plot(results[i][3])
    # plt.legend(names, loc='upper left')

    plt.show()
    # IPython.embed()
    f.close()
    # agent.saveModel()
    print "finished"
def main():
    clo = CmdLineOptions(sys.argv)
    task = MarioTask(MarioEnvironment(clo.getHost(), clo.getPort(), clo.getAgent().name))
    # argument order aligned with EpisodicExperiment(task, agent) used elsewhere in these scripts
    exp = EpisodicExperiment(task, clo.getAgent())
    exp.doEpisodes(3)
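# CmdLineOptions is not shown in this section; a rough sketch of what such an
# argv wrapper might look like (the class body, flag names, and defaults are
# all assumptions, and the real client may differ):
class CmdLineOptionsSketch:
    """Parses '-flag value' pairs, e.g. ['prog', '-host', 'localhost', '-port', '4242']."""

    def __init__(self, argv):
        self._opts = dict(zip(argv[1::2], argv[2::2]))
        self._agent = ForwardAgent()  # assumed default agent

    def getHost(self):
        return self._opts.get('-host', 'localhost')

    def getPort(self):
        return int(self._opts.get('-port', 4242))

    def getAgent(self):
        return self._agent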
def main():
    f = open('try_3.txt', 'w')
    g = open('accs.txt', 'w')
    g.close()

    task = MarioTask("testbed", initMarioMode=2)
    task.env.initMarioMode = 2
    task.env.levelDifficulty = 1

    results = []
    names = []

    iterations = 5
    rounds = 2
    learning_samples = 2
    eval_samples = 3

    if args['noisy']:
        prefix = '-noisy-sup-eval'
        dire = './training_data_noisy/'
        agent = NoisySupervise(IT, useKMM=False)
    else:
        prefix = '-sup-eval'
        dire = './training_data/'
        agent = Supervise(IT, useKMM=False)

    if args['linear']:
        agent.learner.linear = True
        prefix = 'svc' + prefix
    else:
        agent.learner.linear = False
        prefix = 'dt' + prefix

    exp = EpisodicExperiment(task, agent)
    E = Evaluator(agent, exp)
    sl_data, sup_data, acc, loss, js = E.eval(rounds=rounds, iterations=iterations,
                                              learning_samples=learning_samples,
                                              eval_samples=eval_samples,
                                              prefix=prefix, directory=dire)

    np.save('./data/' + prefix + '-sl_data.npy', sl_data)
    np.save('./data/' + prefix + '-acc.npy', acc)
    np.save('./data/' + prefix + '-loss.npy', loss)
    np.save('./data/' + prefix + '-js.npy', js)

    analysis = Analysis()
    analysis.get_perf(sl_data, range(iterations))
    analysis.plot(names=['Supervised Learning'], label='Reward',
                  filename='./results/' + prefix + '-return_plots.eps')  # , ylims=[0, 1600])

    acc_a = Analysis()
    acc_a.get_perf(acc, range(iterations))
    acc_a.plot(names=['Supervised Learning Acc.'], label='Accuracy',
               filename='./results/' + prefix + '-acc_plots.eps', ylims=[0, 1])

    loss_a = Analysis()
    loss_a.get_perf(loss, range(iterations))
    loss_a.plot(names=['Supervised Learning loss'], label='Loss',
                filename='./results/' + prefix + '-loss_plots.eps', ylims=[0, 1])

    js_a = Analysis()
    js_a.get_perf(js, range(iterations))
    js_a.plot(names=['Supervised Learning'], label='J()',
              filename='./results/' + prefix + '-js_plots.eps')

    # agent.saveModel()
    print "finished"
def main():
    agent = ForwardAgent()
    task = MarioTask(agent.name, initMarioMode=2)
    exp = EpisodicExperiment(task, agent)
    print 'Task Ready'

    exp.doEpisodes(2)
    print 'mm 2:', task.reward

    task.env.initMarioMode = 1
    exp.doEpisodes(1)
    print 'mm 1:', task.reward

    task.env.initMarioMode = 0
    exp.doEpisodes(1)
    print 'mm 0:', task.reward

    task.env.initMarioMode = 0
    exp.doEpisodes(1)
    print 'mm 0:', task.reward

    task.env.initMarioMode = 0
    task.env.levelDifficulty = 5
    exp.doEpisodes(1)
    print 'mm 0, ld 5: ', task.reward

    task.env.initMarioMode = 1
    task.env.levelDifficulty = 5
    exp.doEpisodes(1)
    print 'mm 1, ld 5: ', task.reward

    task.env.initMarioMode = 2
    task.env.levelDifficulty = 5
    exp.doEpisodes(1)
    print 'mm 2, ld 5: ', task.reward

    print "finished"
IPython.embed()

analysis = Analysis()
analysis.get_perf(sup_data, range(iterations))
analysis.get_perf(sl_data, range(iterations))
analysis.plot(names=['Supervisor', 'Supervised Learning'], label='Reward',
              filename='./results/return_plots.eps')  # , ylims=[0, 1600])

acc_a = Analysis()
acc_a.get_perf(acc, range(iterations))
acc_a.plot(names=['Supervised Learning Acc.'], label='Accuracy',
           filename='./results/acc_plots.eps')

# Disabled DAgger variant, kept for reference (the snippet is truncated here,
# so the string is closed after its last surviving line):
"""
agent = Dagger(IT, useKMM=False)
exp = EpisodicExperiment(task, agent)
T = Tester(agent, exp)
prefix = 'svc-dagger-change'
dagger_data, _, acc = T.test(rounds=rounds, iterations=iterations, prefix=prefix)

np.save('./data/svc-dagger-change-dagger_data.npy', dagger_data)
np.save('./data/svc-dagger-change-acc.npy', acc)
# IPython.embed()

analysis = Analysis()
analysis.get_perf(dagger_data, range(iterations))
analysis.plot(names=['DAgger'], label='Reward',
              filename='./results/svc-dagger-change-return_plots.eps')

acc_a = Analysis()
acc_a.get_perf(acc, range(iterations))
"""
def main():
    f = open('try_3.txt', 'w')
    g = open('accs.txt', 'w')
    g.close()

    task = MarioTask("testbed", initMarioMode=2)
    task.env.initMarioMode = 2
    task.env.levelDifficulty = 1

    results = []
    names = []

    with open('type.txt', 'w') as f:
        f.write('dt')

    iterations = 20
    rounds = 30
    learning_samples = 33
    eval_samples = 10
    # Smaller settings for a quick smoke test:
    # iterations = 5
    # rounds = 2
    # learning_samples = 3
    # eval_samples = 2

    agent = Dagger(IT, useKMM=False)
    if args['linear']:
        agent.learner.linear = True
        prefix = 'svc-dagger-change-'
    else:
        agent.learner.linear = False
        prefix = 'dt-dagger-change-'

    exp = EpisodicExperiment(task, agent)
    T = Tester(agent, exp)
    dagger_data, _, acc, loss, js, test_acc = T.test(
        rounds=rounds, iterations=iterations,
        learning_samples=learning_samples, eval_samples=eval_samples,
        prefix=prefix)

    np.save('./data/' + prefix + 'dagger_data.npy', dagger_data)
    np.save('./data/' + prefix + 'acc.npy', acc)
    np.save('./data/' + prefix + 'loss.npy', loss)
    np.save('./data/' + prefix + 'js.npy', js)
    np.save('./data/' + prefix + 'test_acc.npy', test_acc)

    analysis = Analysis()
    analysis.get_perf(dagger_data, range(iterations))
    analysis.plot(names=['DAgger'], label='Reward',
                  filename='./results/' + prefix + 'return_plots.eps')

    acc_a = Analysis()
    acc_a.get_perf(acc, range(iterations))
    acc_a.plot(names=['DAgger Acc.'], label='Accuracy',
               filename='./results/' + prefix + 'acc_plots.eps', ylims=[0, 1])

    test_acc_a = Analysis()
    test_acc_a.get_perf(test_acc, range(iterations))
    test_acc_a.plot(names=['DAgger Acc.'], label='Test Accuracy',
                    filename='./results/' + prefix + 'test_acc_plots.eps', ylims=[0, 1])

    loss_a = Analysis()
    loss_a.get_perf(loss, range(iterations))
    loss_a.plot(names=['DAgger Loss'], label='Loss',
                filename='./results/' + prefix + 'loss_plots.eps', ylims=[0, 1])

    js_a = Analysis()
    js_a.get_perf(js, range(iterations))
    js_a.plot(names=['DAgger'], label='J()',
              filename='./results/' + prefix + 'js_plots.eps')  # was prefix + '-js_plots', which doubled the dash

    print "finished"
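# For orientation: the Dagger agent these scripts test implements the
# dataset-aggregation imitation-learning loop. A minimal illustrative sketch
# (env/supervisor/learner and their methods are hypothetical stand-ins, not
# this repo's API):
def dagger_sketch(env, supervisor, learner, iterations=10):
    states, labels = [], []
    for i in range(iterations):
        state, done = env.reset(), False
        while not done:
            # After the first iteration the learner picks the actions...
            if i == 0:
                action = supervisor.action(state)
            else:
                action = learner.predict([state])[0]
            # ...but the supervisor's action is always recorded as the label.
            states.append(state)
            labels.append(supervisor.action(state))
            state, done = env.step(action)
        # Retrain on the aggregated dataset of all visited states.
        learner.fit(states, labels)
    return learner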
def main():
    agent = ForwardAgent()
    task = MarioTask(agent.name, initMarioMode=2)
    exp = EpisodicExperiment(task, agent)
    print 'Task Ready'

    exp.doEpisodes(2)
    print 'mm 2:', task.reward

    task.env.initMarioMode = 1
    exp.doEpisodes(1)
    print 'mm 1:', task.reward

    task.env.initMarioMode = 0
    exp.doEpisodes(1)
    print 'mm 0:', task.reward

    task.env.initMarioMode = 0
    exp.doEpisodes(1)
    print 'mm 0:', task.reward

    task.env.initMarioMode = 0
    task.env.levelDifficulty = 5
    exp.doEpisodes(1)
    print 'mm 0, ld 5: ', task.reward

    task.env.initMarioMode = 1
    task.env.levelDifficulty = 5
    exp.doEpisodes(1)
    print 'mm 1, ld 5: ', task.reward

    task.env.initMarioMode = 2
    task.env.levelDifficulty = 5
    exp.doEpisodes(1)
    print 'mm 2, ld 5: ', task.reward

    print "finished"
def main():
    f = open('try_3.txt', 'w')
    agent = Ahude(IT, f)

    # Running logs; each starts with a dummy zero row that np.vstack grows.
    distances = np.zeros([1])
    data = np.zeros([1])
    num_help = np.zeros([1])
    mis_match = np.zeros([1])

    # task = MarioTask(agent.name, initMarioMode=2)
    # exp = EpisodicExperiment(task, agent)
    print 'Task Ready'
    # task.env.initMarioMode = 2
    # task.env.levelDifficulty = 1

    task = MarioTask(agent.name, initMarioMode=2)
    exp = EpisodicExperiment(task, agent)
    task.env.initMarioMode = 2
    task.env.levelDifficulty = 1

    if agent.initialTraining:
        exp.doEpisodes(2)
        agent.newModel()
        agent.saveModel()
    else:
        for i in range(ITERATIONS):
            # IPython.embed()
            # if i == 2:
            #     agent.initialTraining = True
            #     IPython.embed()
            print "ITERATION", i
            f.write('ITERATION %i \n' % i)
            f.write('___________________________________________________\n')

            rewards = exp.doEpisodes(1)
            agent.updateModel()

            if agent._getName() == 'Ahude':
                num_help = np.vstack((num_help, np.array(agent.getNumHelp())))
                mis_match = np.vstack((mis_match, np.array(agent.getMismatch())))

            # agent.off = True
            # rewards = exp.doEpisodes(1)
            # agent.off = False

            size = len(rewards[0])
            distances = np.vstack((distances, np.array(rewards[0][size - 1])))
            data = np.vstack((data, np.array(agent.getNumData())))
            agent.reset()
            # agent.notComplete = False
            print "TRACK COMPLETE"
            # IPython.embed()

    # IPython.embed()
    f.close()

    plt.figure(2)
    plt.plot(data, distances)
    if agent._getName() == 'Ahude':
        plt.figure(1)
        plt.plot(data, num_help)
        plt.figure(3)
        plt.plot(mis_match)
    plt.show()

    # agent.saveModel()
    print "finished"