def q_learning_nfq(**args): # estimate best_score = 0 best_turn = 1000 best_agent = None score_list = [] turn_list = [] #for i in range(2): for i in range(50): agent = QLearning(12, 4) # training agent.greedy_rate = 0.0 print print "===========================" print 'before training' print_state(agent.get_q_values) training(agent, args) print 'after training' print_state(agent.get_q_values) agent.greedy_rate = 0.7 #agent.learner._setExplorer(EpsilonGreedyExplorer(0.3)) score, turn = play(agent, 'neural', args, [2, 2]) score_list.append(score) turn_list.append(turn) print print 'test one play' print i, int(numpy.mean(score_list)), max(score_list), score, turn if best_agent == None or numpy.average( best_agent.train_error) > numpy.average(agent.train_error): print 'best train error !' best_score = score best_turn = turn best_agent = agent # if best_score < score or best_turn > turn: # print 'best train error !' # best_score = score # best_turn = turn # best_agent = agent with open(args['path'] + '/result.dump', 'w') as f: pickle.dump([score_list, turn_list, best_agent], f) print print "===========================" print 'best score : ', best_score print 'best turn : ', best_turn print_state(best_agent.get_q_values)
def q_learning_nfq(**args): # estimate best_score = 0 best_turn = 1000 best_agent = None score_list = [] turn_list = [] #for i in range(2): for i in range(50): agent = QLearning(12, 4) # training agent.greedy_rate = 0.0 print print "===========================" print 'before training' print_state(agent.get_q_values) training(agent, args) print 'after training' print_state(agent.get_q_values) agent.greedy_rate = 0.7 #agent.learner._setExplorer(EpsilonGreedyExplorer(0.3)) score, turn = play(agent, 'neural', args, [2,2]) score_list.append(score) turn_list.append(turn) print print 'test one play' print i, int(numpy.mean(score_list)) , max(score_list) , score, turn if best_agent==None or numpy.average(best_agent.train_error) > numpy.average(agent.train_error): print 'best train error !' best_score = score best_turn = turn best_agent = agent # if best_score < score or best_turn > turn: # print 'best train error !' # best_score = score # best_turn = turn # best_agent = agent with open(args['path']+'/result.dump', 'w') as f: pickle.dump([score_list, turn_list, best_agent], f) print print "===========================" print 'best score : ', best_score print 'best turn : ', best_turn print_state(best_agent.get_q_values)
def q_learning_nfq(**args): # estimate best_score = 0 best_turn = 1000 best_agent = None score_list = [] turn_list = [] for i in range(1): #for i in range(50): #agent = QLearning(12, 4) agent = QLearning(117, 4) # training agent.greedy_rate = 0.5 for i in range(100): print print "=========================== ", i agent.greedy_rate += 0.05 if agent.greedy_rate < 0.7 else 0.0 training(agent, args) agent.greedy_rate = 0.7 #score, turn = play(agent, 'neural', args, [2,2]) score, turn = play(agent) score_list.append(score) turn_list.append(turn) print print 'test one play' print i, int(numpy.mean(score_list)), max(score_list), score, turn if best_agent == None or numpy.average( best_agent.train_error) > numpy.average(agent.train_error): print 'best train error !' best_score = score best_turn = turn best_agent = agent # if best_score < score or best_turn > turn: # print 'best train error !' # best_score = score # best_turn = turn # best_agent = agent with open(args['path'] + '/result.dump', 'w') as f: pickle.dump([score_list, turn_list, best_agent], f) print print "===========================" print 'best score : ', best_score print 'best turn : ', best_turn
def q_learning_nfq(**args): # estimate best_score = 0 best_turn = 1000 best_agent = None score_list = [] turn_list = [] for i in range(1): #for i in range(50): #agent = QLearning(12, 4) agent = QLearning(117, 4) # training agent.greedy_rate = 0.5 for i in range(100): print print "=========================== ", i agent.greedy_rate += 0.05 if agent.greedy_rate < 0.7 else 0.0 training(agent, args) agent.greedy_rate = 0.7 #score, turn = play(agent, 'neural', args, [2,2]) score, turn = play(agent) score_list.append(score) turn_list.append(turn) print print 'test one play' print i, int(numpy.mean(score_list)) , max(score_list) , score, turn if best_agent==None or numpy.average(best_agent.train_error) > numpy.average(agent.train_error): print 'best train error !' best_score = score best_turn = turn best_agent = agent # if best_score < score or best_turn > turn: # print 'best train error !' # best_score = score # best_turn = turn # best_agent = agent with open(args['path']+'/result.dump', 'w') as f: pickle.dump([score_list, turn_list, best_agent], f) print print "===========================" print 'best score : ', best_score print 'best turn : ', best_turn
def main(): # ダミー変数化のため, [0, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192] # 入力素子数は, 16×14にされる. ql_obj = QLearning(16, 4, dummy=False) max_score = 0 score_list = [] for i in range(10000): score, result = play(ql_obj) # Q-learning ql_obj.train(result) score_list.append(score) # print weight data = [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 2], [0, 0, 0, 2]] output_vec = ql_obj.get_q_values(data) print i, numpy.mean(score_list), max(score_list), output_vec
def main(): # ダミー変数化のため, [0, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192] # 入力素子数は, 16×14にされる. ql_obj = QLearning(16, 4, dummy=False) max_score = 0 score_list = [] for i in range(10000): score, result = play(ql_obj) # Q-learning ql_obj.train(result) score_list.append(score) # print weight data =[[0,0,0,0], [0,0,0,0], [0,0,0,2], [0,0,0,2]] output_vec= ql_obj.get_q_values(data) print i, numpy.mean(score_list) , max(score_list), output_vec
def learning(best_agent):
    """Retrain a fresh agent from best_agent's recorded episodes,
    invoking a learning step every 10 replayed episodes, and print the
    experience and Q-values before and after.

    best_agent: trained agent providing .episodes, .train history and
                .print_experience()/.get_q_values.
    """
    # Show the source agent's experience and Q-values before retraining.
    best_agent.print_experience()
    print_state(best_agent.get_q_values)
    agent = QLearning(12, 4)
    # NOTE(review): this aliases the same episodes list rather than
    # copying it — confirm sharing is intended.
    agent.episodes = best_agent.episodes
    # Replay all recorded episodes; learn and reset every 10th episode
    # (skipping episode 0 so the first batch is non-empty).
    for i, episode in enumerate(best_agent.episodes):
        agent.history += episode
        if i % 10 == 0 and i != 0:
            agent.learn()
            agent.reset()
    # After learning: show the retrained agent's Q-values and experience.
    print_state(agent.get_q_values)
    agent.print_experience()