Example #1
def q_learning_nfq(**args):

    # estimate: track the best score, turn count, and agent across runs
    best_score = 0
    best_turn = 1000
    best_agent = None

    score_list = []
    turn_list = []
    #for i in range(2):
    for i in range(50):

        agent = QLearning(12, 4)

        # training
        agent.greedy_rate = 0.0
        print
        print "==========================="
        print 'before training'
        print_state(agent.get_q_values)
        training(agent, args)
        print 'after training'
        print_state(agent.get_q_values)
        agent.greedy_rate = 0.7
        #agent.learner._setExplorer(EpsilonGreedyExplorer(0.3))

        score, turn = play(agent, 'neural', args, [2, 2])

        score_list.append(score)
        turn_list.append(turn)

        print
        print 'test one play'
        print i, int(numpy.mean(score_list)), max(score_list), score, turn

        if best_agent is None or numpy.average(
                best_agent.train_error) > numpy.average(agent.train_error):
            print 'best train error !'
            best_score = score
            best_turn = turn
            best_agent = agent
        # if best_score < score or best_turn > turn:
        #         print 'best train error !'
        #         best_score = score
        #         best_turn  = turn
        #         best_agent = agent

        with open(args['path'] + '/result.dump', 'w') as f:
            pickle.dump([score_list, turn_list, best_agent], f)
    print
    print "==========================="
    print 'best score : ', best_score
    print 'best turn : ', best_turn
    print_state(best_agent.get_q_values)
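The snippet relies on helpers (QLearning, training, play, print_state) that live elsewhere in the repository. The sketch below is purely illustrative: signatures and attributes are inferred from the calls above so the control flow can be read in isolation; they are not taken from the original code.

import numpy
import pickle

class QLearning(object):
    # Assumed constructor: (number of input units, number of actions).
    def __init__(self, n_input, n_action):
        self.greedy_rate = 0.0    # probability of exploiting the learned policy
        self.train_error = [1.0]  # per-pass training errors, averaged when comparing agents
        self.episodes = []        # recorded play histories

    def get_q_values(self, board):
        return numpy.zeros(4)     # one Q-value per move (up, down, left, right)

def training(agent, args):
    pass                          # self-play plus network fitting (defined in the repository)

def play(agent, mode=None, args=None, start_tiles=None):
    return 0, 0                   # assumed to return (final score, number of turns)

def print_state(q_value_fn):
    pass                          # assumed to print Q-values for a few reference boards

# Hypothetical invocation; 'path' is the only key read from the args dict here,
# and the directory name is illustrative:
# q_learning_nfq(path='results')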
Example #3
def q_learning_nfq(**args):

    # estimate: track the best score, turn count, and agent across runs
    best_score = 0
    best_turn = 1000
    best_agent = None

    score_list = []
    turn_list = []
    for i in range(1):
        #for i in range(50):

        #agent = QLearning(12, 4)
        agent = QLearning(117, 4)

        # training
        agent.greedy_rate = 0.5
        # 100 training passes; renamed to 'epoch' so it does not shadow the outer example index i
        for epoch in range(100):
            print
            print "=========================== ", epoch
            agent.greedy_rate += 0.05 if agent.greedy_rate < 0.7 else 0.0
            training(agent, args)
        agent.greedy_rate = 0.7

        #score, turn = play(agent, 'neural', args, [2,2])
        score, turn = play(agent)

        score_list.append(score)
        turn_list.append(turn)

        print
        print 'test one play'
        print i, int(numpy.mean(score_list)), max(score_list), score, turn

        if best_agent is None or numpy.average(
                best_agent.train_error) > numpy.average(agent.train_error):
            print 'best train error !'
            best_score = score
            best_turn = turn
            best_agent = agent
        # if best_score < score or best_turn > turn:
        #         print 'best train error !'
        #         best_score = score
        #         best_turn  = turn
        #         best_agent = agent

        with open(args['path'] + '/result.dump', 'w') as f:
            pickle.dump([score_list, turn_list, best_agent], f)
    print
    print "==========================="
    print 'best score : ', best_score
    print 'best turn : ', best_turn
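This variant anneals exploitation during training: greedy_rate starts at 0.5 and grows by 0.05 per training pass until it reaches 0.7, after which it is held. A standalone sketch of that schedule (the helper name is hypothetical):

def greedy_rate_schedule(start=0.5, step=0.05, cap=0.7, passes=100):
    # Reproduces the in-loop update above: increase until the cap, then hold.
    rate, rates = start, []
    for _ in range(passes):
        rate += step if rate < cap else 0.0
        rates.append(rate)
    return rates

# First values: 0.55, 0.60, 0.65, 0.70 (up to float rounding), then held at 0.70.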
Example #5
def main():

    # For dummy-variable (one-hot) encoding, the tile values are [0, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192];
    # the number of input units then becomes 16×14.
    ql_obj = QLearning(16, 4, dummy=False)

    max_score = 0
    score_list = []
    for i in range(10000):

        score, result = play(ql_obj)

        # Q-learning
        ql_obj.train(result)

        score_list.append(score)

        # print Q-values for a fixed reference board
        data = [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 2], [0, 0, 0, 2]]
        output_vec = ql_obj.get_q_values(data)
        print i, numpy.mean(score_list), max(score_list), output_vec
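The comment above describes the optional dummy-variable (one-hot) input encoding, which this run disables with dummy=False: each of the 16 board cells would be expanded over the 14 possible tile values [0, 2, 4, ..., 8192], giving 16×14 = 224 input units. A minimal sketch of that encoding (the helper below is hypothetical, not part of the original QLearning class):

import numpy

TILE_VALUES = [0, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192]

def to_dummy(board):
    # One-hot encode a 4x4 board: 16 cells x 14 tile values = 224 input units.
    cells = [cell for row in board for cell in row]
    vec = numpy.zeros((len(cells), len(TILE_VALUES)))
    for i, cell in enumerate(cells):
        vec[i, TILE_VALUES.index(cell)] = 1.0
    return vec.flatten()

# The reference board used above encodes to a vector of length 224:
# to_dummy([[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 2], [0, 0, 0, 2]]).shape == (224,)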
Example #7
def learning(best_agent):

    # print_experience
    best_agent.print_experience()
    print_state(best_agent.get_q_values)

    agent = QLearning(12, 4)

    # before learning: copy the episode history from the best agent
    agent.episodes = best_agent.episodes

    # learning: replay the stored episodes, fitting the network every 10 episodes
    for i, episode in enumerate(best_agent.episodes):
        agent.history += episode
        if i % 10 == 0 and i != 0:
            agent.learn()
            agent.reset()

    # after learning
    print_state(agent.get_q_values)

    # print_experience
    agent.print_experience()
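learning() expects an agent that already holds episode history. A hedged usage sketch, assuming the best_agent was pickled by the q_learning_nfq runs above; the directory name below is illustrative:

import pickle

# Load the [score_list, turn_list, best_agent] list written by q_learning_nfq.
with open('results/result.dump') as f:
    score_list, turn_list, best_agent = pickle.load(f)

learning(best_agent)  # replay its episodes into a fresh QLearning(12, 4) agent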