Example #1
0
def q_learning_nfq(**args):

    # estimate
    best_score = 0
    best_turn = 1000
    best_agent = None

    score_list = []
    turn_list = []
    #for i in range(2):
    for i in range(50):

        agent = QLearning(12, 4)

        # training
        agent.greedy_rate = 0.0
        print
        print "==========================="
        print 'before training'
        print_state(agent.get_q_values)
        training(agent, args)
        print 'after training'
        print_state(agent.get_q_values)
        agent.greedy_rate = 0.7
        #agent.learner._setExplorer(EpsilonGreedyExplorer(0.3))

        score, turn = play(agent, 'neural', args, [2, 2])

        score_list.append(score)
        turn_list.append(turn)

        print
        print 'test one play'
        print i, int(numpy.mean(score_list)), max(score_list), score, turn

        if best_agent == None or numpy.average(
                best_agent.train_error) > numpy.average(agent.train_error):
            print 'best train error !'
            best_score = score
            best_turn = turn
            best_agent = agent
        # if best_score < score or best_turn > turn:
        #         print 'best train error !'
        #         best_score = score
        #         best_turn  = turn
        #         best_agent = agent

        with open(args['path'] + '/result.dump', 'w') as f:
            pickle.dump([score_list, turn_list, best_agent], f)
    print
    print "==========================="
    print 'best score : ', best_score
    print 'best turn : ', best_turn
    print_state(best_agent.get_q_values)
def q_learning_nfq(**args):

    # estimate
    best_score = 0
    best_turn  = 1000
    best_agent = None

    score_list = []
    turn_list = []
    #for i in range(2):
    for i in range(50):

        agent = QLearning(12, 4)

        # training
        agent.greedy_rate   = 0.0
        print
        print "==========================="
        print 'before training'
        print_state(agent.get_q_values)
        training(agent, args)
        print 'after training'
        print_state(agent.get_q_values)
        agent.greedy_rate   = 0.7
        #agent.learner._setExplorer(EpsilonGreedyExplorer(0.3))

        score, turn = play(agent, 'neural', args, [2,2])

        score_list.append(score)
        turn_list.append(turn)

        print
        print 'test one play'
        print i, int(numpy.mean(score_list)) , max(score_list) , score, turn

        if best_agent==None or numpy.average(best_agent.train_error) > numpy.average(agent.train_error):
            print 'best train error !'
            best_score = score
            best_turn  = turn
            best_agent = agent
        # if best_score < score or best_turn > turn:
        #         print 'best train error !'
        #         best_score = score
        #         best_turn  = turn
        #         best_agent = agent

        with open(args['path']+'/result.dump', 'w') as f:
            pickle.dump([score_list, turn_list, best_agent], f)
    print
    print "==========================="
    print 'best score : ', best_score
    print 'best turn : ', best_turn
    print_state(best_agent.get_q_values)
Example #3
0
def q_learning_nfq(**args):

    # estimate
    best_score = 0
    best_turn = 1000
    best_agent = None

    score_list = []
    turn_list = []
    for i in range(1):
        #for i in range(50):

        #agent = QLearning(12, 4)
        agent = QLearning(117, 4)

        # training
        agent.greedy_rate = 0.5
        for i in range(100):
            print
            print "=========================== ", i
            agent.greedy_rate += 0.05 if agent.greedy_rate < 0.7 else 0.0
            training(agent, args)
        agent.greedy_rate = 0.7

        #score, turn = play(agent, 'neural', args, [2,2])
        score, turn = play(agent)

        score_list.append(score)
        turn_list.append(turn)

        print
        print 'test one play'
        print i, int(numpy.mean(score_list)), max(score_list), score, turn

        if best_agent == None or numpy.average(
                best_agent.train_error) > numpy.average(agent.train_error):
            print 'best train error !'
            best_score = score
            best_turn = turn
            best_agent = agent
        # if best_score < score or best_turn > turn:
        #         print 'best train error !'
        #         best_score = score
        #         best_turn  = turn
        #         best_agent = agent

        with open(args['path'] + '/result.dump', 'w') as f:
            pickle.dump([score_list, turn_list, best_agent], f)
    print
    print "==========================="
    print 'best score : ', best_score
    print 'best turn : ', best_turn
def q_learning_nfq(**args):

    # estimate
    best_score = 0
    best_turn  = 1000
    best_agent = None

    score_list = []
    turn_list = []
    for i in range(1):
    #for i in range(50):

        #agent = QLearning(12, 4)
        agent = QLearning(117, 4)

        # training
        agent.greedy_rate   = 0.5
        for i in range(100):
            print
            print "=========================== ", i
            agent.greedy_rate += 0.05 if agent.greedy_rate < 0.7 else 0.0
            training(agent, args)
        agent.greedy_rate   = 0.7

        #score, turn = play(agent, 'neural', args, [2,2])
        score, turn = play(agent)

        score_list.append(score)
        turn_list.append(turn)

        print
        print 'test one play'
        print i, int(numpy.mean(score_list)) , max(score_list) , score, turn

        if best_agent==None or numpy.average(best_agent.train_error) > numpy.average(agent.train_error):
            print 'best train error !'
            best_score = score
            best_turn  = turn
            best_agent = agent
        # if best_score < score or best_turn > turn:
        #         print 'best train error !'
        #         best_score = score
        #         best_turn  = turn
        #         best_agent = agent

        with open(args['path']+'/result.dump', 'w') as f:
            pickle.dump([score_list, turn_list, best_agent], f)
    print
    print "==========================="
    print 'best score : ', best_score
    print 'best turn : ', best_turn