# Example #1
0
def main():
    """Train a Q-learning agent over 10000 played games, logging progress.

    Each iteration plays one game via ``play``, feeds the returned result to
    ``QLearning.train``, and prints one line containing: the episode index,
    the mean of all scores so far, the best score so far, and the Q-values
    the agent currently reports for a fixed probe board.
    """
    # Original note: with dummy-variable encoding over the 14 tile values
    # [0, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192],
    # the number of input units becomes 16 x 14.
    # NOTE(review): dummy=False is passed here, so presumably that encoding
    # is not in effect for this run -- confirm against QLearning.
    ql_obj = QLearning(16, 4, dummy=False)

    # Fixed probe board, used only to watch how the Q-values evolve.
    # It never changes, so build it once instead of on every episode.
    probe_board = [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 2], [0, 0, 0, 2]]

    max_score = None  # best score seen so far; None until the first game
    score_list = []
    for i in range(10000):
        score, result = play(ql_obj)

        # Q-learning update from the game that was just played.
        ql_obj.train(result)

        score_list.append(score)
        # Track the running maximum instead of rescanning the whole history.
        if max_score is None or score > max_score:
            max_score = score

        output_vec = ql_obj.get_q_values(probe_board)
        # A single pre-formatted string prints the same space-separated line
        # under both the Python 2 print statement and the Python 3 function.
        print("%s %s %s %s" % (i, numpy.mean(score_list), max_score, output_vec))
def main():
    """Train a Q-learning agent over 10000 played games, logging progress.

    Each iteration plays one game via ``play``, feeds the returned result to
    ``QLearning.train``, and prints one line containing: the episode index,
    the mean of all scores so far, the best score so far, and the Q-values
    the agent currently reports for a fixed probe board.
    """
    # Original note: with dummy-variable encoding over the 14 tile values
    # [0, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192],
    # the number of input units becomes 16 x 14.
    # NOTE(review): dummy=False is passed here, so presumably that encoding
    # is not in effect for this run -- confirm against QLearning.
    ql_obj = QLearning(16, 4, dummy=False)

    # Fixed probe board, used only to watch how the Q-values evolve.
    # It never changes, so build it once instead of on every episode.
    probe_board = [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 2], [0, 0, 0, 2]]

    max_score = None  # best score seen so far; None until the first game
    score_list = []
    for i in range(10000):
        score, result = play(ql_obj)

        # Q-learning update from the game that was just played.
        ql_obj.train(result)

        score_list.append(score)
        # Track the running maximum instead of rescanning the whole history.
        if max_score is None or score > max_score:
            max_score = score

        output_vec = ql_obj.get_q_values(probe_board)
        # A single pre-formatted string prints the same space-separated line
        # under both the Python 2 print statement and the Python 3 function.
        print("%s %s %s %s" % (i, numpy.mean(score_list), max_score, output_vec))