Esempio n. 1
0
def test(episodes=20, agent=None, load_path=None, ifrender=False, log=False):
    """Evaluate an agent on Game2048Env using deterministic action selection.

    Parameters
    ----------
    episodes : int
        Number of evaluation episodes to play.
    agent : optional
        Object exposing ``select_action(state, deterministic=True)``.  When
        None, a ``DQN(num_state=16, num_action=4)`` is built and its weights
        are loaded from ``load_path`` (or the agent's default path).
    load_path : str, optional
        Checkpoint path; used only when ``agent`` is None.
    ifrender : bool
        Render the environment after every step when True.
    log : bool
        Emit per-episode metrics through ``logger`` when True.

    Returns
    -------
    dict
        ``'mean'``/``'max'`` over episode scores, ``'list'`` with the raw
        score list, and ``'highest'`` with the per-episode highest tiles
        (backward-compatible addition — previously only printed).
    """
    if log:
        logger.configure(dir="./log/", format_strs="stdout")
    if agent is None:
        agent = DQN(num_state=16, num_action=4)
        if load_path:
            agent.load(load_path)
        else:
            agent.load()

    env = Game2048Env()
    score_list = []
    highest_list = []

    for i in range(episodes):
        state, _, done, info = env.reset()
        state = log2_shaping(state)  # log2-scale board values for the network

        start = time.time()
        while True:
            action = agent.select_action(state, deterministic=True)
            next_state, _, done, info = env.step(action)
            state = log2_shaping(next_state)

            if ifrender:
                env.render()

            if done:
                print(env.Matrix)
                if log:
                    logger.logkv('episode number', i + 1)
                    logger.logkv('episode reward', info['score'])
                    logger.logkv('episode steps', info['steps'])
                    logger.logkv('highest', info['highest'])
                    logger.dumpkvs()
                break

        end = time.time()
        if log:
            print('episode time:{} s\n'.format(end - start))

        score_list.append(info['score'])
        highest_list.append(info['highest'])

    # Hoist the reductions so each aggregate is computed exactly once.
    mean_score = np.mean(score_list)
    max_score = np.max(score_list)
    print('mean score:{}, mean highest:{}'.format(mean_score,
                                                  np.mean(highest_list)))
    # Fixed typo: message previously read "max hightest".
    print('max score:{}, max highest:{}'.format(max_score,
                                                np.max(highest_list)))
    result_info = {
        'mean': mean_score,
        'max': max_score,
        'list': score_list,
        'highest': highest_list,  # new, backward-compatible key
    }
    print(highest_list)
    return result_info
Esempio n. 2
0
    def run(ifrender=False):
        """Play a single 2048 episode with a random agent.

        Returns a tuple ``(elapsed_seconds, highest_tile, score, steps)``
        taken from the environment's final ``info`` dict.
        """
        player = RandomAgent()
        game = Game2048Env()
        obs, reward, done, info = game.reset()
        if ifrender:
            game.render()

        tic = time.time()
        done = False
        while not done:
            obs, reward, done, info = game.step(player.act(obs))
            if ifrender:
                game.render()
        print('\nfinished, info:{}'.format(info))

        toc = time.time()
        print('episode time:{} s\n'.format(toc - tic))
        return toc - tic, info['highest'], info['score'], info['steps']
Esempio n. 3
0
from flask import Flask, request, render_template, redirect, url_for, abort, jsonify, make_response
import os
import numpy as np
from gym_2048 import Game2048Env
from dqn_agent import DQN
from utils import log2_shaping

# Shared 2048 environment instance used by the Flask request handlers below.
env = Game2048Env()

app = Flask(__name__, static_folder="static", static_url_path="/static")

# DQN agent (16 board cells -> 4 move directions) with pre-trained weights
# loaded once at import time from ./save/dqn_9017.pkl.
agent = DQN(num_state=16, num_action=4)
agent.load(path='./save/', name='dqn_9017.pkl')

# @app.route('/1')
# def hello_world():
#     with open('./templates/index.html', encoding='utf-8')as f:
#         text = f.read()
#     return text


@app.route('/')
def hello_world2():
    """Serve the 2048 game page by returning the template file as raw HTML."""
    with open('./templates/py2048.html', encoding='utf-8') as page:
        html = page.read()
    return html


@app.route('/move', methods=['POST'])
def tile_move():
    json = request.get_json()
Esempio n. 4
0
def train():
    """Train a DQN agent on Game2048Env with periodic evaluation/checkpointing.

    NOTE(review): reads module-level globals not visible here
    (train_episodes, ifrender, log_interval, epsilon_decay_interval,
    eval_interval, test_episodes) — confirm they are defined at import time.
    """
    episodes = train_episodes
    logger.configure(dir="./log/", format_strs="stdout,tensorboard,log")
    agent = DQN(num_state=16, num_action=4)
    env = Game2048Env()

    pf_saver = Perfomance_Saver()  # persists eval score lists (semantics defined elsewhere)
    model_saver = Model_Saver(num=10)  # num=10 — presumably a checkpoint retention cap; verify

    eval_max_score = 0  # best (int-truncated) mean eval score seen so far
    for i in range(episodes):
        state, reward, done, info = env.reset()
        state = log2_shaping(state)  # log2-scale board values for the network

        start = time.time()
        loss = None
        while True:
            # Act randomly until the replay buffer has been filled once,
            # then follow the agent's learned policy.
            if agent.buffer.memory_counter <= agent.memory_capacity:
                action = agent.select_action(state, random=True)
            else:
                action = agent.select_action(state)

            next_state, reward, done, info = env.step(action)
            next_state = log2_shaping(next_state)
            reward = log2_shaping(reward, divide=1)  # shape reward on the same log2 scale

            agent.store_transition(state, action, reward, next_state)
            state = next_state

            if ifrender:
                env.render()

            if agent.buffer.memory_counter % agent.train_interval == 0 and agent.buffer.memory_counter > agent.memory_capacity:  # i.e. updates effectively start only after the buffer is full
                loss = agent.update()

            if done:
                if i % log_interval == 0:
                    if loss:
                        logger.logkv('loss', loss)
                    logger.logkv('training progress', (i+1) / episodes)
                    logger.logkv('episode reward', info['score'])
                    logger.logkv('episode steps', info['steps'])
                    logger.logkv('highest', info['highest'])
                    logger.logkv('epsilon', agent.epsilon)
                    logger.dumpkvs()

                    loss = None  # reset so a stale loss is not re-logged next time

                if i % epsilon_decay_interval == 0:   # epsilon decay schedule
                    agent.epsilon_decay(i, episodes)
                break
        
        end = time.time()
        print('episode time:{} s\n'.format(end - start))

        # Periodic evaluation (skipped at episode 0); checkpoint the model
        # whenever the truncated mean eval score improves.
        if i % eval_interval == 0 and i:
            eval_info = test(episodes=test_episodes, agent=agent)
            average_score, max_score, score_lis = eval_info['mean'], eval_info['max'], eval_info['list']

            pf_saver.save(score_lis, info=f'episode:{i}')

            if int(average_score) > eval_max_score:
                eval_max_score = int(average_score)
                name = 'dqn_{}.pkl'.format(int(eval_max_score))
                agent.save(name=name)
                model_saver.save("./save/" + name)

            logger.logkv('eval average score', average_score)
            logger.logkv('eval max socre', max_score)  # NOTE(review): 'socre' typo kept — renaming would change the logged metric key
            logger.dumpkvs()