from environments import Cifar10VGG16
from agents import Agent
import numpy as np

if __name__ == '__main__':
    # Run five pruning episodes against a VGG16 trained on CIFAR-10.
    for i in range(5):
        env = Cifar10VGG16()
        done, state = env.get()
        agent = Agent(env.state_size, env.action_size)
        while not done:
            # The policy emits per-entry probabilities; threshold them
            # into a binary keep/prune mask before stepping the env.
            action = agent.get_action(state)
            action = np.where(action > 0.5, 1, 0)
            action, reward, done, new_state = env.step(action)
            agent.append_sample(state, action, reward)
            print('State {}: Reward {}'.format(env._current_state - 1, reward))
            state = new_state
            if done:
                # Fit the policy on the episode just collected.
                agent.train_model()
        agent.model.save_weights('./saved_model/pruning_agent.h5')
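
The Agent imported above is not shown in this example. As a minimal sketch of the interface the loop relies on (get_action, append_sample, train_model, and a Keras model exposing save_weights), here is a REINFORCE-style policy over a multi-binary keep/prune action; the layer sizes, optimizer, discount factor, and the assumption that env.state_size is an int are all illustrative, not the original implementation.

import numpy as np
import tensorflow as tf

class Agent:
    def __init__(self, state_size, action_size, lr=1e-3):
        self.states, self.actions, self.rewards = [], [], []
        self.model = tf.keras.Sequential([
            tf.keras.layers.Dense(64, activation='relu',
                                  input_shape=(state_size,)),
            # Independent keep/prune probability for each action entry.
            tf.keras.layers.Dense(action_size, activation='sigmoid'),
        ])
        self.optimizer = tf.keras.optimizers.Adam(lr)

    def get_action(self, state):
        # Probabilities in [0, 1]; the caller thresholds them at 0.5.
        return self.model(state[np.newaxis])[0].numpy()

    def append_sample(self, state, action, reward):
        self.states.append(state)
        self.actions.append(action)
        self.rewards.append(reward)

    def train_model(self, gamma=0.99):
        # Discounted return at each step weights that step's log-likelihood.
        returns = np.zeros(len(self.rewards), dtype=np.float32)
        g = 0.0
        for t in reversed(range(len(self.rewards))):
            g = self.rewards[t] + gamma * g
            returns[t] = g
        states = np.asarray(self.states, dtype=np.float32)
        actions = np.asarray(self.actions, dtype=np.float32)
        with tf.GradientTape() as tape:
            probs = self.model(states)
            # Bernoulli log-likelihood of the 0/1 actions actually taken.
            log_probs = tf.reduce_sum(
                actions * tf.math.log(probs + 1e-8)
                + (1.0 - actions) * tf.math.log(1.0 - probs + 1e-8),
                axis=1)
            loss = -tf.reduce_mean(log_probs * returns)
        grads = tape.gradient(loss, self.model.trainable_variables)
        self.optimizer.apply_gradients(
            zip(grads, self.model.trainable_variables))
        self.states, self.actions, self.rewards = [], [], []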
Example #2
import os

import cv2
import gym
from torch.utils.tensorboard import SummaryWriter

from agents import Agent  # assumed import path

if __name__ == '__main__':
    # Assumed setup: (8,) observations and 4 actions match LunarLander-v2.
    _env = gym.make('LunarLander-v2')
    _f_checkpoint = './checkpoints/agent.pt'   # illustrative paths
    _d_log = './logs'
    _f_log = os.path.join(_d_log, 'scores.txt')

    # Resume from a previous run; each log line holds "<episode> <score>".
    _episode, _scores = 0, []
    if os.path.exists(_f_log):
        with open(_f_log) as _fp:
            for _line in _fp:
                _a, _b = _line.split()
                _episode = int(_a)
                _scores.append(float(_b))
    _n_games = _episode + 5000

    _agent = Agent((8,), 4)
    if os.path.exists(_f_checkpoint):
        _agent.net.load_checkpoint(_f_checkpoint)

    _writer = SummaryWriter(_d_log)
    _is_quit = False
    while _episode < _n_games:
        _observation = _env.reset()
        _done = False
        _score = 0.0
        while not _done:
            _action = _agent.get_action(_observation)
            _next_observation, _reward, _done, _info = _env.step(_action)
            _score += _reward
            # One-step online update from the transition just observed.
            _agent.learn(_observation, _reward, _next_observation, _done)
            _observation = _next_observation

            # Render and display the frame; Esc or 'q' quits the run.
            _rgb = _env.render("rgb_array")
            _bgr = cv2.cvtColor(_rgb, cv2.COLOR_RGB2BGR)
            cv2.imshow("frame", _bgr)
            _key_code = cv2.waitKey(1)
            if _key_code in [27, ord('q')]:
                _is_quit = True
                break
        if _is_quit:
            break
        _episode += 1  # advance the counter so the while loop terminates
        _scores.append(_score)
        _writer.add_scalar('score', _score, _episode)
        # Append in the "<episode> <score>" format the resume block parses.
        with open(_f_log, 'a') as _fp:
            _fp.write('{} {}\n'.format(_episode, _score))

    _writer.close()
    cv2.destroyAllWindows()
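
The Agent in this second example is also not shown. A minimal sketch consistent with the calls above (Agent((8,), 4), net.load_checkpoint, get_action, and the one-step learn(observation, reward, next_observation, done)) is a PyTorch actor-critic with a TD(0) update; the layer sizes, optimizer, learning rate, and checkpoint helpers below are assumptions, not the original code.

import os

import torch
import torch.nn as nn

class ActorCriticNet(nn.Module):
    def __init__(self, input_dims, n_actions, hidden=256):
        super().__init__()
        self.body = nn.Sequential(
            nn.Linear(*input_dims, hidden), nn.ReLU(),
            nn.Linear(hidden, hidden), nn.ReLU())
        self.pi = nn.Linear(hidden, n_actions)  # policy logits
        self.v = nn.Linear(hidden, 1)           # state-value estimate

    def forward(self, x):
        h = self.body(x)
        return self.pi(h), self.v(h)

    def save_checkpoint(self, path):
        os.makedirs(os.path.dirname(path), exist_ok=True)
        torch.save(self.state_dict(), path)

    def load_checkpoint(self, path):
        self.load_state_dict(torch.load(path))


class Agent:
    def __init__(self, input_dims, n_actions, lr=5e-6, gamma=0.99):
        self.gamma = gamma
        self.net = ActorCriticNet(input_dims, n_actions)
        self.optimizer = torch.optim.Adam(self.net.parameters(), lr=lr)

    def get_action(self, observation):
        logits, _ = self.net(torch.tensor(observation, dtype=torch.float32))
        dist = torch.distributions.Categorical(logits=logits)
        action = dist.sample()
        self.log_prob = dist.log_prob(action)  # reused by learn()
        return action.item()

    def learn(self, state, reward, next_state, done):
        # Assumes get_action() was just called on `state`, so self.log_prob
        # corresponds to the action taken in this transition.
        self.optimizer.zero_grad()
        _, value = self.net(torch.tensor(state, dtype=torch.float32))
        _, next_value = self.net(torch.tensor(next_state, dtype=torch.float32))
        # TD(0) target: bootstrap from the next state unless the episode ended.
        target = reward + self.gamma * next_value.detach() * (1 - int(done))
        delta = target - value
        actor_loss = -self.log_prob * delta.detach()  # policy gradient step
        critic_loss = delta.pow(2)                    # value regression
        (actor_loss + critic_loss).sum().backward()
        self.optimizer.step()

A long run would also call _agent.net.save_checkpoint(_f_checkpoint) every few episodes, so that the load_checkpoint call at startup has something to restore.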