"""Train a pruning agent on a VGG16/CIFAR-10 environment for 5 episodes."""
from environments import Cifar10VGG16
from agents import Agent
import numpy as np

if __name__ == '__main__':
    for i in range(5):
        # Fresh environment per episode; env.get() yields (done, initial state).
        env = Cifar10VGG16()
        done, state = env.get()
        agent = Agent(env.state_size, env.action_size)
        while not done:
            # Agent emits a continuous action vector; binarize at 0.5
            # (1 = keep/prune decision per unit — exact semantics live in env).
            action = agent.get_action(state)
            action = np.where(action > 0.5, 1, 0)
            # NOTE(review): env.step returns the (possibly adjusted) action
            # first; that returned action — not the raw one — is stored.
            action, reward, done, new_state = env.step(action)
            agent.append_sample(state, action, reward)
            print('State {}: Reward {}'.format(env._current_state - 1, reward))
            state = new_state
            if done:
                # Train once per finished episode, then persist the weights.
                # (The while-loop exits right after this, so placing the save
                # here is equivalent to saving after the loop — TODO confirm
                # against the original, un-mangled source layout.)
                agent.train_model()
                agent.model.save_weights('./saved_model/pruning_agent.h5')
# NOTE(review): this is a fragment — _a, _b, _f_checkpoint, _d_log, _env and
# _scores are defined earlier in the file (outside this view), and _episode is
# presumably incremented after _scores.append(_score) beyond this chunk;
# otherwise the outer while-loop would never terminate. Confirm upstream.
_episode = int(_a)
_scores.append(float(_b))
# Resume training for another 5000 games from the restored episode count.
_n_games = _episode + 5000
_agent = Agent((8,), 4)
if os.path.exists(_f_checkpoint):
    _agent.net.load_checkpoint(_f_checkpoint)
_writer = SummaryWriter(_d_log)
_is_quit = False
while _episode < _n_games:
    _observation = _env.reset()
    _done = False
    _score = 0.0
    while not _done:
        _action = _agent.get_action(_observation)
        _next_observation, _reward, _done, _info = _env.step(_action)
        _score += _reward
        _agent.learn(_observation, _reward, _next_observation, _done)
        _observation = _next_observation
        # Live preview: Gym renders RGB, OpenCV expects BGR.
        _rgb = _env.render("rgb_array")
        _bgr = cv2.cvtColor(_rgb, cv2.COLOR_RGB2BGR)
        cv2.imshow("frame", _bgr)
        _key_code = cv2.waitKey(1)
        # ESC (27) or 'q' aborts the whole run, not just this episode.
        if _key_code in [27, ord('q')]:
            _is_quit = True
            break
    if _is_quit:
        break
    _scores.append(_score)