SESSION.run(init)

##########################################
# Replay-memory fill phase: unless learning is skipped, step the game for
# RANDOM_WANDER_STEPS actions and record every transition with the agent.
# NOTE(review): the original indentation of this section was lost; the
# nesting below is reconstructed from statement order -- confirm.
if not SKIP_LEARNING:
    time_start = time()
    print("\nFilling out replay memory")
    updateTarget(targetOps, SESSION)

    agent.reset_cell_state()
    state = game.get_state()
    for _ in range(RANDOM_WANDER_STEPS):
        # With LOAD_MODEL set the agent chooses the action via agent.act();
        # otherwise a random action is taken.
        action = (
            agent.act(game.get_last_action(), state)
            if LOAD_MODEL
            else agent.random_action()
        )
        img_state, reward, done = game.make_action(action)

        # A step that ends the episode is stored with None as its next state.
        state_new = None if done else img_state
        agent.add_transition(state, action, reward, state_new, done)
        state = state_new

        if done:
            # Start a new episode: reset the game and the agent's cell state,
            # then read the fresh starting state.
            game.reset()
            agent.reset_cell_state()
            state = game.get_state()

    # NOTE(review): presumably the best average reward seen so far, seeded
    # with a very low value; whether this line belongs inside the
    # `if not SKIP_LEARNING:` body could not be determined -- confirm.
    max_avgR = -10000.0