# os.mkdir(episodeDirectory) tetris.start() state = tetris.get_state() episodeReward = 0 step = 0 while step < maxStepPerEpisode: # if render and numSteps % renderStepDuration == 0: # image = tetris.get_printed_state() # plt.imshow(image) # plt.savefig(episodeDirectory + "s%d.jpg" % step) action = select_action(state) step += 1 next_state, reward, done = tetris.step(action) if done: next_state = None memory.add((state, action, reward, next_state)) state = next_state episodeReward += reward train() numSteps += 1 if numSteps % numStepPerUpdate == 0: targetNet.load_state_dict(policyNet.state_dict()) if done: break
from tetris.tetris import Tetris


def main():
    """Interactive driver for a rendered Tetris game.

    Prompts for an integer action on each turn:
      * a non-negative integer is passed to ``Tetris.step`` to advance
        the game one step;
      * a negative integer restarts the game via ``Tetris.start``.

    The current board is printed after every command. Runs until
    interrupted (Ctrl-C / EOF).
    """
    game = Tetris(render=True)
    game.start()
    game.print_state()
    while True:
        # Robustness fix: the original crashed with ValueError on any
        # non-numeric input, killing the whole interactive session.
        try:
            action = int(input("Action: "))
        except ValueError:
            print("Please enter an integer action.")
            continue
        if action < 0:
            game.start()  # negative input restarts the episode
        else:
            game.step(action)
        game.print_state()


if __name__ == "__main__":
    # Entry guard so importing this module does not start the loop.
    main()