def main(_):
    """Training-loop skeleton for a DQN agent playing Pong.

    NOTE(review): reconstructed from a whitespace-mangled paste — the original
    line collapsed the whole body into a trailing comment, leaving ``def
    main(_):`` with no body (a SyntaxError). Logic below mirrors the evident
    intent; the model load/build and inner game loop are still placeholders.

    Args:
        _: unused (conventional absl/tf.app.run-style positional argument —
           presumably argv; confirm against the caller).
    """
    # --- hyperparameters ---
    DATA_DIR = ""
    NUM_ACTIONS = 3          # number of valid actions (left, stay, right)
    GAMMA = 0.99             # decay rate of past observations
    INITIAL_EPSILON = 0.1    # starting value of epsilon
    FINAL_EPSILON = 0.0001   # final value of epsilon
    MEMORY_SIZE = 50000      # number of previous transitions to remember
    NUM_EPOCHS_OBSERVE = 1
    NUM_EPOCHS_TRAIN = 1000
    BATCH_SIZE = 1
    NUM_EPOCHS = NUM_EPOCHS_OBSERVE + NUM_EPOCHS_TRAIN

    model_file = "pong_net"
    if os.path.exists(model_file):
        # TODO: load the saved model from model_file
        pass
    else:
        # TODO: build the model from scratch
        pass

    pong_game = PongGame()
    # Replay buffer: oldest transitions are evicted once MEMORY_SIZE is reached.
    experience = collections.deque(maxlen=MEMORY_SIZE)

    num_games, num_wins = 0, 0
    epsilon = INITIAL_EPSILON
    for e in range(NUM_EPOCHS):
        loss = 0.0
        pong_game.reset_game()

        # Bootstrap the first state with a no-op action.
        a_0 = 1  # (0 = left, 1 = stay, 2 = right)
        x_t, r_0, game_over = pong_game.step(a_0)
        s_t = preprocess_frames(x_t)

        while not game_over:
            # TODO: epsilon-greedy action selection, store transition in
            # `experience`, sample a BATCH_SIZE minibatch, and train.
            # WARNING: with only `pass` here this loop never updates
            # `game_over` and will spin forever — must be filled in.
            pass
import numpy as np

from pong import PongGame

if __name__ == '__main__':
    # NOTE(review): reconstructed from a whitespace-mangled paste — imports,
    # the __main__ guard, and the loop were fused onto one invalid line.
    # Demo driver: step the environment with random continuous actions for
    # both paddles and render every frame. Runs until interrupted.
    env = PongGame(competitive=False)
    obs = env.reset()
    while True:
        env.render(mode='human')
        # One random action per paddle, each drawn uniformly from [-1, 1).
        obs, reward, dones, win = env.step(
            [np.random.uniform(-1, 1), np.random.uniform(-1, 1)]
        )