def main(): z_size=128 batch_size=32 learning_rate=0.0001 vae = CNN(z_size=z_size, batch_size=batch_size, learning_rate=learning_rate, is_training=True, reuse=False, gpu_mode=False) env = 'Breakout-v0' done_reward = -1 gamma = 0.95 epsilon = 0.05 save_freq = 10000 batch_size = 32 memory_size = 2000 vae = vae rnn = None controller = None memory = Memory(memory_size) preprocessor = Preprocessor() foresee_steps = 1 DQA = DQNAgent(env = env, done_reward = done_reward, gamma = gamma, epsilon = epsilon, save_freq = save_freq, batch_size = batch_size, memory = memory, cnn = vae, controller = controller, preprocessor = preprocessor, foresee_steps = foresee_steps, #vae_path = 'tf_cnn/cnn.json', #rnn_path = 'tf_rnn/rnn.json', ) ''' tmp_reward = [] for _ in range(10): _, reward = DQA.generate_path(is_random = False) tmp_reward.append(reward) print(np.mean(tmp_reward)) ''' ''' path, _ = DQA.generate_path(is_random = False) DQA.test_vae(path) #DQA.test_rnn(path) #DQA.test_preprocess(path) ''' o = open('results.txt','w') o.write('Start!') o.write('\n') S = [] res = DQA.fill_memory(2000, is_random = True) S.append(res) DQA.train_vae() DQA.train_rnn() o.write(str(res)) o.write('\n') o.close() for i in range(100): o = open('results.txt','a') DQA.memory.clear() res = DQA.fill_memory(300, is_random = False) o.write(str(res)) o.write('\n') o.close() S.append(res) DQA.train_vae() DQA.train_rnn() print(S) '''