Example #1
0
def main():
    """Build a DQNAgent for 'Breakout-v0' and run the collect/train loop.

    Constructs the CNN (bound to ``vae``), a ``Memory`` and a
    ``Preprocessor``, wires them into a ``DQNAgent`` and then:

    1. truncates ``results.txt``, writes a ``Start!`` header, calls
       ``fill_memory(2000, is_random=True)`` followed by ``train_vae()``
       and ``train_rnn()``, and logs the ``fill_memory`` result;
    2. for 100 rounds clears the agent's memory, calls
       ``fill_memory(300, is_random=False)``, appends the result to
       ``results.txt``, retrains, and prints every result gathered so far.

    The log file is always handled through ``with`` so the handle is closed
    (and buffered lines flushed) even if collection or training raises.
    """
    z_size = 128
    batch_size = 32
    learning_rate = 0.0001
    vae = CNN(z_size=z_size,
              batch_size=batch_size,
              learning_rate=learning_rate,
              is_training=True,
              reuse=False,
              gpu_mode=False)

    # Agent hyper-parameters, passed through unchanged to DQNAgent below.
    env = 'Breakout-v0'
    done_reward = -1
    gamma = 0.95
    epsilon = 0.05
    save_freq = 10000
    memory_size = 2000
    rnn = None  # NOTE(review): not referenced by the code visible here
    controller = None
    memory = Memory(memory_size)
    preprocessor = Preprocessor()
    foresee_steps = 1
    DQA = DQNAgent(env=env,
                   done_reward=done_reward,
                   gamma=gamma,
                   epsilon=epsilon,
                   save_freq=save_freq,
                   batch_size=batch_size,
                   memory=memory,
                   cnn=vae,
                   controller=controller,
                   preprocessor=preprocessor,
                   foresee_steps=foresee_steps,
                   # vae_path='tf_cnn/cnn.json',
                   # rnn_path='tf_rnn/rnn.json',
                   )

    # Disabled diagnostic: mean reward over 10 generated paths.
    #     tmp_reward = []
    #     for _ in range(10):
    #         _, reward = DQA.generate_path(is_random = False)
    #         tmp_reward.append(reward)
    #     print(np.mean(tmp_reward))
    #
    # Disabled diagnostic: inspect the models on a single generated path.
    #     path, _ = DQA.generate_path(is_random = False)
    #     DQA.test_vae(path)
    #     #DQA.test_rnn(path)
    #     #DQA.test_preprocess(path)

    S = []  # every fill_memory result, in call order

    # Warm-up phase: mode 'w' starts a fresh log on each run.
    with open('results.txt', 'w') as o:
        o.write('Start!')
        o.write('\n')
        res = DQA.fill_memory(2000, is_random=True)
        S.append(res)
        DQA.train_vae()
        DQA.train_rnn()
        o.write(str(res))
        o.write('\n')

    for i in range(100):
        # Re-open in append mode each round so the result reaches disk
        # before the (potentially long) training step starts.
        with open('results.txt', 'a') as o:
            DQA.memory.clear()
            res = DQA.fill_memory(300, is_random=False)
            o.write(str(res))
            o.write('\n')
        S.append(res)
        DQA.train_vae()
        DQA.train_rnn()
        print(S)
    
    '''