target_model_update=1e-2, policy=policy) dqn.compile(Adam(lr=1e-3), metrics=['mae']) # Okay, now it's time to learn something! We visualize the training here for show, but this # slows down training quite a lot. You can always safely abort the training prematurely using # Ctrl + C. nsteps = 5000 ntrain = 4000 dqn.fit(env, nb_steps=nsteps, visualize=False, verbose=2) import fit ff = fit.Model(n_observation=4, n_action=1) # X = np.array(dqn.X) # Y = np.array(dqn.Y) dqn.X = np.array(dqn.X) dqn.Y = np.array(dqn.Y) X_train = dqn.X[1:ntrain] Y_train = dqn.Y[1:ntrain] X_test = dqn.X[ntrain + 1:nsteps] Y_test = dqn.Y[ntrain + 1:nsteps] #gpr.fit(X_train, Y_train) #print np.array(Y_test) #print gpr.predict(X_test) meanY = np.mean(Y_train, 0) stdY = np.std(Ytrain) Y_train = np.subtract(Y_train, meanY) Y_train = np.divide(Y_train, stdY)