target_model_update=1e-2,
               policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Okay, now it's time to learn something! Visualizing the training is nice for
# show, but it slows training down quite a lot, so we keep visualize=False.
# You can always safely abort the training prematurely using Ctrl + C.
nsteps = 5000   # total environment steps used to train the agent
ntrain = 4000   # steps used to fit the supervised model; the rest are held out
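# NOTE: dqn.X / dqn.Y are not standard keras-rl attributes. This script assumes
# the agent records each (observation, action) pair it sees during training.
# A minimal sketch of one way to do that, by wrapping the agent's forward()
# method before fit() runs (hypothetical; adapt to however your agent logs):
dqn.X, dqn.Y = [], []
_orig_forward = dqn.forward
def _logging_forward(observation):
    action = _orig_forward(observation)
    dqn.X.append(observation)
    dqn.Y.append(action)
    return action
dqn.forward = _logging_forward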
dqn.fit(env, nb_steps=nsteps, visualize=False, verbose=2)

import numpy as np

import fit  # local helper module providing a supervised model of the policy
ff = fit.Model(n_observation=4, n_action=1)

# Convert the logged (observation, action) lists to arrays for slicing.
dqn.X = np.array(dqn.X)
dqn.Y = np.array(dqn.Y)

# Split the logged data into a training set and a held-out test set
# (0-indexed slices, so no samples are dropped at the boundaries).
X_train = dqn.X[:ntrain]
Y_train = dqn.Y[:ntrain]
X_test = dqn.X[ntrain:nsteps]
Y_test = dqn.Y[ntrain:nsteps]
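# Quick sanity check on the split shapes before fitting anything:
print('X_train %s, Y_train %s' % (X_train.shape, Y_train.shape))
print('X_test  %s, Y_test  %s' % (X_test.shape, Y_test.shape))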

# Standardize the training targets to zero mean and unit variance per output
# dimension, using statistics computed on the training set only.
meanY = np.mean(Y_train, axis=0)
stdY = np.std(Y_train, axis=0)
Y_train = (Y_train - meanY) / stdY
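# If the model is evaluated on the standardized scale, apply the same
# training-set statistics to the held-out targets (never re-fit them on the
# test data); Y_test itself is left untouched here:
Y_test_std = (Y_test - meanY) / stdY

# To map predictions back to the original action scale, invert the transform,
# e.g. (hypothetical, assuming fit.Model exposes a predict() method):
# Y_pred = ff.predict(X_test) * stdY + meanY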