""" m = 1000 # number of trajectories to generate for checking accuracy of learned algorithm n = 1000 # number of time steps for each trajectory lrz.dt = 0.01 # set default time step to 0.01 xstart = np.zeros((m, 3)) # stores the initial state of the n trajectories xend = np.zeros((m, 3)) # stores the final state of the n trajectories task = np.zeros( (m, 1), dtype=float ) # for each trajectory task stores 1 (resp. 0) for control objective # (resp. not) achieved for j in range(m): xstart[j, :] = lrz.reset() lrz.trajectory(n, 0) xend[j, :] = lrz.state if lrz.reward() > -0.15: task[j, 0] = 1.0 print('Efficiency of the learning algorithm is ', np.squeeze(100 * sum(task) / m), '%') fig = plt.figure(figsize=(8, 8)) ax = fig.add_subplot(111, projection='3d') ax.scatter(xstart[:, 0], xstart[:, 1], xstart[:, 2], c='k', marker='x', label="starting states") ax.scatter(xend[:, 0], xend[:, 1],
r = 1.5
lrz = Lorenz(sigma, b, r)  # initialize Lorenz object with the given parameters
n_samples = 1000           # set the number of training samples
lrz.X, lrz.U = np.zeros((n_samples, 3)), np.zeros((n_samples, 1))  # initialize training data to 0

"""
Training:
Randomly initialize the state of the Lorenz object and set lrz.X[i, :] to the initial state.
The Lorenz object takes one step with negative control and gets reward r1; the state is then
reset back to the starting state and another step is taken with positive control, which gives
reward r2. The policy lrz.U[i, 0] is set to -1 or +1 depending on which control maximizes
the reward.
"""
for i in range(n_samples):
    lrz.X[i, :] = lrz.reset()
    lrz.step(-lrz.max_control)
    r1 = lrz.reward()
    lrz.state = lrz.X[i, :]
    lrz.step(lrz.max_control)
    r2 = lrz.reward()
    lrz.U[i, 0] = 2 * np.argmax([r1, r2]) - 1  # -1 if the negative control wins, +1 otherwise

data = {
    'sigma': sigma,
    'b': b,
    'r': r,
    'n_samples': n_samples,
    'X': lrz.X,
    'U': lrz.U
}

# write the training data to a file
import pickle
f = open("learning_algorithm2_training_data", "wb")
pickle.dump(data, f)  # pickle serialization assumed from the binary "wb" mode; the original snippet truncates here
f.close()
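# One plausible way to consume the saved training set (a sketch, not code from this
# project): load the pickle and turn the (state, best-control) pairs into a
# 1-nearest-neighbor policy that returns the control label of the closest training state.

import pickle

import numpy as np

with open("learning_algorithm2_training_data", "rb") as f:
    data = pickle.load(f)

X, U = data['X'], data['U']  # states of shape (n_samples, 3), control labels of shape (n_samples, 1)

def nn_policy(state):
    """Return the stored control (-1 or +1) of the training state nearest to `state`."""
    i = np.argmin(np.linalg.norm(X - state, axis=1))
    return U[i, 0]

# example query on a hypothetical state
print(nn_policy(np.array([1.0, -2.0, 0.5])))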