def __init__(self, params):
    """Set up the racer gym environment, the online/offline networks and replay memory.

    Args:
        params: dict with keys "render_mode", "sensor_type" and
            "sensor_array_params" used to configure the gym environment.
    """
    # private variables with read access
    self.__agent = gym.make(
        "racer-v0",
        render_mode=params["render_mode"],
        sensor_array_type=params["sensor_type"],
        sensor_array_params=params["sensor_array_params"])
    self.__obs = self.__agent.reset()
    self.__done = False
    self.__reward = None
    # FIX: the original read `self.agent`, but only the name-mangled private
    # attribute `self.__agent` is ever assigned in this constructor, so
    # `self.agent` would raise AttributeError (unless an unseen `agent`
    # property exists on the class -- none is visible here).
    self.__network = network.neural_network(
        self.__agent.observation_space.shape[0], 8)
    # offline (target) copy of the network; gradients disabled
    self.__offline_network = network.neural_network(
        self.__agent.observation_space.shape[0], 8, requires_grad=False)
    self.__memory = None
    # public variables
    self.render_mode = params["render_mode"]
# --- Diagnostic scatter plots of the raw dataset --------------------------
# Column 0 is plotted against columns 3 and 4 (presumably the "g" and "d"
# target signals, matching the plot file names -- TODO confirm against the
# data-generation code).
plt.figure(2)
plt.scatter(dataset[:, 0].reshape((dataset.shape[0], 1)),
            dataset[:, 3].reshape((dataset.shape[0], 1)))
plt.savefig('./Plots/g.png')
plt.figure(5)
plt.scatter(dataset[:, 0].reshape((dataset.shape[0], 1)),
            dataset[:, 4].reshape((dataset.shape[0], 1)))
plt.savefig('./Plots/d.png')
print(dataset.shape)

# --- Build two separate TF1 graphs, one per model --------------------------
# Each model gets its own tf.Graph so its variables and ops stay isolated.
g = tf.Graph()
with g.as_default():
    model_G = neural_network(1, 1, [10], name='Model_G_')
    # FIX: tf.initialize_all_variables() is deprecated in TF 1.x;
    # tf.global_variables_initializer() is the documented drop-in
    # replacement with identical behavior. The initializer must be
    # created inside this graph's context, as before.
    init_g = tf.global_variables_initializer()
    x_g = tf.placeholder(tf.float64, [None, 1])
    G = model_G.value(x_g)    # network output
    d1g = model_G.dx(x_g)     # first derivative w.r.t. the input
    d2g = model_G.d2x(x_g)    # second derivative w.r.t. the input

d = tf.Graph()
with d.as_default():
    model_D = neural_network(1, 1, [10, 10], name='Model_D_')
    init_d = tf.global_variables_initializer()
    x_d = tf.placeholder(tf.float64, [None, 1])
    D = model_D.value(x_d)
    d1d = model_D.dx(x_d)
    d2d = model_D.d2x(x_d)
    # NOTE(review): this fragment starts inside an epsilon-decay if/elif
    # chain -- the leading `if` branch(es) live outside this view.
    e_greedy = 0.4
elif 750 <= episode < 800:
    e_greedy = 0.3
elif 800 <= episode < 850:
    e_greedy = 0.2
else:
    e_greedy = 0.1  # final (smallest) exploration rate

# sample net structure, train and get accuracy
S, U = table.sample_new_network(epsilon=e_greedy)
# Re-sample until we draw a state sequence not already in the replay memory.
while S in Memory_S:
    S, U = table.sample_new_network(epsilon=e_greedy)
net_structure = deepcopy(U)
# Append the terminal layer token -- presumably ('T', 4) marks termination
# with 4 output classes; TODO confirm against the table/network definitions.
net_structure.append(('T', 4))
nn = neural_network(net_structure=net_structure, input_shape=input_shape)
nn.compile_model()
# `nb_epoch` is the old Keras-1 spelling of `epochs`; kept as-is to match
# whatever neural_network.fit_model expects.
nn.fit_model(X_train, y_train, batch_size=128, nb_epoch=5, val_X=X_test, val_y=y_test, verbose=1)
accuracy = nn.evaluate_model()
print('score :', accuracy)

# store to replay memory
Memory_S.append(S)
Memory_U.append(U)
Memory_accuracy.append(accuracy)

# update q-table for k times
# NOTE(review): `uniform` here is presumably a project helper that samples
# one (S, U, accuracy) triple uniformly from the replay memories -- it is
# NOT random.uniform (that takes two floats); verify its definition.
for memory in range(k_replay_update):
    S_sample, U_sample, accuracy_sample = uniform(Memory_S, Memory_U, Memory_accuracy)
    table.update_q_values(S_sample, U_sample, accuracy_sample)
# Hyper-parameters for supervised training of the 1-in/1-out network.
lr = 0.01
N = 3  # dataset column used as the regression target
data_sampler = sample_dataset(dataset, batch_size, N)
n_batches = int(len(dataset)/batch_size)

# Scatter plot of input column 0 vs. target column N for visual inspection.
plt.figure(1); plt.scatter(dataset[:,0].reshape((dataset.shape[0],1)), dataset[:,N].reshape((dataset.shape[0],1)))
plt.savefig('./Plots/1.png')

#placeholders for training data
x = tf.placeholder(tf.float64, [None, 1])
y = tf.placeholder(tf.float64, [None, 1])

# 1-input / 1-output network with a single hidden layer of 10 units.
model = neural_network(1,1,[10], name='Model_G_')
network_out = model.value(x)
# NOTE(review): tf.nn.l2_loss already returns a scalar (sum of squares / 2),
# so the surrounding reduce_mean is a no-op here -- confirm the intent was
# not a per-sample mean squared error.
loss = tf.reduce_mean(tf.nn.l2_loss(network_out-y))
optimizer = tf.train.AdamOptimizer(learning_rate=lr).minimize(loss)
# NOTE(review): tf.initialize_all_variables() is deprecated in TF 1.x;
# tf.global_variables_initializer() is the drop-in replacement.
init = tf.initialize_all_variables()
saver = tf.train.Saver(save_relative_paths=True)
# Training session begins here; the loop body continues past this view.
with tf.Session() as sess:
    # create initialized variables
    best_loss = sys.maxsize  # track the lowest loss seen so far (for checkpointing)
    sess.run(init)