# episode = []
# Step loop for a single episode: choose an action epsilon-greedily, apply it
# to the environment, and accumulate the raw reward.
for t in range(MAX_STEPS):
    # env.render()
    # action = pg_reinforce.sampleAction(state[np.newaxis,:])

    # Choose action: with probability ~0.1 sample a random action, otherwise
    # score both discrete actions with the model and take the larger one.
    if np.random.rand() > .9:
        a_t = env.action_space.sample()
    else:
        q_values = []
        for candidate_action in (0., 1.):
            # The model input is the current state with the candidate action
            # appended, wrapped in an outer array to form a batch of one.
            model_input = np.array(
                [np.concatenate([s_t, np.array([candidate_action])])]
            )
            q_values.append(model.forward(model_input).data.numpy())
        # Ties go to action 1, because only a strictly larger q0 selects 0.
        a_t = 0 if q_values[0] > q_values[1] else 1

    # Apply action to environment
    s_tp1, reward, done, _ = env.step(a_t)
    total_rewards += reward

    # Normalized reward: 0 on the terminal step. The commented-out original
    # suggests 0.1 otherwise; the non-terminal branch is not visible in this
    # excerpt.
    if done:
        r_t = 0
# Outer training loop: one pass per episode, each episode running at most
# MAX_STEPS environment steps with epsilon-greedy action selection.
for i_episode in range(MAX_EPISODES):
    # Every episode starts from a freshly reset environment and zero return.
    s_t = env.reset()
    total_rewards = 0

    for t in range(MAX_STEPS):
        # env.render()
        # action = pg_reinforce.sampleAction(state[np.newaxis,:])
        rand = np.random.rand()
        if rand <= .9:
            # Greedy branch: evaluate the model on (state, action) for each of
            # the two discrete actions, action 0 first, then action 1.
            q0, q1 = [
                model.forward(
                    np.array([np.concatenate([s_t, np.array([action_flag])])])
                ).data.numpy()
                for action_flag in (0., 1.)
            ]
            # Only a strictly larger q0 picks action 0; ties fall to action 1.
            a_t = 0 if q0 > q1 else 1
        else:
            # Exploration branch: uniformly sampled action from the env.
            a_t = env.action_space.sample()

        # Advance the environment by one step with the chosen action.
        s_tp1, reward, done, _ = env.step(a_t)
##################
# Train a fully connected network on the MNIST training set with SGD, then
# report its accuracy on samples 50000 onward.

# Load the raw images and labels; labels are converted to one-hot rows.
images_numpy = load_train_images(idx3_ubyte_file=train_images_idx3_ubyte_file)
labels_numpy = load_train_labels(idx1_ubyte_file=train_labels_idx1_ubyte_file)
labels_numpy = one_hot_transformer(labels_numpy)
print('\nThe shape of all data images are:', images_numpy.shape)
print('\nThe shape of all data labels are:', labels_numpy.shape)

# Flatten every image into a vector and scale pixel values by 1/255.
images_vectors = images_numpy.reshape((60000, -1)) / 255

fnn = NN()
train = SGD_Train()
train.train_hyperparam['stop_criterion'] = 2
fnn.model_hyperparam['batch_size'] = 128
fnn.model_hyperparam['layer1_dim'] = 134
fnn.model_hyperparam['layer2_dim'] = 34
# NOTE(review): 'layer3_dim' is never set here - presumably it keeps the
# default from NN(); confirm that is intended.
fnn.model_hyperparam['layer4_dim'] = 10
fnn.model_hyperparam['dropout_percent'] = 0.05
print('\nThe hyperparameters of this fully connected neuron network are:\n',
      fnn.model_hyperparam)
print('The hyperparameters of training process are:\n', train.train_hyperparam)

train.sgd_train(fnn=fnn,
                images_vectors=images_vectors[0:60000],
                labels_numpy=labels_numpy[0:60000])

# Kept for compatibility with code outside this excerpt; unused below.
Accuracy = 0

# ---- Evaluation -----------------------------------------------------------
# Disable dropout before scoring.
fnn.model_hyperparam['dropout_percent'] = 0

# NOTE(review): the evaluated slice [50000, 60000) is also part of the slice
# passed to sgd_train above, so this is not a held-out measurement.
batch_size = fnn.model_hyperparam['batch_size']
eval_start = 50000
num_batches = 10000 // batch_size

account = 0  # running count of misclassified samples
for i in range(num_batches):
    lo = eval_start + i * batch_size
    hi = lo + batch_size
    batch_labels = labels_numpy[lo:hi]
    # NOTE(review): if_train=True is passed during evaluation - confirm this
    # is what fnn.forward expects when only predictions are needed.
    pred_labels, _ = fnn.forward(images_vectors[lo:hi], batch_labels,
                                 if_train=True)
    # Column index of the 1 in each one-hot row is the true class label.
    true_labels = np.nonzero(batch_labels)[1]
    account += np.count_nonzero(pred_labels - true_labels)

# Only full batches are scored, so the denominator must be the number of
# samples actually evaluated (num_batches * batch_size), not a fixed 10000;
# otherwise the skipped tail samples are silently counted as correct.
evaluated = num_batches * batch_size
print('The accuracy on the whole data set is %f %%:\n'
      % (100 - 100 * (account / evaluated)))
# Run MAX_EPISODES episodes; inside each, step the environment up to
# MAX_STEPS times using an epsilon-greedy policy over two discrete actions.
for i_episode in range(MAX_EPISODES):
    s_t = env.reset()
    total_rewards = 0

    for t in range(MAX_STEPS):
        # env.render()
        # action = pg_reinforce.sampleAction(state[np.newaxis,:])

        # Draw once per step; values above 0.9 trigger a random action.
        explore = np.random.rand() > .9
        if explore:
            a_t = env.action_space.sample()
        else:
            # Build one single-row model input per candidate action by
            # appending the action value (0. or 1.) to the current state.
            data_x_0, data_x_1 = (
                np.array([np.concatenate([s_t, np.array([action_value])])])
                for action_value in (0., 1.)
            )
            q0 = model.forward(data_x_0).data.numpy()
            q1 = model.forward(data_x_1).data.numpy()
            # Action 0 requires a strictly larger estimate; otherwise take 1.
            a_t = 0 if q0 > q1 else 1

        # Apply the selected action and observe the transition.
        s_tp1, reward, done, _ = env.step(a_t)