Example #1
    for t in range(MAX_STEPS):
        # Epsilon-greedy action selection (epsilon = 0.1): explore with a
        # random action 10% of the time, otherwise act greedily on the
        # Q-network's predictions.
        rand = np.random.rand()
        if rand > .9:
            a_t = env.action_space.sample()
        else:
            # Build a (state, action) input vector for each of the two
            # discrete actions and score both with the Q-network.
            data_x_0 = np.array([np.concatenate([s_t, np.array([0.])])])
            data_x_1 = np.array([np.concatenate([s_t, np.array([1.])])])
            q0 = model(torch.from_numpy(data_x_0).float()).detach().numpy()
            q1 = model(torch.from_numpy(data_x_1).float()).detach().numpy()
            a_t = 0 if q0 > q1 else 1

        # Apply the chosen action to the environment
        s_tp1, reward, done, _ = env.step(a_t)

        total_rewards += reward
        if done:
            r_t = 0  # zero reward on the terminal transition
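The fragment above assumes an existing Gym environment, a current state s_t, and a Q-network model that scores concatenated (state, action) vectors. A minimal sketch of that missing context, assuming CartPole-v0 (4 state dimensions, 2 discrete actions) and a small PyTorch MLP; the layer sizes and constants here are illustrative, not taken from the original:

import gym
import numpy as np
import torch
import torch.nn as nn

MAX_EPISODES = 1000  # illustrative constants, not from the original snippet
MAX_STEPS = 200

env = gym.make('CartPole-v0')

# Q-network over concatenated (state, action) inputs:
# 4 state dimensions + 1 action dimension -> scalar Q-value
model = nn.Sequential(
    nn.Linear(5, 64),
    nn.ReLU(),
    nn.Linear(64, 1),
)

s_t = env.reset()
total_rewards = 0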
Example #2
for i_episode in range(MAX_EPISODES):

    s_t = env.reset()
    total_rewards = 0

    for t in range(MAX_STEPS):
        # Epsilon-greedy action selection (epsilon = 0.1)
        rand = np.random.rand()
        if rand > .9:
            a_t = env.action_space.sample()
        else:
            # Score both discrete actions through the Q-network and pick the
            # one with the higher predicted value.
            data_x_0 = np.array([np.concatenate([s_t, np.array([0.])])])
            data_x_1 = np.array([np.concatenate([s_t, np.array([1.])])])
            q0 = model(torch.from_numpy(data_x_0).float()).detach().numpy()
            q1 = model(torch.from_numpy(data_x_1).float()).detach().numpy()
            a_t = 0 if q0 > q1 else 1

        # Step the environment, accumulate reward, and advance the state
        s_tp1, reward, done, _ = env.step(a_t)
        total_rewards += reward
        s_t = s_tp1
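Both fragments are cut off before any learning step, so the network is never updated. For this (state, action)-input architecture, a one-step Q-learning update on the transition (s_t, a_t, reward, s_tp1, done) would look roughly like the sketch below; the discount factor, optimizer, and q_value helper are all assumptions for illustration, not code from the original:

import numpy as np
import torch
import torch.nn as nn

GAMMA = 0.99  # assumed discount factor, not from the original
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = nn.MSELoss()

def q_value(state, action):
    # Q(s, a) from the network that takes a concatenated (state, action) vector
    x = np.concatenate([state, np.array([float(action)])])[np.newaxis, :]
    return model(torch.from_numpy(x).float())

# Bootstrapped target: r + gamma * max_a' Q(s', a'), zero bootstrap at terminal states
with torch.no_grad():
    q_next = max(float(q_value(s_tp1, a)) for a in (0, 1))
    target = torch.tensor([[reward + (0.0 if done else GAMMA * q_next)]],
                          dtype=torch.float32)

loss = loss_fn(q_value(s_t, a_t), target)
optimizer.zero_grad()
loss.backward()
optimizer.step()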
Example #3
    # Load the MNIST training images and labels
    images_numpy = load_train_images(idx3_ubyte_file=train_images_idx3_ubyte_file)
    labels_numpy = load_train_labels(idx1_ubyte_file=train_labels_idx1_ubyte_file)

    labels_numpy = one_hot_transformer(labels_numpy)

    print('\nThe shape of all data images is:', images_numpy.shape)
    print('\nThe shape of all data labels is:', labels_numpy.shape)

    # Flatten each 28x28 image into a 784-vector and scale pixels to [0, 1]
    images_vectors = images_numpy.reshape((60000, -1)) / 255

    fnn = NN()
    train = SGD_Train()
    train.train_hyperparam['stop_criterion'] = 2
    fnn.model_hyperparam['batch_size'] = 128
    fnn.model_hyperparam['layer1_dim'] = 134
    fnn.model_hyperparam['layer2_dim'] = 34
    fnn.model_hyperparam['layer4_dim'] = 10
    fnn.model_hyperparam['dropout_percent'] = 0.05
    print('\nThe hyperparameters of this fully connected neural network are:\n', fnn.model_hyperparam)
    print('The hyperparameters of the training process are:\n', train.train_hyperparam)
    train.sgd_train(fnn=fnn, images_vectors=images_vectors[0:60000], labels_numpy=labels_numpy[0:60000])

    # Evaluate on the last 10,000 training images. Dropout is disabled first,
    # so the training-mode forward pass matches inference.
    account = 0  # running count of misclassified samples
    n_seen = 0   # samples actually evaluated (the batched loop drops the remainder)
    fnn.model_hyperparam['dropout_percent'] = 0
    bs = fnn.model_hyperparam['batch_size']
    for i in range(10000 // bs):
        pred_labels, _ = fnn.forward(images_vectors[50000 + i * bs:50000 + (i + 1) * bs],
                                     labels_numpy[50000 + i * bs:50000 + (i + 1) * bs], if_train=True)
        # pred_labels holds predicted class indices; np.nonzero(...)[1] recovers
        # the true class indices from the one-hot label rows.
        account += np.nonzero(pred_labels - np.nonzero(labels_numpy[50000 + i * bs:
                                                                    50000 + (i + 1) * bs])[1])[0].shape[0]
        n_seen += bs
    print('The accuracy on the last 10000 training images is %f %%\n' % (100 - 100 * (account / n_seen)))
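The snippet calls a one_hot_transformer helper that is not shown. A minimal sketch of what it would need to do, assuming integer class labels in and one-hot rows out (the evaluation loop's np.nonzero(...)[1] relies on exactly this layout):

import numpy as np

def one_hot_transformer(labels, num_classes=10):
    # Map integer class labels of shape (N,) to one-hot rows of shape (N, num_classes)
    labels = labels.astype(int)
    one_hot = np.zeros((labels.shape[0], num_classes))
    one_hot[np.arange(labels.shape[0]), labels] = 1
    return one_hot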
Example #4
for i_episode in range(MAX_EPISODES):

  s_t = env.reset()
  total_rewards = 0

  for t in range(MAX_STEPS):
    # Epsilon-greedy action selection (epsilon = 0.1)
    rand = np.random.rand()
    if rand > .9:
      a_t = env.action_space.sample()
    else:
      # Score both discrete actions with the Q-network and act greedily
      data_x_0 = np.array([np.concatenate([s_t, np.array([0.])])])
      data_x_1 = np.array([np.concatenate([s_t, np.array([1.])])])
      q0 = model(torch.from_numpy(data_x_0).float()).detach().numpy()
      q1 = model(torch.from_numpy(data_x_1).float()).detach().numpy()
      a_t = 0 if q0 > q1 else 1

    # Step the environment and accumulate reward
    s_tp1, reward, done, _ = env.step(a_t)
    total_rewards += reward
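The two separate forward passes in these snippets (one per candidate action) can be collapsed into a single batched call, which is shorter and avoids a second pass through the network. A sketch under the same assumptions as above (model is a PyTorch network over concatenated (state, action) vectors); the greedy_action helper is hypothetical, not from the original:

import numpy as np
import torch

def greedy_action(model, s_t):
    # Batch both candidate (state, action) inputs into one forward pass
    candidates = np.stack([np.concatenate([s_t, np.array([a])]) for a in (0., 1.)])
    q_values = model(torch.from_numpy(candidates).float()).detach().numpy()  # shape (2, 1)
    return int(np.argmax(q_values))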