Example #1
# Assumes a module-level import of the project-local `model` module
# (providing one_hot, forward and cross_entropy_cost).
def evaluate(conf, params, X_data, Y_data):
    """Evaluate a trained model on X_data.

    Args:
        conf: Configuration dictionary
        params: Dictionary with parameters
        X_data: numpy array of floats with shape [input dimension, number of examples]
        Y_data: numpy array of integer class labels with shape [number of examples]
    Returns:
        num_correct_total: Integer
        num_examples_evaluated: Integer
    """

    num_examples = X_data.shape[1]
    num_examples_evaluated = 0
    num_correct_total = 0
    # Slide a window of batch_size examples over the data set.
    start_ind = 0
    end_ind = conf['batch_size']
    while True:
        X_batch = X_data[:, start_ind:end_ind]
        Y_batch = model.one_hot(Y_data[start_ind:end_ind],
                                conf['output_dimension'])
        Y_proposal, _ = model.forward(conf, X_batch, params, is_training=False)
        _, num_correct = model.cross_entropy_cost(Y_proposal, Y_batch)
        num_correct_total += num_correct

        num_examples_evaluated += end_ind - start_ind

        start_ind += conf['batch_size']
        end_ind += conf['batch_size']

        # Clamp the final batch to the end of the data set.
        if end_ind >= num_examples:
            end_ind = num_examples

        # Stop once every example has been evaluated.
        if start_ind >= num_examples:
            break

    return num_correct_total, num_examples_evaluated
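
A hypothetical usage sketch for evaluate. Only the two conf keys the function actually reads are shown (a real configuration dictionary, as in Example #2, carries more entries), and the shapes and values below are illustrative assumptions, not taken from the source:

import numpy as np
import model  # project-local module assumed by the example above

conf = {'batch_size': 128, 'output_dimension': 10}  # assumed values
params = model.initialization(conf)                 # in practice: trained parameters
X_devel = np.random.rand(784, 1000)                 # [input dimension, number of examples]
Y_devel = np.random.randint(0, 10, size=1000)       # integer class labels

num_correct, num_evaluated = evaluate(conf, params, X_devel, Y_devel)
print("CCR: {:.4f}".format(num_correct / num_evaluated))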
Example #2
# Assumes module-level imports: numpy as np, time, and the project-local
# `model` module.
def train(conf, X_train, Y_train, X_devel, Y_devel):
    """Run training.

    Args:
        conf: Configuration dictionary
        X_train: numpy array of floats with shape [input dimension, number of train examples]
        Y_train: numpy array of integer class labels with shape [number of train examples]
        X_devel: numpy array of floats with shape [input dimension, number of devel examples]
        Y_devel: numpy array of integer class labels with shape [number of devel examples]
    Returns:
        params: Dictionary with trained parameters
        train_progress: Dictionary with progress data, to be used in visualization.
        devel_progress: Dictionary with progress data, to be used in visualization.
    """
    print("Run training")

    # Preparation
    num_examples_in_epoch = X_train.shape[1]
    example_indices = np.arange(0, num_examples_in_epoch)
    np.random.shuffle(example_indices)

    # Initialisation
    params = model.initialization(conf)

    # For displaying training progress
    train_steps = []
    train_ccr = []
    train_cost = []
    devel_steps = []
    devel_ccr = []

    # Start training
    step = 0
    epoch = 0
    num_correct_since_last_check = 0
    batch_start_index = 0
    batch_end_index = conf['batch_size']
    print("Number of training examples in one epoch: ", num_examples_in_epoch)
    print("Start training")
    start_time = time.time()
    while True:
        batch_indices = get_batch_indices(example_indices, batch_start_index,
                                          batch_end_index)
        X_batch = X_train[:, batch_indices]
        Y_batch = model.one_hot(Y_train[batch_indices],
                                conf['output_dimension'])

        Y_proposal, features = model.forward(conf,
                                             X_batch,
                                             params,
                                             is_training=True)
        print("Finish Foward")
        cost_value, num_correct = model.cross_entropy_cost(Y_proposal, Y_batch)
        #print("Finish Cross Entropy")
        grad_params = model.backward(conf, Y_proposal, Y_batch, params,
                                     features)
        print("Finish Backward")
        params = model.gradient_descent_update(conf, params, grad_params)
        print("Finish Gradient Update")
        print("Finish Training Number" + repr(step))

        num_correct_since_last_check += num_correct

        batch_start_index += conf['batch_size']
        batch_end_index += conf['batch_size']
        if batch_start_index >= num_examples_in_epoch:
            # End of epoch: reshuffle the examples and start a new pass.
            epoch += 1
            np.random.shuffle(example_indices)
            batch_start_index = 0
            batch_end_index = conf['batch_size']

        step += 1

        if np.isnan(cost_value):
            print("ERROR: nan encountered")
            break

        if step % conf['train_progress'] == 0:
            # The timer covers the last 'train_progress' steps, so reset it
            # after each report.
            elapsed_time = time.time() - start_time
            start_time = time.time()
            sec_per_batch = elapsed_time / conf['train_progress']
            examples_per_sec = conf['batch_size'] * conf[
                'train_progress'] / elapsed_time
            ccr = num_correct / conf['batch_size']
            running_ccr = (num_correct_since_last_check /
                           conf['train_progress'] / conf['batch_size'])
            num_correct_since_last_check = 0
            train_steps.append(step)
            train_ccr.append(running_ccr)
            train_cost.append(cost_value)
            if conf['verbose']:
                print(
                    "S: {0:>7}, E: {1:>4}, cost: {2:>7.4f}, CCR: {3:>7.4f} ({4:>6.4f}),  "
                    "ex/sec: {5:>7.3e}, sec/batch: {6:>7.3e}".format(
                        step, epoch, cost_value, ccr, running_ccr,
                        examples_per_sec, sec_per_batch))

        if step % conf['devel_progress'] == 0:
            num_correct, num_evaluated = evaluate(conf, params, X_devel,
                                                  Y_devel)
            devel_steps.append(step)
            devel_ccr.append(num_correct / num_evaluated)
            if conf['verbose']:
                print(
                    "S: {0:>7}, Test on development set. CCR: {1:>5} / {2:>5} = {3:>6.4f}"
                    .format(step, num_correct, num_evaluated,
                            num_correct / num_evaluated))

        if step >= conf['max_steps']:
            print("Terminating training after {} steps".format(step))
            break

    train_progress = {
        'steps': train_steps,
        'ccr': train_ccr,
        'cost': train_cost
    }
    devel_progress = {'steps': devel_steps, 'ccr': devel_ccr}

    return params, train_progress, devel_progress
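
Two pieces referenced above are not shown in the example. First, a minimal sketch of the get_batch_indices helper, inferred from its call site (its exact behaviour is an assumption):

def get_batch_indices(example_indices, start_index, end_index):
    # Pick the shuffled indices belonging to the current batch.
    return example_indices[start_index:end_index]

Second, the conf keys the training loop reads, collected into an illustrative dictionary (the values are assumptions, not from the source):

conf = {
    'batch_size': 128,       # examples per gradient step
    'output_dimension': 10,  # number of classes for one-hot encoding
    'train_progress': 100,   # steps between training-progress reports
    'devel_progress': 500,   # steps between development-set evaluations
    'max_steps': 10000,      # stop after this many steps
    'verbose': True,         # print progress lines
}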
Example #3
def train_bpnet():
    # Assumes module-level imports: numpy as np, theano, theano.tensor as T,
    # matplotlib.pyplot as plt, os and pickle, plus the project-local helpers
    # load_mnist, DataSet, BpNet, cross_entropy_cost and test_data_op.
    path = "mnist"
    # dataset preparation
    X_train, y_train = load_mnist(path, "train")
    X_test, y_test = load_mnist(path, "t10k")
    # dataset definition
    train_data = DataSet(X_train, y_train)
    test_data = DataSet(X_test, y_test)
    # hyperparameter definition
    in_size = 784
    hid_size = 100
    out_size = 10
    batch_size = 20
    epochs = 150
    learning_rate = 0.1
    # model definition
    net = BpNet(in_size, hid_size, out_size)
    print(train_data.input_data.shape, train_data.target_data.shape)
    # symbolic inputs
    x = T.dmatrix("x")
    y = T.dmatrix("y")

    prediction = net.forward(x)
    # cost function definition
    cost = cross_entropy_cost(prediction, y)
    # gradient-descent update rules for all parameters
    updates = net.update_grad(cost, learning_rate)

    # compile the training and prediction functions
    train = theano.function(inputs=[x, y], outputs=[cost], updates=updates)
    predict = theano.function(inputs=[x], outputs=prediction)
    # training loop
    loss_list = []
    percentage_list = []
    num_batches = len(X_train) // batch_size
    for k in range(epochs):
        sum_loss = 0
        for j in range(num_batches):
            batch_x = X_train[j * batch_size:(j + 1) * batch_size, :]
            batch_y = y_train[j * batch_size:(j + 1) * batch_size, :]
            err = train(batch_x, batch_y)
            sum_loss += err[0]
        # evaluate on the test set after every epoch
        out_pre = predict(test_data.input_data)
        out_org = test_data.target_data
        percentage = test_data_op(out_pre, out_org)
        print("epoch:%d loss:%0.4f correct:%0.2f%%" %
              (k, sum_loss / num_batches, percentage * 100))
        loss_list.append(sum_loss / num_batches)
        percentage_list.append(percentage)
    # ----------------------------------------------------------------
    # save the trained model with pickle
    model_name = 'bpnet.pkl'
    path_save = 'bpnet'
    if not os.path.exists(path_save):
        os.mkdir(path_save)
    with open(os.path.join(path_save, model_name), 'wb') as f:
        pickle.dump(net, f, protocol=pickle.HIGHEST_PROTOCOL)
    # ----------------------------------------------------------------
    # save the loss curve and raw values
    plt.figure()
    plt.plot(np.arange(len(loss_list)), loss_list)
    plt.savefig(os.path.join(path_save, "loss.png"))
    plt.show()
    with open(os.path.join(path_save, "loss.txt"), "w") as fp:
        for loss in loss_list:
            fp.write(str(loss) + "\n")
    # save the accuracy curve on a fresh figure so the plots stay separate
    plt.figure()
    plt.plot(np.arange(len(percentage_list)), percentage_list)
    plt.savefig(os.path.join(path_save, "percentage.png"))
    plt.show()
    with open(os.path.join(path_save, "percentage.txt"), "w") as fp:
        for percentage in percentage_list:
            fp.write(str(percentage) + "\n")
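
BpNet itself is not shown. Below is a minimal Theano sketch of a class with the same interface as used above (forward returning a prediction expression, update_grad returning an updates list); the two-layer architecture, sigmoid hidden units and softmax output are assumptions, not taken from the source:

import numpy as np
import theano
import theano.tensor as T

class BpNet(object):
    # Hypothetical two-layer network matching the forward/update_grad calls above.
    def __init__(self, in_size, hid_size, out_size):
        rng = np.random.RandomState(0)
        self.W1 = theano.shared(rng.normal(0, 0.1, (in_size, hid_size)))
        self.b1 = theano.shared(np.zeros(hid_size))
        self.W2 = theano.shared(rng.normal(0, 0.1, (hid_size, out_size)))
        self.b2 = theano.shared(np.zeros(out_size))

    def forward(self, x):
        # Sigmoid hidden layer followed by a softmax output layer.
        hidden = T.nnet.sigmoid(T.dot(x, self.W1) + self.b1)
        return T.nnet.softmax(T.dot(hidden, self.W2) + self.b2)

    def update_grad(self, cost, learning_rate):
        # Plain gradient descent: one (shared variable, update expression) pair each.
        params = [self.W1, self.b1, self.W2, self.b2]
        grads = T.grad(cost, params)
        return [(p, p - learning_rate * g) for p, g in zip(params, grads)]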
Example #4
def main_test():
    # Assumes a module-level `import numpy as np` and a project-local
    # config() helper returning the configuration dictionary.
    print("----------START OF TESTS-----------")
    # Get configuration
    conf = config()
    
    ################################### Task 1.1: Parameter initialization
    
    from model import initialization
    params = initialization(conf)
    
    ################################### Task 1.2: Forward propagation
    
    # Import Activation functions [1.2a & 1.2b]
    from model import activation
    from model import softmax
    
    # Test Activation functions
    from tests import task_2a
    from tests import task_2b
    input_Z, expected_A = task_2a()
    A = activation(input_Z, 'relu')
    print('Activation valid?:', np.array_equal(expected_A, A))
    input_Z, expected_S = task_2b()
    S = softmax(input_Z)
    print('Softmax valid?:',
          np.array_equal(np.round(expected_S, decimals=3), np.round(S, decimals=3)))
    
    # Import Forward propagation [1.2c]
    from model import forward
    from tests import task_2c
    
    ### Test Forward propagation
    conf, X_batch, params, expected_Z_1, expected_A_1, expected_Z_2, expected_Y_proposed = task_2c()
    Y_proposed, features = forward(conf, X_batch, params, is_training=True)
    print('feature Z_1 valid?:', np.array_equal(expected_Z_1, np.round(features['Z_1'], decimals=8)))
    print('feature A_1 valid?:', np.array_equal(expected_A_1, np.round(features['A_1'], decimals=8)))
    print('feature Z_2 valid?:', np.array_equal(expected_Z_2, np.round(features['Z_2'], decimals=8)))
    print('proposed Y valid?:', np.array_equal(expected_Y_proposed, np.round(Y_proposed, decimals=8)))
    
    ################################### Task 1.3: Cross Entropy cost function
    
    # Import Cost function
    from model import cross_entropy_cost
    from tests import task_3
    
    ### Test Cost function
    Y_proposed, Y_batch, expected_cost_value, expected_num_correct = task_3()
    cost_value, num_correct = cross_entropy_cost(Y_proposed, Y_batch)
    print('Cost value valid?:',
          np.array_equal(np.round(expected_cost_value, decimals=4), np.round(cost_value, decimals=4)))
    print('Number of successes valid?:',
          np.array_equal(expected_num_correct, np.round(num_correct, decimals=4)))
    
    ################################### Task 1.4: Backward propagation
    
    # Import Derivative of the activation function [1.4a]
    from model import activation_derivative
    from tests import task_4a
    
    # Test Derivative of activation
    input_Z, expected_dg_dz = task_4a()
    dg_dz = activation_derivative(input_Z, "relu")
    print('Derivative function valid?:', np.array_equal(expected_dg_dz, np.round(dg_dz, decimals=4)))

    # Import Backward propagation [1.4b]
    from model import backward
    from tests import task_4b
    
    # Test Backward propagation
    (conf, Y_proposed, Y_batch, params, features,
     expected_grad_W_1, expected_grad_b_1, expected_grad_W_2, expected_grad_b_2) = task_4b()
    grad_params = backward(conf, Y_proposed, Y_batch, params, features)
    print('Grad_W_1 valid?:', np.array_equal(np.round(expected_grad_W_1, decimals=4),
                                             np.round(grad_params["grad_W_1"], decimals=4)))
    print('Grad_b_1 valid?:', np.array_equal(np.round(expected_grad_b_1, decimals=4),
                                             np.round(grad_params["grad_b_1"][:, np.newaxis], decimals=4)))
    print('Grad_W_2 valid?:', np.array_equal(np.round(expected_grad_W_2, decimals=4),
                                             np.round(grad_params["grad_W_2"], decimals=4)))
    print('Grad_b_2 valid?:', np.array_equal(np.round(expected_grad_b_2, decimals=4),
                                             np.round(grad_params["grad_b_2"][:, np.newaxis], decimals=4)))
    
    ################################### Task 1.5: Update parameters
    
    # Import Update
    from model import gradient_descent_update
    from tests import task_5
    
    # Test Update
    (conf, params, grad_params,
     expected_updated_W_1, expected_updated_b_1, expected_updated_W_2, expected_updated_b_2) = task_5()
    updated_params = gradient_descent_update(conf, params, grad_params)
    
    print('update of W_1 valid?:', np.array_equal(np.round(expected_updated_W_1, decimals=4),
                                                  np.round(updated_params["W_1"], decimals=4)))
    print('update of b_1 valid?:', np.array_equal(np.round(expected_updated_b_1, decimals=4),
                                                  np.round(updated_params["b_1"], decimals=4)))
    print('update of W_2 valid?:', np.array_equal(np.round(expected_updated_W_2, decimals=4),
                                                  np.round(updated_params["W_2"], decimals=4)))
    print('update of b_2 valid?:', np.array_equal(np.round(expected_updated_b_2, decimals=4),
                                                  np.round(updated_params["b_2"], decimals=4)))

    print("----------END OF TESTS-----------")