예제 #1
0
def check_numerical_gradient():
    """Print the analytic gradient of ``J`` beside its numerical estimate.

    Works on a small slice of the training set: the first 64 rows and
    first 10 columns of the image matrix and the first 10 labels, with
    a ``[64, 20, 20, 10]`` layer layout.  The shape of the initial
    parameter vector is printed first, then one line per gradient
    component holding the analytic value followed by the numerical
    value at the same index.
    """
    layer_sizes = [64, 20, 20, 10]
    weight_decay = 3e-3

    images = loader.load_train_imgs()[:64, :10]
    labels = loader.load_train_labels()[:10]
    params = initialization(layer_sizes)
    print(params.shape)

    # J returns a sequence whose second item is the analytic gradient.
    analytic = J(params, images, labels, layer_sizes, weight_decay)[1]
    numeric = compute_numerical_gradient(
        J, params, images, labels, layer_sizes, weight_decay)

    for idx, component in enumerate(analytic):
        print("%s %s" % (component, numeric[idx]))
예제 #2
0
def train():
    """Pre-train the network, fine-tune it, and return the parameters.

    Reads the module-level settings ``input_size``, ``hidden_size_l1``,
    ``hidden_size_l2``, ``num_labels`` and ``lbda``.  The first item of
    each of the first two ``pre_train`` results is stacked together
    with the third result to form the starting point for
    ``fine_tuning``.

    Returns:
        The value returned by ``fine_tuning`` on the full training set.
    """
    layer_sizes = [input_size, hidden_size_l1, hidden_size_l2, num_labels]
    images = loader.load_train_imgs()
    labels = loader.load_train_labels()

    first_layer, second_layer, sr_matrix = pre_train(images, labels)
    hidden_params = (first_layer[0], second_layer[0])
    start_theta = stack_params(hidden_params, sr_matrix)

    return fine_tuning(start_theta, images, labels, layer_sizes, lbda)
예제 #3
0
def test(theta):
    """Print how often ``predict`` with ``theta`` disagrees with the labels.

    Loads images and labels from the ``t10k`` files under
    ``../MNISTHelper``, predicts a label for every sample, prints the
    first 20 expected and the first 20 predicted labels, and finally
    prints the fraction of samples whose prediction differs from the
    expected label.

    Args:
        theta: parameters handed unchanged to ``predict``.
    """
    layer_sizes = [input_size, hidden_size_l1, hidden_size_l2, num_labels]
    images = loader.load_train_imgs(u'../MNISTHelper/t10k-images.idx3-ubyte')
    expected = loader.load_train_labels(u'../MNISTHelper/t10k-labels.idx1-ubyte')
    predicted = predict(images, theta, layer_sizes)

    print(expected[:20])
    print(predicted[:20])

    total = len(predicted)
    # Kept as a float so the division below is never integer division.
    wrong = float(sum(1 for idx in range(total)
                      if expected[idx] != predicted[idx]))
    print("%s %s" % ('error rate:', wrong / total))
예제 #4
0
파일: stl.py 프로젝트: jayshonzs/UFLDL
def main():
    """Learn features on unlabeled digits, then classify the labeled ones.

    Steps, in order:

    1. Load the training images and labels and split the sample
       indices into a "labeled" set (label < ``num_labels``) and an
       "unlabeled" set (every other label).
    2. Use the first half of the labeled set for training and the
       second half for testing.
    3. Minimise ``J`` with L-BFGS (at most 400 iterations) for an
       ``[input_size, hidden_size, input_size]`` layout, passing the
       unlabeled data as both input and target.
    4. Save the first weight matrix to the MATLAB file ``W``.
    5. Feed the train and test data through the first layer, fit the
       ``sr_train`` classifier on the resulting train activations and
       predict labels for the test activations.
    6. Print the first 20 expected and predicted labels and the
       fraction of misclassified test samples.

    Reads the module-level settings ``input_size``, ``num_labels``,
    ``hidden_size``, ``ro``, ``lbda`` and ``beta``.
    """
    global input_size, num_labels, hidden_size, ro, lbda, beta
    levels = [input_size, hidden_size, input_size]
    mnist_data = loader.load_train_imgs()
    mnist_labels = loader.load_train_labels()
    labeled_set = np.where(mnist_labels < num_labels)[0]
    unlabeled_set = np.where(mnist_labels > num_labels-1)[0]

    # Floor division keeps the split point an integer.  The previous
    # np.round(len(...)/2) produced a float under true division, and a
    # float is not a valid slice bound for NumPy arrays.
    train_num = len(labeled_set) // 2
    train_idx = labeled_set[:train_num]
    test_idx = labeled_set[train_num:]

    # Samples are selected along the second axis of the image matrix.
    train_data = mnist_data[:, train_idx]
    train_labels = mnist_labels[train_idx]
    test_data = mnist_data[:, test_idx]
    test_labels = mnist_labels[test_idx]
    unlabeled_data = mnist_data[:, unlabeled_set]
    print("%s %s" % ("train data:", train_data.shape[1]))
    print("%s %s" % ("test data:", test_data.shape[1]))
    print("%s %s" % ("unlabeled data:", unlabeled_data.shape[1]))
    init_theta = initialization(levels)

    # approx_grad=False: J itself must return (cost, gradient).
    theta, cost, info = fmin_l_bfgs_b(
        J, init_theta,
        args=(unlabeled_data, unlabeled_data, levels, lbda, ro, beta),
        approx_grad=False, maxiter=400)
    print("%s %s" % ("cost:", cost))
    print("%s %s" % ("info:", info))

    WB = vec2mat(theta, levels)
    sio.savemat('W', {'W': WB[0][0]})
    train_a2 = feed_forward(WB[0], train_data, hidden_size)
    test_a2 = feed_forward(WB[0], test_data, hidden_size)
    sr_init_theta = sr_train.initialize_theta(hidden_size, num_labels)
    sr_theta = sr_train.train(sr_init_theta, train_a2, train_labels, num_labels)
    sr_mat = sr_vec2mat(sr_theta, num_labels)
    pY = sr_predict(sr_mat, test_a2)
    print(test_labels[:20])
    print(pY[:20])

    # Float accumulator so the final ratio is never integer division.
    miss = float(sum(1 for i, label in enumerate(test_labels)
                     if label != pY[i]))
    print(miss / len(test_labels))
예제 #5
0
파일: train.py 프로젝트: jayshonzs/UFLDL
        theta, cost, info = fmin_l_bfgs_b(J, init_theta, args=((X, Y, lbda, num_classes),), approx_grad=False, maxiter=400)
    except Exception as e:
        print e
    print theta
    print cost
    print info
    return theta


if __name__ == '__main__':
    inputSize = 28*28
    numClasses = 10
    lbda = 1e-4
    init_theta = initialize_theta(inputSize, numClasses)
    X = load_train_imgs(u'../MNISThelper/train-images.idx3-ubyte')
    Y = load_train_labels(u'../MNISThelper/train-labels.idx1-ubyte')
    print X.shape, Y.shape, init_theta.shape
    # numerical gradient check
    '''
    cX = X[:20, :10]
    cY = Y[:10]
    check_numerical_gradient(init_theta[:200], cX, cY, numClasses, lbda=lbda)
    '''
    theta = train(init_theta, X, Y, numClasses, lbda)
    mat = vec2mat(theta, numClasses)
    tX = load_train_imgs('../MNISThelper/t10k-images.idx3-ubyte')
    tY = load_train_labels('../MNISThelper/t10k-labels.idx1-ubyte')
    pY = predict(mat, tX)
    print tY[:20]
    print pY[:20]
    e = 0.