Ejemplo n.º 1
0
def nesterov_momentum(learning_rate):
    """Train a one-hidden-layer tanh network with Nesterov momentum.

    The data returned by ``transform_data()`` is shuffled and split in half
    into a training and a test set. Training runs for 50 epochs over
    mini-batches of 250 samples; at the start of every epoch the current
    parameters are scored on the test set.

    Args:
        learning_rate: Step size applied to every gradient.

    Returns:
        A tuple ``(nesterov_cost, nesterov_cr, best_nesterov,
        best_iteration)``: the per-epoch test cross-entropy, the per-epoch
        test classification rate, the best classification rate seen, and the
        epoch index at which that best rate was first reached.
    """
    X, Y = transform_data()
    X, Y = shuffle(X, Y)
    N = len(X) // 2
    Xtrain, Ytrain = X[:N], Y[:N]
    Ttrain = generate_T(Ytrain)
    Xtest, Ytest = X[N:], Y[N:]
    Ttest = generate_T(Ytest)
    N, D = Xtrain.shape
    M = 100  # width of the hidden layer: w is (D, M), v is (M, K)
    K = len(set(Y))  # number of distinct labels
    iterations = 50
    batch_N = 250
    batches = N // batch_N

    # Keep the original draw order so seeded runs stay reproducible.
    v = np.random.randn(M, K) / np.sqrt(M + K)
    b_1 = np.random.randn(K) / np.sqrt(K)
    w = np.random.randn(D, M) / np.sqrt(D + M)
    b_0 = np.random.randn(M) / np.sqrt(M)

    mu = .9
    dv = 0
    db_1 = 0
    dw = 0
    db_0 = 0

    nesterov_cost = []
    nesterov_cr = []
    best_nesterov = 0
    best_iteration = 0

    for i in range(iterations):
        # Evaluate once per epoch, before any update of this epoch. The test
        # forward pass used to run on every batch although its result was
        # only consumed for the first one.
        Y_test, _ = generate_Y('tanh', Xtest, w, b_0, v, b_1)
        P_test = np.argmax(Y_test, axis=1)
        nesterov_cost.append(cross_entropy(Y_test, Ttest))
        cr = classification_rate(P_test, Ytest)
        nesterov_cr.append(cr)
        if cr > best_nesterov:
            best_nesterov = cr
            best_iteration = i

        for b in range(batches):
            start = b * batch_N
            stop = start + batch_N
            X_batch = Xtrain[start:stop, :]
            T_batch = Ttrain[start:stop, :]
            Y_batch, Z = generate_Y('tanh', X_batch, w, b_0, v, b_1)

            # All gradients are taken from the same forward pass, *before*
            # any parameter is touched; otherwise the hidden-layer gradients
            # would be computed with an already-updated ``v``.
            grad_v = derivative_v('tanh', Z, Y_batch, T_batch)
            grad_b1 = derivative_b1('tanh', Y_batch, T_batch)
            grad_w = derivative_w('tanh', X_batch, Y_batch, Z, T_batch, v)
            grad_b0 = derivative_b0('tanh', Y_batch, Z, T_batch, v)

            # Velocity update.
            dv = mu * dv - learning_rate * grad_v
            db_1 = mu * db_1 - learning_rate * grad_b1
            dw = mu * dw - learning_rate * grad_w
            db_0 = mu * db_0 - learning_rate * grad_b0

            # Nesterov step: look ahead along the new velocity and descend
            # along the gradient. Every parameter subtracts the gradient
            # term (``w`` previously added it, i.e. moved uphill).
            v += mu * dv - learning_rate * grad_v
            b_1 += mu * db_1 - learning_rate * grad_b1
            w += mu * dw - learning_rate * grad_w
            b_0 += mu * db_0 - learning_rate * grad_b0

        if i % 100 == 0:
            print('Nesterov Cost: ', nesterov_cost[i],
                  'Nesterov Classification: ', nesterov_cr[i])
    return nesterov_cost, nesterov_cr, best_nesterov, best_iteration
Ejemplo n.º 2
0
def exp_decay(learning_rate, decay_rate=0.1):
    """Train a one-hidden-layer tanh network with an exponentially decayed
    learning rate and classical momentum.

    The data returned by ``transform_data()`` is shuffled and split in half
    into a training and a test set. Training runs for 50 epochs over
    mini-batches of 250 samples; at the start of every epoch the current
    parameters are scored on the test set.

    The learning rate used in epoch ``i`` is
    ``learning_rate * exp(-decay_rate * i)``.

    Args:
        learning_rate: Initial learning rate (the value used in epoch 0).
        decay_rate: Exponential decay constant per epoch. NOTE(review): the
            previous code used ``K`` (the number of classes) as the decay
            constant and compounded it on the already-decayed rate, which
            drove the learning rate to ~0 after the first epoch. The default
            of 0.1 is a choice made during the fix - confirm against the
            intended schedule.

    Returns:
        A tuple ``(exp_cost, exp_cr, exp_lr, best_exp, best_iteration)``:
        the per-epoch test cross-entropy, the per-epoch test classification
        rate, the learning rate used in each epoch, the best classification
        rate seen, and the epoch index at which it was first reached.
    """
    X, Y = transform_data()
    X, Y = shuffle(X, Y)
    N = len(X) // 2
    Xtrain, Ytrain = X[:N], Y[:N]
    Ttrain = generate_T(Ytrain)
    Xtest, Ytest = X[N:], Y[N:]
    Ttest = generate_T(Ytest)
    N, D = Xtrain.shape
    M = 100  # width of the hidden layer: w is (D, M), v is (M, K)
    K = len(set(Y))  # number of distinct labels
    iterations = 50
    batch_N = 250
    batches = N // batch_N

    dv = 0
    d_b1 = 0
    dw = 0
    d_b0 = 0
    mu = .9

    # Keep the original draw order so seeded runs stay reproducible.
    v = np.random.randn(M, K) / np.sqrt(M + K)
    b_1 = np.random.randn(K) / np.sqrt(K)
    w = np.random.randn(D, M) / np.sqrt(D + M)
    b_0 = np.random.randn(M) / np.sqrt(M)

    initial_learning_rate = learning_rate
    exp_cost = []
    exp_cr = []
    exp_lr = []
    best_exp = 0
    best_iteration = 0

    for i in range(iterations):
        # Decay from the *initial* rate so the schedule is a plain
        # exponential in the epoch index rather than a compounded product.
        learning_rate = initial_learning_rate * np.exp(-decay_rate * i)
        exp_lr.append(learning_rate)

        # Evaluate once per epoch, before any update of this epoch.
        Y_test, _ = generate_Y('tanh', Xtest, w, b_0, v, b_1)
        P_test = np.argmax(Y_test, axis=1)
        exp_cost.append(cross_entropy(Y_test, Ttest))
        cr = classification_rate(P_test, Ytest)
        exp_cr.append(cr)
        if cr > best_exp:
            best_exp = cr
            best_iteration = i

        for b in range(batches):
            # Slice by batch *size*; slicing by the batch count (as before)
            # only ever visited the first ``batches**2`` training rows.
            start = b * batch_N
            stop = start + batch_N
            X_batch = Xtrain[start:stop, :]
            T_batch = Ttrain[start:stop, :]
            Y_batch, Z = generate_Y('tanh', X_batch, w, b_0, v, b_1)

            dv = mu * dv - learning_rate * derivative_v(
                'tanh', Z, Y_batch, T_batch)
            d_b1 = mu * d_b1 - learning_rate * derivative_b1(
                'tanh', Y_batch, T_batch)
            dw = mu * dw - learning_rate * derivative_w(
                'tanh', X_batch, Y_batch, Z, T_batch, v)
            d_b0 = mu * d_b0 - learning_rate * derivative_b0(
                'tanh', Y_batch, Z, T_batch, v)

            v += dv
            b_1 += d_b1
            w += dw
            b_0 += d_b0

        if i % 10 == 0:
            print('Exp Cost: ', exp_cost[i], 'Exp Classification: ', exp_cr[i])
    return exp_cost, exp_cr, exp_lr, best_exp, best_iteration
Ejemplo n.º 3
0
def grid():
    """Grid-search learning rate and momentum for a single-layer classifier.

    The data returned by ``transform_data()`` is shuffled and split in half
    into a training and a test set. For every ``(learning_rate, momentum)``
    pair the model ``softmax(relu(X.dot(w) + b))`` is trained from the same
    initial parameters for 2000 full-batch iterations, and the test cost and
    classification rate are recorded before every update.

    Returns:
        A tuple ``(cost, cr, best_lr, best_mu, best_cr)``. ``cost`` and
        ``cr`` are dicts keyed by ``(learning_rate, momentum)`` holding the
        per-iteration test cross-entropy and classification rate;
        ``best_lr`` / ``best_mu`` identify the pair that produced the highest
        classification rate ``best_cr`` (all three are 0 if no rate ever
        exceeded 0).
    """
    X, Y = transform_data()
    X, Y = shuffle(X, Y)
    N = len(X) // 2
    Xtrain, Ytrain = X[:N], Y[:N]
    Ttrain = generate_T(Ytrain)
    Xtest, Ytest = X[N:], Y[N:]
    Ttest = generate_T(Ytest)
    N, D = Xtrain.shape
    K = len(set(Y))  # number of distinct labels

    # Shared starting point for every grid cell.
    w0 = np.random.randn(D, K) / np.sqrt(D + K)
    b0 = np.random.randn(K) / np.sqrt(K)

    learning_rates = [10**i for i in range(-7, -3)]
    momentums = [1 - 10**i for i in range(-1, -5, -1)]  # 0.9 ... 0.9999
    iterations = 2000

    best_lr = 0
    best_mu = 0  # was initialised as ``best_momentum`` but returned as best_mu
    best_cr = 0
    cost = {}
    cr = {}

    for lr in learning_rates:
        for mu in momentums:
            # Fresh copies so in-place updates never leak into w0 / b0 and
            # every cell starts from the same initial parameters.
            w = w0.copy()
            b = b0.copy()
            dw = 0
            db = 0
            cell_cost = cost[(lr, mu)] = []
            cell_cr = cr[(lr, mu)] = []

            for i in range(iterations):
                # Use the *trained* bias ``b``; the forward pass previously
                # always used ``b0``, so the bias updates had no effect.
                A_train = relu(Xtrain.dot(w) + b)
                A_test = relu(Xtest.dot(w) + b)

                exp_train = np.exp(A_train)
                Y_train = exp_train / exp_train.sum(axis=1, keepdims=True)
                exp_test = np.exp(A_test)
                Y_test = exp_test / exp_test.sum(axis=1, keepdims=True)
                P_test = np.argmax(Y_test, axis=1)

                cell_cost.append(cross_entropy(Y_test, Ttest))
                current_cr = classification_rate(P_test, Ytest)
                cell_cr.append(current_cr)
                if current_cr > best_cr:
                    best_cr = current_cr
                    best_lr = lr
                    best_mu = mu

                # Momentum update with the gradient scaled by (1 - mu).
                dw = mu * dw - (1 - mu) * lr * derivative_w(
                    Xtrain, Y_train, Ttrain)
                db = mu * db - (1 - mu) * lr * derivative_b(Y_train, Ttrain)
                w += dw
                b += db

                if i % 100 == 0:
                    print('Learning Rate: ',lr,'Momentum: ',mu,'Cost: ',cell_cost[i],'Classification Rate: ',cell_cr[i])

            # Blank line between grid cells in the log.
            print('')
    return cost, cr, best_lr, best_mu, best_cr
Ejemplo n.º 4
0
def batch(learning_rate):
    """Train a one-hidden-layer tanh network with plain mini-batch gradient
    descent.

    The data returned by ``transform_data()`` is shuffled and split in half
    into a training and a test set. Training runs for 50 epochs over
    mini-batches of 250 samples; at the start of every epoch the current
    parameters are scored on the test set.

    Args:
        learning_rate: Step size applied to every gradient.

    Returns:
        A tuple ``(batch_cost, batch_cr, best_batch, best_iteration)``: the
        per-epoch test cross-entropy, the per-epoch test classification
        rate, the best classification rate seen, and the epoch index at
        which it was first reached.
    """
    X, Y = transform_data()
    X, Y = shuffle(X, Y)
    N = len(X) // 2
    Xtrain, Ytrain = X[:N], Y[:N]
    Ttrain = generate_T(Ytrain)
    Xtest, Ytest = X[N:], Y[N:]
    Ttest = generate_T(Ytest)
    N, D = Xtrain.shape
    M = 100  # width of the hidden layer: w is (D, M), v is (M, K)
    K = len(set(Y))  # number of distinct labels
    iterations = 50
    batch_N = 250
    # Count batches over the *training* set. Using len(X) (train + test)
    # doubled the count, so the second half of every epoch sliced empty
    # arrays past the end of Xtrain.
    batches = N // batch_N

    # Keep the original draw order so seeded runs stay reproducible.
    v = np.random.randn(M, K) / np.sqrt(M + K)
    b_1 = np.random.randn(K) / np.sqrt(K)
    w = np.random.randn(D, M) / np.sqrt(D + M)
    b_0 = np.random.randn(M) / np.sqrt(M)

    batch_cost = []
    batch_cr = []
    best_batch = 0
    best_iteration = 0

    for i in range(iterations):
        # Evaluate once per epoch, before any update of this epoch. The test
        # forward pass used to run on every batch although its result was
        # only consumed for the first one.
        Y_test, _ = generate_Y('tanh', Xtest, w, b_0, v, b_1)
        P_test = np.argmax(Y_test, axis=1)
        batch_cost.append(cross_entropy(Y_test, Ttest))
        cr = classification_rate(P_test, Ytest)
        batch_cr.append(cr)
        if cr > best_batch:
            best_batch = cr
            best_iteration = i

        for b in range(batches):
            start = b * batch_N
            stop = start + batch_N
            X_batch = Xtrain[start:stop, :]
            T_batch = Ttrain[start:stop, :]
            Y_batch, Z = generate_Y('tanh', X_batch, w, b_0, v, b_1)

            # Compute every gradient before touching any parameter so the
            # hidden-layer gradients use the same ``v`` as the forward pass.
            grad_v = derivative_v('tanh', Z, Y_batch, T_batch)
            grad_b1 = derivative_b1('tanh', Y_batch, T_batch)
            grad_w = derivative_w('tanh', X_batch, Y_batch, Z, T_batch, v)
            grad_b0 = derivative_b0('tanh', Y_batch, Z, T_batch, v)

            v -= learning_rate * grad_v
            b_1 -= learning_rate * grad_b1
            w -= learning_rate * grad_w
            b_0 -= learning_rate * grad_b0

        if i % 100 == 0:
            print('Batch Cost: ', batch_cost[i], 'Batch Classification: ',
                  batch_cr[i])
    return batch_cost, batch_cr, best_batch, best_iteration
Ejemplo n.º 5
0
def rmsprop(learning_rate):
    """Train a one-hidden-layer tanh network with RMSProp plus momentum.

    The data returned by ``transform_data()`` is shuffled and split in half
    into a training and a test set. Training runs for 50 epochs over
    mini-batches of 250 samples; at the start of every epoch the current
    parameters are scored on the test set.

    Each gradient is divided by the square root of a running average of its
    squared values (decay 0.9, caches initialised to ones) before it is fed
    into a momentum update (momentum 0.9).

    Args:
        learning_rate: Step size applied to every normalised gradient.

    Returns:
        A tuple ``(rmsprop_cost, rmsprop_cr, best_rms, best_iteration)``:
        the per-epoch test cross-entropy, the per-epoch test classification
        rate, the best classification rate seen, and the epoch index at
        which it was first reached.
    """
    X, Y = transform_data()
    X, Y = shuffle(X, Y)
    N = len(X) // 2
    Xtrain, Ytrain = X[:N], Y[:N]
    Ttrain = generate_T(Ytrain)
    Xtest, Ytest = X[N:], Y[N:]
    Ttest = generate_T(Ytest)
    N, D = Xtrain.shape
    M = 100  # width of the hidden layer: w is (D, M), v is (M, K)
    K = len(set(Y))  # number of distinct labels
    iterations = 50
    batch_N = 250
    batches = N // batch_N

    dv = 0
    d_b1 = 0
    dw = 0
    d_b0 = 0
    mu = .9

    # Keep the original draw order so seeded runs stay reproducible.
    v = np.random.randn(M, K) / np.sqrt(M + K)
    b_1 = np.random.randn(K) / np.sqrt(K)
    w = np.random.randn(D, M) / np.sqrt(D + M)
    b_0 = np.random.randn(M) / np.sqrt(M)

    # Running averages of the squared gradients, one per parameter.
    cache_v = np.ones((M, K))
    cache_b1 = np.ones(K)
    cache_w = np.ones((D, M))
    cache_b0 = np.ones(M)
    epsilon = 10e-10
    decay = .9

    rmsprop_cost = []
    rmsprop_cr = []
    best_rms = 0
    best_iteration = 0

    for i in range(iterations):
        # Evaluate once per epoch, before any update of this epoch.
        Y_test, _ = generate_Y('tanh', Xtest, w, b_0, v, b_1)
        P_test = np.argmax(Y_test, axis=1)
        rmsprop_cost.append(cross_entropy(Y_test, Ttest))
        cr = classification_rate(P_test, Ytest)
        rmsprop_cr.append(cr)
        if cr > best_rms:
            best_rms = cr
            best_iteration = i

        for b in range(batches):
            # Slice by batch *size*; slicing by the batch count (as before)
            # only ever visited the first ``batches**2`` training rows.
            start = b * batch_N
            stop = start + batch_N
            X_batch = Xtrain[start:stop, :]
            T_batch = Ttrain[start:stop, :]
            Y_batch, Z = generate_Y('tanh', X_batch, w, b_0, v, b_1)

            # Each gradient is needed for both the cache and the step, so
            # compute it once.
            grad_v = derivative_v('tanh', Z, Y_batch, T_batch)
            grad_b1 = derivative_b1('tanh', Y_batch, T_batch)
            grad_w = derivative_w('tanh', X_batch, Y_batch, Z, T_batch, v)
            grad_b0 = derivative_b0('tanh', Y_batch, Z, T_batch, v)

            cache_v = decay * cache_v + (1 - decay) * grad_v**2
            cache_b1 = decay * cache_b1 + (1 - decay) * grad_b1**2
            cache_w = decay * cache_w + (1 - decay) * grad_w**2
            cache_b0 = decay * cache_b0 + (1 - decay) * grad_b0**2

            dv = mu * dv - learning_rate * grad_v / np.sqrt(
                cache_v + epsilon)
            d_b1 = mu * d_b1 - learning_rate * grad_b1 / np.sqrt(
                cache_b1 + epsilon)
            dw = mu * dw - learning_rate * grad_w / np.sqrt(
                cache_w + epsilon)
            d_b0 = mu * d_b0 - learning_rate * grad_b0 / np.sqrt(
                cache_b0 + epsilon)

            v += dv
            b_1 += d_b1
            w += dw
            b_0 += d_b0

        if i % 10 == 0:
            print('RMSProp Cost: ', rmsprop_cost[i],
                  'RMSProp Classification: ', rmsprop_cr[i])
    return rmsprop_cost, rmsprop_cr, best_rms, best_iteration
Ejemplo n.º 6
0
    def full(self):
        """Run ``self.iterations`` full-batch Adam-style updates.

        Every iteration does a forward pass over the whole training and test
        sets, appends cost and classification rate to ``self.train_cost``,
        ``self.test_cost``, ``self.train_cr`` and ``self.test_cr``, tracks
        the best rates (``self.best_train`` / ``self.best_test``) together
        with the iteration they occurred in, and then updates ``self.v``,
        ``self.b_1``, ``self.w`` and ``self.b_0``.

        For each parameter the first moment (``self.m_*``), second moment
        (``self.v_*``) and their bias-corrected versions (``self.dm_*`` /
        ``self.dv_*``) are stored on the instance. ``self.decay_0`` and
        ``self.decay_1`` are the decay rates of the first and second moment.

        Returns:
            None. All results are stored on ``self``.
        """
        beta_1 = self.decay_0
        beta_2 = self.decay_1
        for i in range(self.iterations):
            Y_train, Z = generate_Y(self.activation, self.Xtrain, self.w,
                                    self.b_0, self.v, self.b_1)
            P_train = np.argmax(Y_train, axis=1)
            Y_test, _ = generate_Y(self.activation, self.Xtest, self.w,
                                   self.b_0, self.v, self.b_1)
            P_test = np.argmax(Y_test, axis=1)

            self.train_cost.append(cross_entropy(Y_train, self.Ttrain))
            self.test_cost.append(cross_entropy(Y_test, self.Ttest))
            train_cr = classification_rate(P_train, self.Ytrain)
            self.train_cr.append(train_cr)
            test_cr = classification_rate(P_test, self.Ytest)
            self.test_cr.append(test_cr)

            if train_cr > self.best_train:
                self.best_train = train_cr
                self.train_iteration = i
            if test_cr > self.best_test:
                self.best_test = test_cr
                self.test_iteration = i

            # Each gradient feeds both moment estimates, so compute it once.
            # All four are taken before any parameter is modified.
            # Entries: (moment-attribute suffix, parameter attribute, gradient)
            gradients = (
                ('v', 'v', derivative_v(self.activation, Z, Y_train,
                                        self.Ttrain)),
                ('b1', 'b_1', derivative_b1(self.activation, Y_train,
                                            self.Ttrain)),
                ('w', 'w', derivative_w(self.activation, self.Xtrain,
                                        Y_train, Z, self.Ttrain, self.v)),
                ('b0', 'b_0', derivative_b0(self.activation, Y_train, Z,
                                            self.Ttrain, self.v)),
            )

            t = i + 1  # one update per iteration
            for suffix, attr, grad in gradients:
                first = beta_1 * getattr(self, 'm_' + suffix) + (
                    1 - beta_1) * grad
                second = beta_2 * getattr(self, 'v_' + suffix) + (
                    1 - beta_2) * grad**2
                first_hat = first / (1 - beta_1**t)
                second_hat = second / (1 - beta_2**t)
                setattr(self, 'm_' + suffix, first)
                setattr(self, 'v_' + suffix, second)
                setattr(self, 'dm_' + suffix, first_hat)
                setattr(self, 'dv_' + suffix, second_hat)

                # In-place update followed by rebinding, matching ``-=``.
                param = getattr(self, attr)
                param -= self.learning_rate * first_hat / np.sqrt(
                    second_hat + self.epsilon)
                setattr(self, attr, param)

            if i % 100 == 0:
                # [-1] is the entry appended in this iteration; indexing by
                # ``i`` is only right when the history was empty on entry.
                print(i, 'Train Cost: ', self.train_cost[-1],
                      'Train Classification Rate: ', self.train_cr[-1])
Ejemplo n.º 7
0
    def stochastic(self, samples):
        """Run Adam-style updates one training sample at a time.

        In each of ``self.iterations`` epochs the training inputs and targets
        are reshuffled and the first ``samples`` rows are used, one per
        update. Before every update the full training and test sets are
        scored and the results appended to ``self.train_cost``,
        ``self.test_cost``, ``self.train_cr`` and ``self.test_cr``; the best
        rates (``self.best_train`` / ``self.best_test``) are tracked together
        with the epoch they occurred in.

        For each parameter the first moment (``self.m_*``), second moment
        (``self.v_*``) and their bias-corrected versions (``self.dm_*`` /
        ``self.dv_*``) are stored on the instance.

        Args:
            samples: Number of single-sample updates per epoch. Must not
                exceed the number of training rows.

        Returns:
            None. All results are stored on ``self``.
        """
        beta_1 = self.decay_0
        beta_2 = self.decay_1
        for i in range(self.iterations):
            current_X, current_T = shuffle(self.Xtrain, self.Ttrain)
            for s in range(samples):
                X = current_X[s, :].reshape(1, current_X.shape[1])
                T = current_T[s, :].reshape(1, current_T.shape[1])
                Y, Z = generate_Y(self.activation, X, self.w, self.b_0,
                                  self.v, self.b_1)

                Y_train, _ = generate_Y(self.activation, self.Xtrain, self.w,
                                        self.b_0, self.v, self.b_1)
                P_train = np.argmax(Y_train, axis=1)
                Y_test, _ = generate_Y(self.activation, self.Xtest, self.w,
                                       self.b_0, self.v, self.b_1)
                P_test = np.argmax(Y_test, axis=1)

                self.train_cost.append(cross_entropy(Y_train, self.Ttrain))
                self.test_cost.append(cross_entropy(Y_test, self.Ttest))
                train_cr = classification_rate(P_train, self.Ytrain)
                self.train_cr.append(train_cr)
                test_cr = classification_rate(P_test, self.Ytest)
                self.test_cr.append(test_cr)

                if train_cr > self.best_train:
                    self.best_train = train_cr
                    self.train_iteration = i
                if test_cr > self.best_test:
                    self.best_test = test_cr
                    self.test_iteration = i

                # Each gradient feeds both moment estimates, so compute it
                # once. All four are taken before any parameter is modified.
                # Entries: (moment-attribute suffix, parameter attribute,
                # gradient)
                gradients = (
                    ('v', 'v', derivative_v(self.activation, Z, Y, T)),
                    ('b1', 'b_1', derivative_b1(self.activation, Y, T)),
                    ('w', 'w', derivative_w(self.activation, X, Y, Z, T,
                                            self.v)),
                    ('b0', 'b_0', derivative_b0(self.activation, Y, Z, T,
                                                self.v)),
                )

                # Bias correction is indexed by the number of updates made so
                # far, not by the epoch: the moments are refreshed once per
                # sample, so the epoch index under-counts the timestep.
                t = i * samples + s + 1
                for suffix, attr, grad in gradients:
                    first = beta_1 * getattr(self, 'm_' + suffix) + (
                        1 - beta_1) * grad
                    second = beta_2 * getattr(self, 'v_' + suffix) + (
                        1 - beta_2) * grad**2
                    first_hat = first / (1 - beta_1**t)
                    second_hat = second / (1 - beta_2**t)
                    setattr(self, 'm_' + suffix, first)
                    setattr(self, 'v_' + suffix, second)
                    setattr(self, 'dm_' + suffix, first_hat)
                    setattr(self, 'dv_' + suffix, second_hat)

                    # In-place update followed by rebinding, matching ``-=``.
                    param = getattr(self, attr)
                    param -= self.learning_rate * first_hat / np.sqrt(
                        second_hat + self.epsilon)
                    setattr(self, attr, param)

            # The history grows by ``samples`` entries per epoch, so index
            # ``i`` does not address this epoch; report the latest entry.
            # The emptiness guard covers ``samples == 0``.
            if i % 100 == 0 and self.train_cost:
                print(i, 'Train Cost: ', self.train_cost[-1],
                      'Train Classification Rate: ', self.train_cr[-1])