def runNN(n_hidden_max):
    """Train a 2-layer perceptron (one hidden layer) on two 2-D Gaussian
    classes, where class B is the union of two clouds (B and C) and is
    therefore not linearly separable from A.  Records per-epoch loss on a
    training/validation split.

    :param n_hidden_max: intended upper bound for the hidden-layer-size
        sweep; currently unused — the loop below is pinned to range(2, 3).
    :return: (losses, losses_0) — training-set and validation-set loss
        histories, one entry per epoch (per swept hidden size).
    """
    losses = []    # training-set loss per epoch
    losses_0 = []  # validation-set loss per epoch
    ## (swap for more hidden nodes)
    for n_hidden in range(2, 3):  # (1, n_hidden_max + 1):
        # print(n_hidden)
        eta = 0.001  # NOTE(review): dead store — overwritten with 0.01 below
        ndata = 50
        # n_hidden = 3
        # Training Data sets A and B (B absorbs C, making it bimodal)
        mA, sigmaA = [0.0, -0.1], 0.3
        mB, sigmaB = [1.0, 0.3], 0.2
        mC, sigmaC = [-1.0, -0.3], 0.2
        # mD, sigmaD = [3, 4], 0.7
        ## Really quick just added a second set C to check this actually works on lin non-seperable.
        ## @Bryan please plug in the correct data set
        A = _3_1_1_lin_seperable_data.generateData(2, mA, sigmaA,
                                                   math.floor(ndata * 2))
        B = _3_1_1_lin_seperable_data.generateData(2, mB, sigmaB, ndata)
        C = _3_1_1_lin_seperable_data.generateData(2, mC, sigmaC, ndata)
        # D = _3_1_1_lin_seperable_data.generateData(2, mD, sigmaD, ndata)
        B = np.hstack([B, C])
        # B = np.hstack([B,D])
        # Create validation set: move ~25% of the columns of A and B into
        # A_0 / B_0 (stacked as rows here, transposed back below).
        # NOTE(review): the seed column 0 is copied into the validation set
        # but never deleted from A/B, so sample 0 ends up in both splits —
        # confirm whether this leakage is intended.
        A_0 = A[:, 0]
        B_0 = B[:, 0]
        for _i in range(int(round(ndata * 2 * .25)) - 1):
            _e = np.random.randint(A.shape[1])
            A_0 = np.vstack([A_0, A[:, _e]])
            A = np.delete(A, _e, axis=1)
            B_0 = np.vstack([B_0, B[:, _e]])
            B = np.delete(B, _e, axis=1)
        A_0 = np.transpose(A_0)
        B_0 = np.transpose(B_0)
        X = np.hstack([A, B])        # training inputs, one sample per column
        X_0 = np.hstack([A_0, B_0])  # validation inputs
        # Add biasing term (constant-1 row appended to the inputs)
        X = np.vstack([X, np.ones(X.shape[1])])
        X_0 = np.vstack([X_0, np.ones(X_0.shape[1])])
        # Targets: +1 for class A, -1 for class B
        T_A, T_B = np.ones(A.shape[1]), -1 * np.ones(B.shape[1])
        T = np.hstack([T_A, T_B])
        T_A_0, T_B_0 = np.ones(A_0.shape[1]), -1 * np.ones(B_0.shape[1])
        T_0 = np.hstack([T_A_0, T_B_0])
        # Wji = weights at layer j (input -> hidden)
        # Wkj = weights at layer k (hidden -> output)
        # Wkj has a + 1 to reflect the weights associated with the bias
        Wji = _3_1_2_single_layer_perceptron.initializeWeights(
            X.shape[0], n_hidden)
        Wkj = _3_1_2_single_layer_perceptron.initializeWeights(n_hidden + 1, 1)
        layer1 = Layer(activation=v_sigmoid, d_activation=v_d_sigmoid)
        layer2 = Layer(activation=v_sigmoid, d_activation=v_d_sigmoid)
        # Axis limits for the matplotlib window (data extent +/- 0.5)
        maxX = X[:, np.argmax(X[0])][0] + 0.5
        minX = X[:, np.argmin(X[0])][0] - 0.5
        maxY = X[:, np.argmax(X[1])][1] + 0.5
        minY = X[:, np.argmin(X[1])][1] - 0.5
        eta = 0.01  # effective learning rate used by the updates below
        plt.ion()
        start = time.time()
        for epoch in range(1000):

            def batch_learning(Wji, Wkj):
                # One full-batch gradient step; returns the updated weights.
                # Forward, layer j: X -> affine(Wji) -> sigmoid -> h_out
                # (pad=True presumably appends the hidden bias row — TODO
                # confirm against Layer.forward.)
                h_out = layer1.forward(Wji, X, pad=True)
                # Forward, layer k: h_out -> affine(Wkj) -> sigmoid -> y_out
                y_out = layer2.forward(Wkj, h_out, pad=False)
                classification_percent = np.sum(np.sign(y_out) == T) / len(T)
                error = _3_1_2_single_layer_perceptron.error(T, y_out)
                error = error / len(T)  # mean error per sample
                losses.append(error)
                print(
                    f"Epoch: {epoch} Error: {error} Percentage: {classification_percent}"
                )
                # Backprop: output delta, then hidden delta
                d_y_out = y_out - T
                d_y_in = layer2.d_in(d_y_out)
                DELTA_Wkj = DELTA(eta, d_y_in, h_out)
                d_h_out = layer1.d_out_hidden(d_y_in, Wkj)
                d_h_in = layer1.d_in(d_h_out)
                # [:-1, :] presumably trims the bias unit's gradient row —
                # TODO confirm against DELTA's output shape.
                DELTA_Wji = DELTA(eta, d_h_in, X)[:-1, :]
                # Validation loss, computed with the pre-update weights
                h_out_0 = layer1.forward(Wji, X_0, pad=True)
                y_out_0 = layer2.forward(Wkj, h_out_0, pad=False)
                error_0 = _3_1_2_single_layer_perceptron.error(T_0, y_out_0)
                error_0 = error_0 / len(T_0)
                losses_0.append(error_0)
                Wji = Wji + DELTA_Wji
                Wkj = Wkj + DELTA_Wkj
                return Wji, Wkj

            Wji, Wkj = batch_learning(Wji, Wkj)
            # print(max(np.max(DELTA_Wji), np.max(DELTA_Wji)))

            def sequential_learning(Wji, Wkj):
                # One epoch of per-sample (online) updates; validation loss
                # is computed with the weights from the start of the epoch.
                Wji_old = np.copy(Wji)
                Wkj_old = np.copy(Wkj)
                classification_percent = 0.0
                error = 0.0
                for i in range(len(T)):
                    X_sample = np.transpose(np.atleast_2d(X[:, i]))
                    T_sample = np.atleast_2d(T[i])
                    # pass forward layer 1
                    layer_1_IN = _3_1_2_single_layer_perceptron.forwardPass(
                        Wji, X_sample)
                    h_out = v_sigmoid(layer_1_IN)
                    layer_1_PHID = v_d_sigmoid(layer_1_IN)
                    # bias layer 1 (append constant-1 row)
                    padding = np.ones(h_out.shape[1])
                    h_out = np.vstack([h_out, padding])
                    layer_1_PHID = np.vstack([layer_1_PHID, padding])
                    # pass forward layer 2
                    layer_2_IN = _3_1_2_single_layer_perceptron.forwardPass(
                        Wkj, h_out)
                    y_out = v_sigmoid(layer_2_IN)
                    layer_2_PHID = v_d_sigmoid(layer_2_IN)
                    classification_percent += np.sum(
                        np.sign(y_out) == T_sample)
                    error += _3_1_2_single_layer_perceptron.error(
                        T_sample, y_out)
                    # backprop: output delta -> hidden delta -> weight deltas
                    d_y_out = y_out - T_sample
                    d_y_in = d_y_out * layer_2_PHID
                    DELTA_Wkj = DELTA(eta, d_y_in, h_out)
                    d_h_out = np.matmul(np.transpose(Wkj), d_y_in)
                    d_h_in = d_h_out * layer_1_PHID
                    DELTA_Wji = DELTA(eta, d_h_in, X_sample)[:-1, :]
                    Wji = Wji + DELTA_Wji
                    Wkj = Wkj + DELTA_Wkj
                print(
                    f"Epoch: {epoch} Error: {error} Percentage: {classification_percent/len(T)}"
                )
                error = error / len(T)
                losses.append(error)
                # Validation loss using the epoch's starting weights
                h_out_0 = layer1.forward(Wji_old, X_0, pad=True)
                y_out_0 = layer2.forward(Wkj_old, h_out_0, pad=False)
                error_0 = _3_1_2_single_layer_perceptron.error(T_0, y_out_0)
                error_0 = error_0 / len(T_0)
                losses_0.append(error_0)
                return Wji, Wkj

            # Wji, Wkj = sequential_learning(Wji, Wkj)

            __x = np.arange(-5, 5, 0.5)

            def decisionBoundary(x, normal):
                # Line where a hidden unit's net input is zero:
                # 0 = w0x + w1y + w2
                # y = (-w0x - w2) / w1
                if normal.shape[0] == 3:
                    return (-normal[0] * x - normal[2]) / normal[1]
                else:
                    return (-normal[0] * x) / normal[1]

            def plot(n_hidden_layer):
                # Scatter both classes and draw each hidden unit's boundary.
                plt.clf()
                plt.ylim(minY, maxY)
                plt.xlim(minX, maxX)
                plt.plot(A[0], A[1], "ro", B[0], B[1], "bo")
                for _i in range(n_hidden_layer):
                    plt.plot(__x,
                             decisionBoundary(__x, np.transpose(Wji[_i, :])),
                             label=f"hidden node: {_i}")
                plt.title("Grid")
                plt.xlabel("X-Coord")
                plt.ylabel("Y-Coord")
                plt.legend()
                plt.show()
                plt.pause(0.0000001)

            # plot()

        elapsed = time.time() - start
        print(f"\nElapsed: {elapsed}")
        print(Wji)
        print(Wkj)
        plt.ioff()
        if (n_hidden == 2):
            plot(n_hidden)
    return losses, losses_0
def main():
    """Train a 2-layer network (sigmoid hidden layer, linear output) to
    regress the surface produced by makeData(), optionally holding out a
    test split, and plot the fitted surface at the end.

    :return: (train_loss, test_loss) — per-epoch error histories (test_loss
        entries are 0 when MAKE_VALIDATE_SET is false).
    """
    train_set_percentage = TRAIN_SET_PERCENTAGE
    hidden_layer_nodes = HIDDEN_LAYER_NODES

    # Data Set — preview the raw target surface before training.
    patterns, target = makeData()
    fig = plt.figure()
    ax = fig.add_subplot(111, projection="3d")
    ax.scatter(xs=patterns[0], ys=patterns[1], zs=target.flatten())
    plt.show()
    # BUGFIX: a stray exit() here (debug leftover) killed the process right
    # after the preview plot, making the entire training loop, the final
    # surface plot, and the return statement unreachable. Removed.

    # Subsample Train and Test sets (random shuffle, then split by ratio)
    if MAKE_VALIDATE_SET:
        num_samples = target.shape[1]
        args = np.arange(num_samples)
        np.random.shuffle(args)
        split_idx = int(num_samples * (train_set_percentage))
        train_args, test_args = args[:split_idx], args[split_idx:]
        patterns_test, patterns = patterns[:, test_args], patterns[:, train_args]
        target_test, target = target[:, test_args], target[:, train_args]

    # W_1: input -> hidden; W_2: hidden(+bias) -> output
    W_1 = _3_1_2_single_layer_perceptron.initializeWeights(
        3, hidden_layer_nodes)
    W_2 = _3_1_2_single_layer_perceptron.initializeWeights(
        hidden_layer_nodes + 1, 1)
    layer1 = _3_2_1.Layer(activation=_3_2_1.v_sigmoid,
                          d_activation=_3_2_1.v_d_sigmoid)
    # Linear (identity) output layer — appropriate for regression.
    layer2 = _3_2_1.Layer(activation=np.vectorize(lambda x: x),
                          d_activation=np.vectorize(lambda x: 1))

    train_loss = []
    test_loss = []
    for epoch in range(EPOCHS):
        ## Training: forward pass, then backprop deltas
        h_out = layer1.forward(W_1, patterns, pad=True)
        y_out = layer2.forward(W_2, h_out, pad=False)
        error = _3_1_2_single_layer_perceptron.error(target, y_out)
        d_y_out = y_out - target
        d_y_in = layer2.d_in(d_y_out)
        DELTA_W_2 = _3_2_1.DELTA(ETA, d_y_in, h_out)
        d_h_out = layer1.d_out_hidden(d_y_in, W_2)
        # trim off last term of d_h_in since h_in was padded with bias
        d_h_in = layer1.d_in(d_h_out)[:-1]
        DELTA_W_1 = _3_2_1.DELTA(ETA, d_h_in, patterns)
        train_loss.append(error)

        ## Test: evaluate held-out split with the pre-update weights
        test_error = 0
        if MAKE_VALIDATE_SET:
            h_out_test = layer1.forward(W_1, patterns_test, pad=True)
            y_out_test = layer2.forward(W_2, h_out_test, pad=False)
            test_error = _3_1_2_single_layer_perceptron.mse(
                target_test, y_out_test)
        test_loss.append(test_error)
        print(f"Epoch: {epoch} Train error: {error} Test Error: {test_error}")

        W_1 = W_1 + DELTA_W_1
        W_2 = W_2 + DELTA_W_2
        # Early stopping once the largest weight update becomes negligible.
        deltaMax = max(np.max(DELTA_W_1), np.max(DELTA_W_2))
        if deltaMax < 10**-5:
            break

    # Plot the network's fitted surface over the training inputs.
    fig = plt.figure()
    ax = fig.add_subplot(111, projection="3d")
    ax.scatter(xs=patterns[0], ys=patterns[1], zs=y_out.flatten())
    plt.show()
    return train_loss, test_loss
def runNN():
    """Train a 2-layer (one hidden layer, 4 units) classifier on two
    Gaussian clouds, where class B is the union of two clouds (B and C)
    and is therefore not linearly separable from A.

    :return: list of per-epoch training errors.
    """
    eta = 0.001   # learning rate
    ndata = 100   # samples per generated cloud
    n_hidden = 4  # hidden-layer width
    vsigmoid = np.vectorize(sigmoid)
    vd_sigmoid = np.vectorize(d_sigmoid)

    # Training Data sets A and B (B absorbs C, making it bimodal)
    mA, sigmaA = [1.0, 0.5], 0.5
    mB, sigmaB = [-2.0, 0.0], 0.5
    mC, sigmaC = [5, 0.0], 0.5
    ## Really quick just added a second set C to check this actually works on lin non-seperable.
    ## @Bryan please plug in the correct data set
    A = _3_1_1_lin_seperable_data.generateData(2, mA, sigmaA, ndata)
    B = _3_1_1_lin_seperable_data.generateData(2, mB, sigmaB, ndata)
    C = _3_1_1_lin_seperable_data.generateData(2, mC, sigmaC, ndata)
    B = np.hstack([B, C])

    X = np.hstack([A, B])
    # Add biasing term (constant-1 row)
    X = np.vstack([X, np.ones(X.shape[1])])

    # Targets: +1 for class A, -1 for class B
    T_A, T_B = np.ones(A.shape[1]), -1 * np.ones(B.shape[1])
    T = np.hstack([T_A, T_B])

    # Wji = weights at layer j (input -> hidden)
    # Wkj = weights at layer k (hidden -> output)
    # Wkj has a + 1 to reflect the weights associated with the bias
    Wji = _3_1_2_single_layer_perceptron.initializeWeights(
        X.shape[0], n_hidden)
    Wkj = _3_1_2_single_layer_perceptron.initializeWeights(n_hidden + 1, 1)

    losses = []
    plt.plot(A[0], A[1], "ro", B[0], B[1], "bo")
    plt.show()

    for epoch in range(100):
        # Forward pass, layer j:
        #   X -> affine(Wji) -> Hj_in -> sigmoid -> Hj_out (+ bias row)
        # Hj_phid carries the derivative of the activation for backprop.
        Hj_in = _3_1_2_single_layer_perceptron.forwardPass(Wji, X)
        Hj_bias = np.ones(Hj_in.shape[1])
        Hj_out = np.vstack([vsigmoid(Hj_in), Hj_bias])
        Hj_phid = np.vstack([vd_sigmoid(Hj_in), Hj_bias])

        # Forward pass, layer k:
        #   Hj_out (biased) -> affine(Wkj) -> Yk_in -> sigmoid -> Yk_out
        Yk_in = _3_1_2_single_layer_perceptron.forwardPass(Wkj, Hj_out)
        Yk_out = vsigmoid(Yk_in)
        Yk_phid = vd_sigmoid(Yk_in)

        classification_percent = np.sum(np.sign(Yk_out) == T) / len(T)
        error = _3_1_2_single_layer_perceptron.error(T, Yk_out)
        losses.append(error)
        print(
            f"Epoch: {epoch} Error: {error} Percentage: {classification_percent}"
        )

        # Backprop (NOTE: removed a redundant `eta = 0.001` dead store that
        # re-assigned the same value every iteration).
        d_k = dFinal(T, Yk_out, Yk_phid)
        DELTA_Wkj = DELTA(eta, d_k, Hj_out)
        d_j = dHidden(d_k, Wkj, Hj_phid)
        DELTA_Wji = DELTA(eta, d_j, X)[:-1, :]  # drop bias-row gradient
        Wji = Wji + DELTA_Wji
        Wkj = Wkj + DELTA_Wkj

        # BUGFIX: previously max(np.max(DELTA_Wji), np.max(DELTA_Wji)) —
        # the same matrix twice, so DELTA_Wkj was never considered.
        delta_max = max(np.max(DELTA_Wji), np.max(DELTA_Wkj))
        print(delta_max)
        # BUGFIX: `10 ^ -6` is bitwise XOR (evaluates to -16), so the
        # early-stop threshold was wrong; 1e-6 is the intended value
        # (the sibling main() uses 10**-5 for the same check).
        if delta_max < 1e-6:
            break
    return losses
def runNN():
    """Train an n_first-n_hidden-n_first (8-3-8) encoder network: the input
    patterns are also the targets, so the 3-unit hidden layer must learn a
    compressed code for the 8 patterns.  Prints the learned hidden codes
    and weight signs at the end for inspection.

    :return: list of per-epoch training errors.
    """
    eta = 0.1  # learning rate
    vsigmoid = np.vectorize(_3_2_1_two_layer_network.sigmoid)
    vd_sigmoid = np.vectorize(_3_2_1_two_layer_network.d_sigmoid)

    # define nodes
    n_hidden = 3
    n_first = 8
    n_last = n_first  # autoencoder: output width equals input width

    # Training Data set A; targets are the data itself.
    A = generate_Data(n_first)
    print(A.shape[0])
    # Add biasing term (constant-1 row)
    X = np.vstack([A, np.ones(A.shape[1])])

    # random weights: Wji input -> hidden, Wkj hidden(+bias) -> output
    Wji = _3_1_2_single_layer_perceptron.initializeWeights(
        X.shape[0], n_hidden)
    Wkj = _3_1_2_single_layer_perceptron.initializeWeights(
        n_hidden + 1, n_last)

    losses = []
    for epoch in range(5000):
        # Forward pass, layer j (sigmoid + appended bias row)
        Hj_in = _3_1_2_single_layer_perceptron.forwardPass(Wji, X)
        Hj_bias = np.ones(Hj_in.shape[1])
        Hj_out = np.vstack([vsigmoid(Hj_in), Hj_bias])
        Hj_phid = np.vstack([vd_sigmoid(Hj_in), Hj_bias])

        # Forward pass, layer k
        Yk_in = _3_1_2_single_layer_perceptron.forwardPass(Wkj, Hj_out)
        Yk_out = vsigmoid(Yk_in)
        Yk_phid = vd_sigmoid(Yk_in)

        # Targets are the inputs A themselves.
        classification_percent = np.sum(np.sign(Yk_out) == A) / len(A)  ##T
        error = _3_1_2_single_layer_perceptron.error(A, Yk_out)
        losses.append(error)
        print(
            f"Epoch: {epoch} Error: {error} Percentage: {classification_percent}"
        )

        # Backprop
        d_k = _3_2_1_two_layer_network.dFinal(A, Yk_out, Yk_phid)
        DELTA_Wkj = _3_2_1_two_layer_network.DELTA(eta, d_k, Hj_out)
        d_j = _3_2_1_two_layer_network.dHidden(d_k, Wkj, Hj_phid)
        DELTA_Wji = _3_2_1_two_layer_network.DELTA(eta, d_j, X)[:-1, :]
        Wji = Wji + DELTA_Wji
        Wkj = Wkj + DELTA_Wkj

        # BUGFIX: previously max(np.max(DELTA_Wji), np.max(DELTA_Wji)) —
        # the same matrix twice, so DELTA_Wkj was never considered.
        delta_max = max(np.max(DELTA_Wji), np.max(DELTA_Wkj))
        print(delta_max)
        # BUGFIX: `10 ^ -6` is bitwise XOR (evaluates to -16), so the
        # early-stop condition could never fire as intended; use 1e-6.
        if delta_max < 1e-6:
            break

    # corresponding internal code (representing binary conversion)
    # -> should be impossible for 2 hidden layer (can be seen at Yk_out)
    # with enough steps (50K is enough) error goes close to 0.
    # network "encodes" Data in strength of hj_out and in incredibly high
    # weights of Wkj. hj_out is either 0.3 0.5 or 1 (and those negated)
    # print(Wji)
    print(np.sign(np.transpose(Hj_out[:-1])))
    print(np.all(np.sign(Yk_out) == A))
    for i in range(8):
        print(
            f"{np.transpose(A)[i]} & {np.sign(np.transpose(Hj_out[:-1])[i])}")
    print(np.sign(Wji))
    return losses
def main():
    """Lab 3.1.3: train four single-layer perceptrons in parallel, each on a
    differently subsampled version of the same two-class data, and plot the
    four resulting decision boundaries for comparison.

    :return: (losses_0, losses_1, losses_2, losses_3) — per-step error
        histories for the four subsampling cases.
    """
    eta = 0.001
    ndata = 100
    subsampleCase = 3  # NOTE(review): unused — all four cases run below
    part = 2
    plt.ion()

    # 3.1.3 first part (Overlapping clouds)
    if part == 1:
        mA, sigmaA = [1.0, 0.3], 0.2
        mB, sigmaB = [0.5, 0.0], 0.3
        A = _3_1_1_lin_seperable_data.generateData(2, mA, sigmaA, ndata)
        B = _3_1_1_lin_seperable_data.generateData(2, mB, sigmaB, ndata)
        plt.plot(A[0], A[1], "bo")
        plt.plot(B[0], B[1], "ro")
        plt.show()
    # 3.1.3 second part (split class A into 2 parts)
    elif part == 2:
        # Training Data sets A and B
        mA, sigmaA = [1.0, 0.3], 0.2
        mB, sigmaB = [0.0, -0.1], 0.3
        A = generateNonlinearData(2, mA, sigmaA, ndata)
        B = _3_1_1_lin_seperable_data.generateData(2, mB, sigmaB, ndata)
        # plt.plot(A[0],A[1],"go")
        # plt.plot(B[0],B[1],"mo")
        # plt.show()

    # Subselection — four subsampling cases built from A and B.
    # NOTE(review): np.random.choice is applied per ROW of A/B, so the x
    # and y coordinates are subsampled independently, which decouples each
    # sample's coordinates — confirm this is intended.
    # subsampleCase == 0: 75% of each class
    A_0 = np.asarray([np.random.choice(i, int(round(ndata * .75)),
                                       replace=False) for i in A])
    B_0 = np.asarray([np.random.choice(i, int(round(ndata * .75)),
                                       replace=False) for i in B])
    # subsampleCase == 1: 50% of A, all of B
    A_1 = np.asarray([np.random.choice(i, int(round(ndata * .50)),
                                       replace=False) for i in A])
    B_1 = B
    # subsampleCase == 2: all of A, 50% of B
    A_2 = A
    B_2 = np.asarray([np.random.choice(i, int(round(ndata * .50)),
                                       replace=False) for i in B])
    # subsampleCase == 3: biased split of A by cluster
    # TODO: apply to entire Class A not A[0]
    # A_g = greater then 0 in A[0,:]
    A_g = A[:, :50]
    A_l = A[:, 50:]
    A_l_1 = A_l[:, np.random.choice(A_l.shape[1],
                                    int(round(A_l.shape[1] * 0.80)),
                                    replace=False)]  # only take 80%
    A_g_1 = A_g[:, np.random.choice(A_g.shape[1],
                                    int(round(A_g.shape[1] * 0.20)),
                                    replace=False)]  # only take 20%
    A_3 = np.hstack([A_l_1, A_g_1])
    B_3 = B

    # Build the four input matrices and their +1/-1 target labels.
    X_0 = np.hstack([A_0, B_0])
    T_A_0, T_B_0 = np.ones(A_0.shape[1]), -1 * np.ones(B_0.shape[1])
    X_1 = np.hstack([A_1, B_1])
    T_A_1, T_B_1 = np.ones(A_1.shape[1]), -1 * np.ones(B_1.shape[1])
    X_2 = np.hstack([A_2, B_2])
    T_A_2, T_B_2 = np.ones(A_2.shape[1]), -1 * np.ones(B_2.shape[1])
    X_3 = np.hstack([A_3, B_3])
    T_A_3, T_B_3 = np.ones(A_3.shape[1]), -1 * np.ones(B_3.shape[1])
    # Add biasing term (constant-1 row)
    X_0 = np.vstack([X_0, np.ones(X_0.shape[1])])
    X_1 = np.vstack([X_1, np.ones(X_1.shape[1])])
    X_2 = np.vstack([X_2, np.ones(X_2.shape[1])])
    X_3 = np.vstack([X_3, np.ones(X_3.shape[1])])
    # Targets
    T_0 = np.hstack([T_A_0, T_B_0])
    T_1 = np.hstack([T_A_1, T_B_1])
    T_2 = np.hstack([T_A_2, T_B_2])
    T_3 = np.hstack([T_A_3, T_B_3])

    # One single-output perceptron per subsampling case.
    W_0 = _3_1_2_single_layer_perceptron.initializeWeights(X_0.shape[0], 1)
    W_1 = _3_1_2_single_layer_perceptron.initializeWeights(X_1.shape[0], 1)
    W_2 = _3_1_2_single_layer_perceptron.initializeWeights(X_2.shape[0], 1)
    W_3 = _3_1_2_single_layer_perceptron.initializeWeights(X_3.shape[0], 1)
    # print (W.shape)
    losses_0 = []
    losses_1 = []
    losses_2 = []
    losses_3 = []
    start = time.time()
    # train in batch mode (delta rule), re-plotting after each outer pass
    for i in range(2):
        for _ in range(ndata):
            Y_0 = _3_1_2_single_layer_perceptron.forwardPass(W_0, X_0)
            Y_1 = _3_1_2_single_layer_perceptron.forwardPass(W_1, X_1)
            Y_2 = _3_1_2_single_layer_perceptron.forwardPass(W_2, X_2)
            Y_3 = _3_1_2_single_layer_perceptron.forwardPass(W_3, X_3)
            error_0 = _3_1_2_single_layer_perceptron.error(T_0, Y_0)
            error_1 = _3_1_2_single_layer_perceptron.error(T_1, Y_1)
            error_2 = _3_1_2_single_layer_perceptron.error(T_2, Y_2)
            error_3 = _3_1_2_single_layer_perceptron.error(T_3, Y_3)
            # print("part1.error: ", _, " ", error)
            losses_0.append(error_0)
            losses_1.append(error_1)
            losses_2.append(error_2)
            losses_3.append(error_3)
            # Batch delta-rule updates: dW = -eta * (Y - T) X^T
            dW_0 = -eta * np.matmul((Y_0 - T_0), np.transpose(X_0))
            dW_1 = -eta * np.matmul((Y_1 - T_1), np.transpose(X_1))
            dW_2 = -eta * np.matmul((Y_2 - T_2), np.transpose(X_2))
            dW_3 = -eta * np.matmul((Y_3 - T_3), np.transpose(X_3))
            W_0 = W_0 + dW_0
            W_1 = W_1 + dW_1
            W_2 = W_2 + dW_2
            W_3 = W_3 + dW_3
        #
        def line(x, normal):
            # Decision line for a weight row `normal`:
            # 0 = w0x + w1y + w2
            # y = (-w0x - w2) / w1
            return (-normal[:, 0] * x - normal[:, 2]) / normal[:, 1]

        # Axis limits from the union of all four input sets (+/- 0.5)
        X = np.hstack([X_0, X_1, X_2, X_3])
        maxX = X[:, np.argmax(X[0])][0] + 0.5
        minX = X[:, np.argmin(X[0])][0] - 0.5
        maxY = X[:, np.argmax(X[1])][1] + 0.5
        minY = X[:, np.argmin(X[1])][1] - 0.5
        __x = np.arange(minX, maxX, 0.05)
        # plt.plot(x_1, line(x_1, W_1))
        # plt.plot(x_2, line(x_2, W_2))
        # plt.plot(x_3, line(x_3, W_3))

        def plot():
            # Scatter both classes and overlay all four decision boundaries.
            plt.clf()
            plt.ylim(minY, maxY)
            plt.xlim(minX, maxX)
            plt.plot(A[0], A[1], "ro", B[0], B[1], "bo")
            plt.plot(__x,
                     _3_1_2_single_layer_perceptron.decisionBoundary(__x, W_0),
                     label="25 each")
            plt.plot(__x,
                     _3_1_2_single_layer_perceptron.decisionBoundary(__x, W_1),
                     label="50 A")
            plt.plot(__x,
                     _3_1_2_single_layer_perceptron.decisionBoundary(__x, W_2),
                     label="50 B")
            plt.plot(__x,
                     _3_1_2_single_layer_perceptron.decisionBoundary(__x, W_3),
                     label="splitted A")
            plt.title("Grid")
            plt.xlabel("X-Coord")
            plt.ylabel("Y-Coord")
            plt.legend()
            plt.show()
            plt.pause(0.000001)

        plot()

    elapsed = time.time() - start
    print(f"\nElapsed: {elapsed}")
    plt.ioff()
    plot()
    return losses_0, losses_1, losses_2, losses_3