def predict(network, x):
    W1, W2, W3 = network['W1'], network['W2'], network['W3']
    b1, b2, b3 = network['b1'], network['b2'], network['b3']

    a1 = np.dot(x, W1) + b1
    z1 = activation_function.sigmoid(a1)
    a2 = np.dot(z1, W2) + b2
    z2 = activation_function.sigmoid(a2)
    a3 = np.dot(z2, W3) + b3
    return activation_function.softmax(a3)

def forward(network, x):
    W1, W2, W3 = network['W1'], network['W2'], network['W3']
    b1, b2, b3 = network['b1'], network['b2'], network['b3']

    a1 = np.dot(x, W1) + b1
    z1 = af.sigmoid(a1)
    a2 = np.dot(z1, W2) + b2
    z2 = af.sigmoid(a2)
    a3 = np.dot(z2, W3) + b3
    y = af.identity_function(a3)
    return y

def forward(network, x):
    W1, W2, W3 = network["W1"], network["W2"], network["W3"]
    b1, b2, b3 = network["b1"], network["b2"], network["b3"]

    a1 = np.dot(x, W1) + b1
    z1 = af.sigmoid(a1)
    a2 = np.dot(z1, W2) + b2
    z2 = af.sigmoid(a2)
    a3 = np.dot(z2, W3) + b3
    y = a3
    return y

def predict(network, x):
    W1, W2, W3 = network["W1"], network["W2"], network["W3"]
    b1, b2, b3 = network["b1"], network["b2"], network["b3"]

    a1 = np.dot(x, W1) + b1
    z1 = af.sigmoid(a1)
    a2 = np.dot(z1, W2) + b2
    z2 = af.sigmoid(a2)
    a3 = np.dot(z2, W3) + b3
    y = af.softmax(a3)
    return y

def predict(network, x):
    W1, W2, W3 = network['W1'], network['W2'], network['W3']
    b1, b2, b3 = network['b1'], network['b2'], network['b3']

    a1 = np.dot(x, W1) + b1
    z1 = sigmoid(a1)
    a2 = np.dot(z1, W2) + b2
    z2 = sigmoid(a2)
    a3 = np.dot(z2, W3) + b3
    y = softmax(a3)
    return y

def predict(network, x):
    w1, w2, w3 = network["W1"], network["W2"], network["W3"]
    b1, b2, b3 = network["b1"], network["b2"], network["b3"]

    a1 = np.dot(x, w1) + b1
    z1 = sigmoid(a1)
    a2 = np.dot(z1, w2) + b2
    z2 = sigmoid(a2)
    a3 = np.dot(z2, w3) + b3
    y = softmax(a3)
    return y

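# The predict variants above call bare sigmoid/softmax helpers that are not
# shown in this section. A minimal sketch of what they might look like
# (an assumption, not the original implementations); the max-subtraction in
# softmax is the usual guard against overflow, and this version targets 1-D
# activation vectors:
import numpy as np

def sigmoid(x):
    # element-wise logistic function
    return 1 / (1 + np.exp(-x))

def softmax(a):
    # shifting by the max leaves the result unchanged but avoids overflow
    c = np.max(a)
    exp_a = np.exp(a - c)
    return exp_a / np.sum(exp_a)
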
def forward(network, x):
    w1, w2, w3 = network["w1"], network["w2"], network["w3"]
    b1, b2, b3 = network["b1"], network["b2"], network["b3"]

    a1 = np.dot(x, w1) + b1
    z1 = sigmoid(a1)
    a2 = np.dot(z1, w2) + b2  # feed the activated z1 forward, not the raw a1
    z2 = sigmoid(a2)
    a3 = np.dot(z2, w3) + b3
    y = identity_function(a3)
    return y

def forward(network, x):
    W1, W2, W3 = network['W1'], network['W2'], network['W3']
    b1, b2, b3 = network['b1'], network['b2'], network['b3']

    a1 = np.dot(x, W1) + b1
    z1 = af.sigmoid(a1)
    a2 = np.dot(z1, W2) + b2
    z2 = af.sigmoid(a2)
    a3 = np.dot(z2, W3) + b3
    z3 = af.sigmoid(a3)
    y = z3
    return y

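# All of the forward/predict variants above expect a dict with keys
# 'W1'..'W3' and 'b1'..'b3'. A minimal init_network sketch producing
# compatible shapes; the 2-3-2-2 layer sizes are arbitrary, chosen only
# for illustration:
import numpy as np

def init_network():
    network = {}
    network['W1'] = np.random.randn(2, 3)
    network['b1'] = np.zeros(3)
    network['W2'] = np.random.randn(3, 2)
    network['b2'] = np.zeros(2)
    network['W3'] = np.random.randn(2, 2)
    network['b3'] = np.zeros(2)
    return network
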
def linear_activation_forward(A_prev, W, b, activation):
    """
    Implement the forward propagation for the LINEAR->ACTIVATION layer

    Arguments:
    A_prev -- activations from previous layer (or input data): (size of previous layer, number of examples)
    W -- weights matrix: numpy array of shape (size of current layer, size of previous layer)
    b -- bias vector, numpy array of shape (size of the current layer, 1)
    activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"

    Returns:
    A -- the output of the activation function, also called the post-activation value
    cache -- a python tuple containing "linear_cache" and "activation_cache";
             stored for computing the backward pass efficiently
    """
    if activation == 'relu':
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = relu(Z)
    elif activation == 'sigmoid':
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = sigmoid(Z)

    cache = (linear_cache, activation_cache)
    return A, cache

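# linear_activation_forward above relies on linear_forward, relu, and sigmoid
# helpers defined elsewhere; in this snippet's convention the activation
# helpers return (A, cache). A sketch consistent with the shapes in the
# docstring (an assumption, not the original code):
import numpy as np

def linear_forward(A_prev, W, b):
    # Z has shape (size of current layer, number of examples)
    Z = np.dot(W, A_prev) + b
    cache = (A_prev, W, b)
    return Z, cache

def relu(Z):
    # returns the activation and Z itself as the activation cache
    return np.maximum(0, Z), Z

def sigmoid(Z):
    return 1 / (1 + np.exp(-Z)), Z
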
def epoch(eta=0.04, penalty=0.4, epochs=200, mini_batch_size=100, t0=5, t1=50,
          create_conf=False):
    layer1 = DenseLayer(features, 100, sigmoid())
    #layer2 = DenseLayer(100, 50, sigmoid())
    #layer3 = DenseLayer(100, 50, sigmoid())
    layer4 = DenseLayer(100, 10, softmax())
    layers = [layer1, layer4]
    network = NN(layers)

    cost_array = np.zeros((epochs, 2))

    def learning_schedule(t):
        return 0.04  #t0/(t+t1)

    for i in range(epochs):
        random.shuffle(batch)
        X_train_shuffle = X_train[batch]
        one_hot_shuffle = one_hot[batch]
        Y_train_shuffle = Y_train[batch]
        #eta = learning_schedule(i)
        network.SGD(ce, 100, X_train_shuffle, one_hot_shuffle, eta, penalty)
        Y_pred = np.argmax(network.feedforward(X_test), axis=1)
        Y_pred_train = np.argmax(network.feedforward(X_train_shuffle), axis=1)
        cost_array[i, 0] = accuracy()(Y_test.ravel(), Y_pred)
        cost_array[i, 1] = accuracy()(Y_train_shuffle.ravel(), Y_pred_train)

    print("accuracy on train data = %.3f" % cost_array[-1, 1])
    print("accuracy on test data = %.3f" % cost_array[-1, 0])

    if create_conf:
        # creating confusion matrix
        numbers = np.arange(0, 10)
        conf_matrix = confusion_matrix(Y_pred, Y_test, normalize="true")
        heatmap = sb.heatmap(conf_matrix, cmap="viridis",
                             xticklabels=["%d" % i for i in numbers],
                             yticklabels=["%d" % i for i in numbers],
                             cbar_kws={'label': 'Accuracy'},
                             fmt=".2", edgecolor="none", annot=True)
        heatmap.set_xlabel("pred")
        heatmap.set_ylabel("true")
        heatmap.set_title(r"FFNN prediction accuracy with $\lambda$ = {:.1e} $\eta$ = {:.1e}"
                          .format(penalty, eta))
        fig = heatmap.get_figure()
        fig.savefig("../figures/MNIST_confusion_net.pdf", bbox_inches='tight',
                    pad_inches=0.1, dpi=1200)
        plt.show()

    return cost_array[-1]

def predict(network, x):
    W1, W2, W3 = network['W1'], network['W2'], network['W3']
    b1, b2, b3 = network['b1'], network['b2'], network['b3']

    # Layer-by-layer composition: each layer computes a matrix dot product
    # (the data is treated as a 2-D array).
    a1 = np.dot(x, W1) + b1
    # The sigmoid activation function decides how strongly each node fires.
    z1 = sigmoid(a1)
    # Repeat the same step for the next layer.
    a2 = np.dot(z1, W2) + b2
    z2 = sigmoid(a2)
    a3 = np.dot(z2, W3) + b3
    # At the final output, softmax turns each node (digits 0-9) into a
    # probability relative to 1.0. The node with the largest raw value also
    # gets the highest probability, and since softmax is computationally
    # expensive, it is often skipped at inference time and used only
    # during training.
    y = softmax(a3)
    return y

def predict(self, x):
    W1 = self.params['W1']
    W2 = self.params['W2']
    b1 = self.params['b1']
    b2 = self.params['b2']

    a1 = np.dot(x, W1) + b1
    z1 = sigmoid(a1)
    a2 = np.dot(z1, W2) + b2
    y = softmax(a2)
    return y

def predict(self, y_new):
    y_tmp = y_new + [-1]
    predicted = sigmoid(
        np.dot(np.array(y_tmp).reshape(1, self.input_layer), self.weights))
    if predicted > 0.5:
        print("Fighter Plane")
        print("Confidence: ", predicted)
    else:
        print("Bomber Plane")
        print("Confidence: ", 1 - predicted)
    return predicted

def model(self, x, y, epochs=100, learning='gradient'):
    """
    Parameters
    ----------
    x : ndarray, list
        Features of the training data
    y : ndarray, list
        Labels of the training data
    epochs : int, optional
        Number of epochs to run
    learning : str, optional
        Type of the learning

    Return
    ------
    None : NoneType
    """
    self.epochs = epochs
    self.learning = learning
    self.weights = np.random.randn(self.input_layer, 1)

    for e in range(self.epochs):
        del_weights = np.zeros(self.weights.shape)
        predict = list()
        for i in range(len(x)):
            x_tmp = list(x[i]) + [-1]
            output = sigmoid(
                np.dot(np.array(x_tmp).reshape(1, self.input_layer),
                       self.weights))
            predict.append(output[0][0])
            for j in range(self.input_layer):
                if self.learning == 'gradient':
                    del_weights[j] += self.lr * (y[i] - output[0]) * \
                        output[0] * (1 - output[0]) * x_tmp[j]
                elif self.learning == 'perceptron':
                    del_weights[j] += self.lr * (y[i] - output[0]) * x_tmp[j]
                else:
                    print("Not a valid learning algorithm.")
                    sys.exit()
        for j in range(self.input_layer):
            self.weights[j] += del_weights[j]
        self.training_loss.append(MSE(y, predict))
        print(
            f"Epochs: {e+1}/{epochs}.. Training Loss: {self.training_loss[e]:.3f}..\nWeights: {self.weights.tolist()}\n"
        )

    self.slope = (-1 * self.weights[0] / self.weights[1])[0]
    self.intercept = (self.weights[2] / self.weights[1])[0]
    return

def build_network(self):
    # type: () -> Network
    if (self._number_of_input_neurons is None
            or self._number_of_output_neurons is None):
        raise ValueError(
            'A network must have both an input layer and an output layer in order to be built.'
        )
    hidden_layer_neurons = [[
        HiddenOrOutputLayerNeuron(self._hidden_layers_weights[key][i],
                                  self._hidden_layers_biases[key][i],
                                  sigmoid())
        for i in range(0, number_of_neurons)
    ] for key, number_of_neurons in self._hidden_layers_distribution.items()]
    output_layer_neurons = [
        HiddenOrOutputLayerNeuron(self._output_neuron_weights[i],
                                  self._output_neuron_biases[i], sigmoid())
        for i in range(0, self._number_of_output_neurons)
    ]
    return Network(hidden_layer_neurons, output_layer_neurons)

def forward(self, x):
    """
    Forward pass in the Neural network

    Parameters
    ----------
    x : ndarray, list
        An input training example

    Return
    ------
    output : ndarray
        An output vector
    """
    output = sigmoid(np.dot(self.W.T, x) + self.b)
    return output

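# forward above depends on self.W and self.b being set elsewhere. Assuming
# W has shape (n_in, n_out) and b is (n_out, 1), a column input x of shape
# (n_in, 1) yields an (n_out, 1) output with entries in (0, 1). A
# free-standing equivalent, for illustration only:
import numpy as np

def forward_standalone(W, b, x):
    # same computation as the method, without the owning class
    return 1 / (1 + np.exp(-(np.dot(W.T, x) + b)))

# x = np.random.randn(4, 1); W = np.random.randn(4, 3); b = np.zeros((3, 1))
# forward_standalone(W, b, x).shape  # -> (3, 1)
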
    def b(self):
        return self.parameters[:, :1]

    def compute_z(self, a):
        x = np.concatenate((np.ones([1, 1]), a), axis=0)
        return self.parameters.dot(x)

    def compute_forward(self, a):
        if self.layer_type == layertype.INPUT:
            return a
        return self.f.evaluate(self.compute_z(a))
        #return dgemv(1.0, self.parameters, x)  # slower on my machine

    # Also slower
    #def compute_forward(self, a):
    #    b = self.parameters[:,:1]
    #    W = self.parameters[:,1:]
    #    return self.dgemv(1.0, W, a, 1.0, b)

    def compute_dadb(self, a):
        return self.f.evaluate_firstderivative(self.compute_z(a))


if __name__ == "__main__":
    # run unit tests
    lay = Layer(200, 250, sigmoid(), layertype.INSIDE)
    lay.initialize_weights()
    x = np.random.randn(lay.nb_neurons_prev()).reshape((-1, 1))
    y = lay.compute_forward(x)
    print(y.shape)

import numpy as np
import activation_function

# make matrices
A = np.array([1, 2, 3, 4])
print(A)
print(np.ndim(A))
print(np.shape(A))

B = np.array([[1, 2], [3, 4], [5, 6]])
print(B)
print(np.ndim(B))
print(np.shape(B))

C = np.array([[10, 10, 10], [5, 5, 5]])
print(np.dot(B, C), "\n")

# neural network example
W = np.array([[5, 5, 8, 10, 10], [1, 5, 3, 4, 5], [10, 8, 9, 7, 10],
              [1, 1, 5, 2, 10]])
h1 = np.dot(A, W)
print(h1)
Z1 = activation_function.sigmoid(h1)
print("Z1: \n", Z1)

z = np.ravel(FrankeFunction(x, y) + noise * np.random.randn(n, n))

data = data_prep()
X = data.X_D(x, y, z, 1)

# X_train, X_test, z_train, z_test
train_input, test_input, train_output, test_output = data.train_test_split_scale()

y_train = np.reshape(train_output, (-1, 1))  # the shape was (x,); we need (x, 1)
y_test = np.reshape(test_output, (-1, 1))
x_train = train_input[:, [1, 2]]
x_test = test_input[:, [1, 2]]

# Setting up network
layer1 = DenseLayer(2, 10, sigmoid())
layer2 = DenseLayer(10, 20, sigmoid())
layer3 = DenseLayer(20, 1, identity())

layers = [layer1, layer2, layer3]
network = NN(layers)

# Finding MSE on untrained network
mse = MSE()
print("Test MSE before training network: %.4f" %
      mse(y_test, network.feedforward(x_test)))

# Back-propagation
m = x_train.shape[0]
"""batch = np.arange(0, m)
for i in range(500):
    random.shuffle(batch)
    x_train_shuffle = x_train[batch]
    y_train_shuffle = y_train[batch]

def test_sigmoid(self):
    self.assertEqual(af.sigmoid(-1), 0.2689414213699951)
    self.assertEqual(af.sigmoid(0), 0.5)
    self.assertEqual(af.sigmoid(1), 0.7310585786300049)

def model(self, x, y, epochs=100):
    """
    Parameters
    ----------
    x : ndarray
        Features of the training data
    y : ndarray
        Labels of the training data
    epochs : int, optional
        Number of epochs

    Returns
    -------
    None : NoneType
    """
    self.epochs = epochs
    self.data_size = len(x)

    for i in range(self.layers - 2):
        self.weights.append(
            self.__initialization(self.nodes[i], self.nodes[i + 1] - 1))
    self.weights.append(
        self.__initialization(self.nodes[i + 1], self.output_layer))

    for e in range(self.epochs):
        predict = list()
        for i in range(self.data_size):
            tmp = [x[i]]
            # Forward pass
            for n in range(self.layers - 1):
                self.activations.append(list(tmp[0]) + [-1])
                tmp = sigmoid(
                    np.dot(
                        np.array(list(tmp[0]) + [-1]).reshape(
                            1, self.nodes[n]), self.weights[n]))
            output = tmp[0]
            predict.append(output)

            # Backward pass: output-layer weight updates
            del_weights0 = [[0 for u in range(self.nodes[-2])]
                            for v in range(self.nodes[-1])]
            for u in range(self.nodes[-1]):
                for v in range(self.nodes[-2]):
                    del_weights0[u][v] += self.lr * (
                        y[i][u] - output[u]) * output[u] * (
                            1 - output[u]) * self.activations[-1][v]

            # Hidden-layer weight updates
            del_weights1 = [[0 for w in range(self.nodes[-2] - 1)]
                            for q in range(self.nodes[-3])]
            for q in range(self.nodes[-3]):
                for w in range(self.nodes[-2] - 1):
                    for z in range(self.nodes[-1]):
                        # accumulate entry-wise (the original added to the
                        # whole list) and back-propagate the error of output
                        # node z, not a stale loop index
                        del_weights1[q][w] += self.lr * (
                            y[i][z] - output[z]) * output[z] * (
                                1 - output[z]) * self.weights[1][w][
                                    z] * self.activations[-1][w] * (
                                        1 - self.activations[-1][w]
                                    ) * self.activations[-2][q]

            # transpose (not reshape) so the indices line up with weights[-1]
            del_weights0 = np.array(del_weights0).T
            for u in range(self.nodes[-2]):
                for v in range(self.nodes[-1]):
                    self.weights[-1][u][v] += del_weights0[u][v]
            for u in range(self.nodes[-3]):
                for v in range(self.nodes[-2] - 1):
                    self.weights[-2][u][v] += del_weights1[u][v]

        predict = np.array(predict).reshape(self.data_size, self.output_layer)
        self.training_loss.append(MSE(y, predict))
        print(
            f"Epochs: {e+1}/{self.epochs}.. Training Loss: {self.training_loss[e]}.."
        )

def layer(X, W1, B1):
    A1 = np.dot(X, W1) + B1
    return activation_function.sigmoid(A1)

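# A quick usage sketch for layer above; the shapes are illustrative only:
# X = np.random.randn(1, 4)    # one sample with 4 features
# W1 = np.random.randn(4, 3)   # 4 inputs -> 3 neurons
# B1 = np.zeros(3)
# A = layer(X, W1, B1)         # shape (1, 3), entries in (0, 1)
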
import numpy as np
import pandas as pd
import activation_function

history_weight = []
data = pd.read_csv("C:/Users/yxmfi/Desktop/temp_file" + u"/jupyter博客/testSet.csv",
                   encoding='utf-8',
                   error_bad_lines=False)
features = data.drop(['label'], axis=1)
targets = data['label']
n_records, n_features = features.shape
last_loss = None

# Initialize weights
weights = np.random.normal(scale=1 / n_features**.5, size=n_features)

# Neural Network hyperparameters
epochs = 1000
learnrate = 0.5

for e in range(epochs):
    del_w = np.zeros(weights.shape)
    for x, y in zip(features.values, targets):
        h = np.dot(weights, x)
        output = activation_function.sigmoid(h)
        error = y - output
        error_term = error * activation_function.sigmoid_prime(h)
        del_w += error_term * x
    weights += learnrate * del_w / n_records

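# The loop above also needs activation_function.sigmoid_prime; a sketch of
# the usual derivative of the logistic function (assumed implementation):
def sigmoid_prime(h):
    s = 1 / (1 + np.exp(-h))
    return s * (1 - s)
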
if __name__ == "__main__":
    """
    # load and prepare the iris dataset
    iris = load_iris()
    x = iris.data
    y_ = iris.target.reshape(-1, 1)  # Convert data to a single column

    # One Hot encode the class labels
    encoder = OneHotEncoder(sparse=False)
    y = encoder.fit_transform(y_)

    train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=0.20, stratify=y)
    """

    # assemble ANN
    architecture = [2, 4, 10, 4, 2]
    #f = [RELU()]*len(architecture)
    f = [linear(), RELU(), RELU(), sigmoid(), sigmoid()]
    ANN = neuralnetwork(f, architecture)
    for ll in ANN.layers:
        print(ll.layer_type, ll.nb_neurons_prev(), ll.nb_neurons())
    ANN.initialize_weights()

    #ANN.load_data(train_x[:1,:], train_y[:1,:])
    ANN.load_data(np.array([[1.0, 1.0], [-1.0, 1.0], [-1.0, -1.0]]),
                  np.array([[1.0, 0.0], [0.0, 1.0], [1.0, 0.0]]))

    cost = ANN.compute_loss()
    print('cost=', cost)
    cost2 = ANN.compute_derivative()
    print('cost2=', cost2)

    # check derivative with finite-difference
    EPS = [1e-2, 1e-3, 1e-4]
    layernb = 3

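# The snippet ends before the finite-difference check itself. A generic
# central-difference comparison against an analytic gradient, independent
# of this ANN's API (a sketch for illustration, not the original check):
import numpy as np

def fd_check(f, grad, w, eps=1e-4):
    # compare grad(w) to (f(w + eps*e_i) - f(w - eps*e_i)) / (2*eps)
    num = np.zeros_like(w)
    for i in range(w.size):
        e = np.zeros_like(w)
        e.flat[i] = eps
        num.flat[i] = (f(w + e) - f(w - e)) / (2 * eps)
    return np.max(np.abs(num - grad(w)))

# f = lambda w: 0.5 * np.sum(w**2); grad = lambda w: w
# fd_check(f, grad, np.random.randn(5))  # ~1e-12 for this quadratic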