class MnistNetMiniBatch:
    def __init__(self):
        self.d1_layer = Dense(784, 100)
        self.a1_layer = ReLu()
        self.drop1_layer = Dropout(0.5)

        self.d2_layer = Dense(100, 50)
        self.a2_layer = ReLu()
        self.drop2_layer = Dropout(0.25)

        self.d3_layer = Dense(50, 10)
        self.a3_layer = Softmax()

    def forward(self, x, train=True):
        net = self.d1_layer.forward(x)
        net = self.a1_layer.forward(net)
        net = self.drop1_layer.forward(net, train)

        net = self.d2_layer.forward(net)
        net = self.a2_layer.forward(net)
        net = self.drop2_layer.forward(net, train)

        net = self.d3_layer.forward(net)
        net = self.a3_layer.forward(net)
        return net

    def backward(self, dz, learning_rate=0.01, mini_batch=True,
                 update=False, len_mini_batch=None):
        dz = self.a3_layer.backward(dz)
        dz = self.d3_layer.backward(dz, learning_rate=learning_rate,
                                    mini_batch=mini_batch, update=update,
                                    len_mini_batch=len_mini_batch)

        dz = self.drop2_layer.backward(dz)
        dz = self.a2_layer.backward(dz)
        dz = self.d2_layer.backward(dz, learning_rate=learning_rate,
                                    mini_batch=mini_batch, update=update,
                                    len_mini_batch=len_mini_batch)

        dz = self.drop1_layer.backward(dz)
        dz = self.a1_layer.backward(dz)
        dz = self.d1_layer.backward(dz, learning_rate=learning_rate,
                                    mini_batch=mini_batch, update=update,
                                    len_mini_batch=len_mini_batch)
        return dz
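A minimal sketch of how this class might be driven. It assumes the Dense layers accumulate gradients while update=False and apply the averaged update when update=True, and that the gradient fed into backward is dL/d(softmax output); the cross_entropy_gradient helper and the dummy data are illustrative only and are not part of the original code.

import numpy as np

# Dummy stand-ins for MNIST data; the real pipeline would load 784-pixel images.
X_train = np.random.rand(64, 784)
y_train = np.eye(10)[np.random.randint(0, 10, size=64)]  # one-hot labels

def cross_entropy_gradient(probs, target, eps=1e-12):
    # dL/d(output) for L = -sum(target * log(probs)); the network's Softmax layer
    # is assumed to map this back to the pre-activation gradient in backward().
    return -target / (probs + eps)

net = MnistNetMiniBatch()
batch_size = 32

for start in range(0, len(X_train), batch_size):
    xb = X_train[start:start + batch_size]
    yb = y_train[start:start + batch_size]
    for i, (x, y) in enumerate(zip(xb, yb)):
        probs = net.forward(x, train=True)
        dz = cross_entropy_gradient(probs, y)
        # Accumulate per-sample gradients; apply the update on the last sample
        # of the mini-batch (assumed contract of update / len_mini_batch).
        net.backward(dz, learning_rate=0.01, mini_batch=True,
                     update=(i == len(xb) - 1), len_mini_batch=len(xb))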
def test_softmax_calculate_gradient(self):
    # Given
    pre_activation = np.array([[1, 2, 3, 6],
                               [2, 4, 5, 6],
                               [3, 8, 7, 6]])
    target = np.array([[1, 0, 0, 0],
                       [0, 1, 0, 0],
                       [0, 0, 1, 0]])
    softmax = Softmax()

    # When
    activation = softmax.apply_activation(pre_activation)
    grad = softmax.calculate_gradient(activation, target)
def test_softmax_apply_activation(self):
    # Given
    pre_activation = np.array([[1, 2, 3, 6],
                               [2, 4, 5, 6],
                               [3, 8, 7, 6]])
    softmax = Softmax()

    # When
    activation = softmax.apply_activation(pre_activation)

    # Then
    sum_of_columns = 4.0
    self.assertTrue(
        np.isclose(sum_of_columns, np.sum(np.sum(activation, axis=0)), 1e-3))
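A minimal Softmax sketch that would satisfy both tests above. It assumes normalisation along axis 0 (so each of the four columns sums to 1, giving a total of 4.0) and the usual combined softmax/cross-entropy gradient; the implementation actually under test may differ.

import numpy as np

class Softmax:
    def apply_activation(self, pre_activation):
        # Shift by the per-column max for numerical stability, then normalise
        # each column so it sums to 1.
        shifted = pre_activation - np.max(pre_activation, axis=0, keepdims=True)
        exps = np.exp(shifted)
        return exps / np.sum(exps, axis=0, keepdims=True)

    def calculate_gradient(self, activation, target):
        # For a cross-entropy loss over a softmax output, the gradient w.r.t.
        # the pre-activation reduces to (activation - target).
        return activation - target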
def build_model():
    model = NeuralNetwork(loss=CrossEntropy(),
                          n_iterations=800,
                          learning_rate=0.13)
    model.add(Layer(4, 10, activation=Relu()))
    model.add(Layer(10, 3, activation=Softmax()))
    return model
def sequential(self, network=[1, 1, 1], activation=[ReLU(), Softmax()],
               loss=Cross_entropy(), regu=Default(), weight_type='default'):
    self.net = network
    self.activation1 = activation[0]
    self.activation2 = activation[1]
    self.loss = loss
    self.regu = regu
    self.init_weight(weight_type)
class Network(object):
    def __init__(self):
        self.layers = []
        self.activation = Softmax()
        self.init()

    def init(self):
        for layer in self.layers:
            layer.init()

    def predict(self, x):
        _ = x
        for layer in self.layers:
            _ = layer.forward(_)
        self.y = self.activation.forward(_)
        return self.y

    def train(self, x, target):
        y = self.predict(x)
        _ = self.activation.backward(y, target)
        for layer in reversed(self.layers):
            _ = layer.backward(_)
            layer.update()
        return self.activation.loss(y, target)

    def dump_params(self):
        odict = {}
        odict["layers"] = [(l.W, l.b) for l in self.layers]
        return odict

    def load_params(self, idict):
        for l, p in zip(self.layers, idict["layers"]):
            l.W, l.b = p

    def __test(self):
        """
        >>> from layers import Fullconnect
        >>> from nonlinears import ReLU, Tanh
        >>> from activations import Softmax, Sigmoid, Identity
        >>> from updaters import GradientDescent, NotUpdate
        >>>
        >>> learning_rate = 0.01
        >>> np.random.seed(0xC0FFEE)
        >>>
        >>> # Multiclass classification
        >>> n = Network()
        >>> n.layers.append( Fullconnect(2, 10, ReLU.function, ReLU.derivative, updater=GradientDescent(learning_rate)) )
        >>> n.layers.append( Fullconnect(10, 2, updater=GradientDescent(learning_rate)) )
        >>> n.activation = Softmax()
        >>>
        >>> for epoch in range(0, 20):
        ...     loss = n.train( x = np.array([ [1, 2, 1, 2, 5, 6, 5, 6],
        ...                                    [5, 4, 4, 5, 1, 2, 2, 1]]).T,
        ...                     target = np.array([ [1, 1, 1, 1, 0, 0, 0, 0],
        ...                                         [0, 0, 0, 0, 1, 1, 1, 1]]).T )
        ...     if epoch%5 == 0:
        ...         print('epoch:%04d loss:%.2f'%(epoch, loss))
        epoch:0000 loss:6.64
        epoch:0005 loss:0.65
        epoch:0010 loss:0.36
        epoch:0015 loss:0.25
        >>>
        >>> y = n.predict( np.array( [[1, 6, 3], [5, 1, 4]] ).T )
        >>> print(['%.2f'%_ for _ in y[0]])
        ['0.99', '0.01']
        >>> [_ for _ in np.argmax(y, -1)]
        [0, 1, 0]
        >>>
        >>> # Multiple-class classification
        >>> n = Network()
        >>> n.layers.append( Fullconnect(2, 10, ReLU.function, ReLU.derivative, updater=GradientDescent(learning_rate)) )
        >>> n.layers.append( Fullconnect(10, 2, updater=GradientDescent(learning_rate)) )
        >>> n.activation = Sigmoid()
        >>>
        >>> for epoch in range(0, 20):
        ...     loss = n.train( x = np.array([ [1, 2, 1, 2, 4, 5, 4, 5, 5, 6, 5, 6],
        ...                                    [5, 4, 4, 5, 5, 4, 5, 4, 1, 2, 2, 1]]).T,
        ...                     target = np.array([ [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0],
        ...                                         [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]]).T )
        ...     if epoch%5 == 0:
        ...         print('epoch:%04d loss:%.2f'%(epoch, loss))
        epoch:0000 loss:29.39
        epoch:0005 loss:4.78
        epoch:0010 loss:3.38
        epoch:0015 loss:2.64
        >>>
        >>> y = n.predict( np.array( [[5, 6, 3], [4, 1, 4]] ).T )
        >>> print(['%.2f'%_ for _ in y[0]])
        ['0.86', '0.96']
        >>>
        >>> # Regression
        >>> n = Network()
        >>> n.layers.append( Fullconnect(2, 10, ReLU.function, ReLU.derivative, updater=GradientDescent(learning_rate)) )
        >>> n.layers.append( Fullconnect(10, 2, updater=GradientDescent(learning_rate)) )
        >>> n.activation = Identity()
        >>>
        >>> for epoch in range(0, 20):
        ...     loss = n.train( x = np.array([ [1, 2, 1, 2, 5, 6, 5, 6],
        ...                                    [5, 4, 4, 5, 1, 2, 2, 1]]).T,
        ...                     target = np.array([ [1, 1, 1, 1, 0, 0, 0, 0],
        ...                                         [0, 0, 0, 0, 1, 1, 1, 1]]).T )
        ...     if epoch%5 == 0:
        ...         print('epoch:%04d loss:%.2f'%(epoch, loss))
        epoch:0000 loss:52.82
        epoch:0005 loss:1.81
        epoch:0010 loss:1.26
        epoch:0015 loss:0.89
        >>>
        >>> y = n.predict( np.array( [[1, 6, 5], [5, 1, 4]] ).T )
        >>> print(['%.2f'%_ for _ in y[0]])
        ['1.19', '-0.00']
        >>>
        >>> # Auto-encoder
        >>> n = Network()
        >>> n.layers.append( Fullconnect( 2, 10, Tanh.function, Tanh.derivative, GradientDescent(learning_rate)) )
        >>> n.layers.append( Fullconnect(10, 10, Tanh.function, Tanh.derivative, GradientDescent(learning_rate)) )
        >>> n.layers.append( Fullconnect(10, 10, updater=NotUpdate()) )
        >>> n.layers.append( Fullconnect(10, 2, updater=NotUpdate()) )
        >>> n.activation = Identity()
        >>>
        >>> # for auto-encoder (weight share)
        >>> n.layers[2].W = n.layers[1].W.T
        >>> n.layers[3].W = n.layers[0].W.T
        >>>
        >>> x = np.array( [[1, 2, 1, 2, 5, 6, 5, 6, 5, 6, 5, 6],
        ...                [5, 4, 4, 5, 5, 4, 5, 4, 1, 2, 2, 1]] ).T
        >>>
        >>> for epoch in range(0, 301):
        ...     loss = n.train( x=x, target=x )
        ...     if epoch%100 == 0:
        ...         print('epoch:%04d loss:%.2f'%(epoch, loss))
        epoch:0000 loss:98.38
        epoch:0100 loss:9.83
        epoch:0200 loss:1.79
        epoch:0300 loss:1.82
        """
        pass
# `df` (the MNIST training CSV as a DataFrame) and `train` (the training split)
# are assumed to be defined earlier in the script.
val = df.drop(train.index)

yr = train.iloc[:, 0].to_numpy()
X_train, y_train = train.iloc[:, 1:].to_numpy() / 255.0, onehotcode(yr)
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
y_train = y_train.reshape((y_train.shape[0], y_train.shape[1], 1))
print(X_train.shape, y_train.shape)

X_val, y_val = val.iloc[:, 1:].to_numpy() / 255.0, val.iloc[:, 0].to_numpy()
X_val = X_val.reshape((X_val.shape[0], X_val.shape[1], 1))
print(X_val.shape, y_val.shape)

# 784-128-10 network with Tanh hidden activation, Softmax output,
# cross-entropy loss and Ridge (L2) regularisation.
nn = NN()
nn.sequential(network=[784, 128, 10], activation=[Tanh(), Softmax()],
              loss=Cross_entropy(), regu=Ridge(n=X_train.shape[0], lmda=5),
              weight_type='glorot_normal')

# Load pre-trained weights instead of retraining.
nn.load_model('tanL2128')
# nn.fit(X_train, y_train, X_val, y_val, 32, 2)
# nn.save_model('tanL2128')
nn.weight_heatmap()

df2 = pd.read_csv('mnist_test.csv')
X_test, y_test = df2.iloc[:, 1:].to_numpy() / 255.0, df2.iloc[:, 0].to_numpy()
X_test = X_test.reshape((X_test.shape[0], 28, 28))
print(X_test.shape, y_test.shape)
nn.annote_test(X_test[:100], 10, 10)
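The script above calls an onehotcode helper that is not shown. A plausible minimal version, consistent with how it is used here (hypothetical; the real helper's signature may differ):

import numpy as np

def onehotcode(labels, n_classes=10):
    # One row per label, with a 1.0 in the column given by the label value.
    encoded = np.zeros((labels.shape[0], n_classes))
    encoded[np.arange(labels.shape[0]), labels] = 1.0
    return encoded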