class LR(object):
    """Logistic-regression-style single-layer model with a sigmoid output."""

    def __init__(self, inputs=2, outputs=1, file_name=None):
        if file_name is None:
            self.w = np.random.randn(outputs, inputs)
            self.b = np.zeros([outputs, 1])
        else:
            self.w = np.load(file_name)['w']
            self.b = np.load(file_name)['b']
        self.learning_rate = 0.18
        self.dz = 0
        self.sigmoid = Sigmoid()

    def forward(self, input_data):
        output = np.matmul(self.w, input_data.T) + self.b
        output = self.sigmoid.activate(output)
        return output

    def backward(self, input_data, y, o):
        m = input_data.shape[0]
        self.dz = o - y
        dw = np.matmul(self.dz, input_data) / m
        db = np.sum(self.dz) / m
        a = np.multiply(self.learning_rate, dw)
        self.w -= a
        self.b -= db * self.learning_rate

    def train(self, test_input, y):
        o = self.forward(test_input)
        self.backward(test_input, y, o)

    @staticmethod
    def cost(y, t):
        return Cost(y, t).softmax_classification()
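# A minimal usage sketch for the LR class above. It assumes NumPy plus the
# project's Sigmoid/Cost classes are importable; the toy data and iteration
# count here are illustrative only. Inputs are shaped (samples, inputs) and
# targets (outputs, samples), matching forward()/backward() as written.
import numpy as np

x = np.array([[0.0, 1.0], [1.0, 0.0], [1.0, 1.0], [0.0, 0.0]])  # 4 samples, 2 features
t = np.array([[1.0, 1.0, 0.0, 0.0]])                            # 1 output per sample

model = LR(inputs=2, outputs=1)
for _ in range(1000):            # plain batch gradient descent
    model.train(x, t)
predictions = model.forward(x)   # values in (0, 1) after the sigmoid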
def __init__(self, layers):
    """
    Neural network class.
    :param layers: list of layer widths, e.g. [2, 3, 1]
    """
    # Build the layer structure: one fully connected sigmoid layer
    # per adjacent pair of widths.
    self.layers = [
        FC(ip, op, Sigmoid())
        for ip, op in zip(layers[:-1], layers[1:])
    ]
def __init__(self, widths=[2, 2, 3, 2], lr=0.05, loss=L2Loss()):
    sigmoid = Sigmoid()
    self._layers = []
    self._lr = lr
    for n_in, n_out in zip(widths[:-1], widths[1:]):
        linearLayer = LinearLayer(n_in, n_out, bias=True, scale=1 / np.sqrt(n_in))
        self._layers.append(ActivatedLayer(linearLayer, sigmoid))
    self._loss = loss
def __init__(self, input_size, layers, lr=0.01, batch_size=10):
    self.input_size = input_size
    self.lr = lr
    self.layers = []
    self.train_step_count = 0
    self.batch_size = batch_size
    cur_input_size = self.input_size
    # Hidden layers use the Dense default activation; the final layer uses Sigmoid.
    for cur_layer_size in layers[:-1]:
        self.layers.append(Dense(cur_input_size, cur_layer_size))
        cur_input_size = cur_layer_size
    self.layers.append(Dense(cur_input_size, layers[-1], act_func=Sigmoid()))
    print([x.layer_size for x in self.layers])
def trainXORWithSigmoid(self, training, stopCondition):
    logicStatements = num.array([[0, 0, 0],
                                 [0, 1, 1],
                                 [1, 0, 0],
                                 [1, 1, 1],
                                 [1, 0, 1]])
    expectedOutput = num.array([[0], [0], [1], [1], [0]])
    S = [logicStatements.shape[1], 3, 2, expectedOutput.shape[1]]
    model = MultiPerceptron(activation=Sigmoid(), weightsInitializer=ZeroInitializer())
    model.configureLayers(S)
    training.executeOn(model=model, input=logicStatements, target=expectedOutput,
                       learningRate=0.1, stopCondition=stopCondition)
    return model
def test_evaluateOnVectors(self):
    sigmoid = Sigmoid()
    vector = num.array([1, 2, 3])
    num.testing.assert_array_equal(sigmoid(vector), 1 / (1 + num.exp(-vector)))
# ######## #
# Training #
# ######## #
X, y = Input(), Input()
W1, b1 = Input(), Input()
W2, b2 = Input(), Input()

W1_ = np.random.randn(784, 30)
b1_ = np.random.randn(30)
W2_ = np.random.randn(30, 10)
b2_ = np.random.randn(10)

l1 = Linear(X, W1, b1)
s1 = Sigmoid(l1)
l2 = Linear(s1, W2, b2)
s2 = Sigmoid(l2)
cost = SSE(y, s2)

feed_dict = {X: train_x, y: train_y, W1: W1_, b1: b1_, W2: W2_, b2: b2_}
hyper_parameters = [W1, b1, W2, b2]
graph = Network.topological_sort(feed_dict)

epoch = 1000
batch_size = 30
steps_per_batch = len(train_y) // batch_size
for i in tqdm(xrange(epoch)):
def delta(z, a, y):
    # Output-layer error term for a sigmoid unit: (a - y) * sigma'(z).
    return (a - y) * Sigmoid.prime(z)
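# delta() above assumes a Sigmoid class exposing a static prime() method on the
# pre-activation z. A minimal sketch of that assumption (the helper name fn is
# illustrative; only prime is implied by the call in delta):
import numpy as np

class Sigmoid(object):
    @staticmethod
    def fn(z):
        # logistic function 1 / (1 + e^{-z})
        return 1.0 / (1.0 + np.exp(-z))

    @staticmethod
    def prime(z):
        # derivative of the logistic function w.r.t. z: sigma(z) * (1 - sigma(z))
        s = Sigmoid.fn(z)
        return s * (1.0 - s)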
"""
- layer3.py
date:
- 2017.01.06
description:
- Implement LSTM layer.
"""
from activation import Tanh, Sigmoid
from gate import AddGate, MultiplyGate
import numpy as np

mulGate = MultiplyGate()
sig = Sigmoid()
tanh = Tanh()
activation = tanh

'''
For hidden layer cell:
    input: x(t), h(t-1), c(t-1)
'''
class LstmGate(object):
    def __init__(self, U, W, B, activation=sig):
        self.U = U
        self.W = W
model = Sequential()
model.add(Conv2D, ksize=3, stride=1, activation=ReLU(), input_size=(8, 8, 1), filters=7, padding=0)
model.add(MaxPool2D, ksize=2, stride=1, padding=0)
model.add(Conv2D, ksize=2, stride=1, activation=ReLU(), filters=5, padding=0)
model.add(Flatten)
model.add(Dense, units=1, activation=Sigmoid())
model.summary()
model.compile(BinaryCrossEntropy())

print("Initial Loss", model.evaluate(X, y)[0])
model.fit(X, y, n_epochs=100, batch_size=300, learning_rate=0.001,
          optimizer=GradientDescentOptimizer(), verbose=1)
print("Final Loss", model.evaluate(X, y)[0])
def test_description(self):
    sigmoid = Sigmoid()
    self.assertEqual(sigmoid.description(), 'Sigmoid')
def test_evaluateDerivativeOnVectors(self):
    sigmoid = Sigmoid()
    vector = num.array([1, 2, 3])
    # This implementation expresses the derivative in terms of its argument as
    # x * (1 - x), so [1, 2, 3] maps to [0, -2, -6].
    num.testing.assert_array_equal(sigmoid.derivative(vector), num.array([0, -2, -6]))
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import scipy as sp

from activation import Sigmoid, Softmax, Tanh

ACTIVATION_MAP = {'tanh': Tanh(), 'sigmoid': Sigmoid(), 'softmax': Softmax()}


class LSTMLayer(object):
    def __init__(self, activation='tanh'):
        self.activation = activation
        self.a = None
        self.h = None
        self.y = None

    def activate(self, x):
        return ACTIVATION_MAP[self.activation].eval(x)

    def backward(self):
        return ACTIVATION_MAP[self.activation].gradient(self.a)

    def loss(self, t):
        return ACTIVATION_MAP[self.activation].loss(t, self.y)


class HiddenLayer(LSTMLayer):
    def __init__(self, hidden_size=10, gate_activation='sigmoid',
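# LSTMLayer above looks activations up by name and calls eval()/gradient() on
# them. A minimal sketch of the Sigmoid side of that interface, as assumed here
# (the project's real activation module may differ, e.g. in whether gradient()
# receives the pre-activation or the activated value; this sketch assumes the
# pre-activation, since backward() passes self.a):
import numpy as np

class Sigmoid(object):
    def eval(self, x):
        # logistic function applied element-wise
        return 1.0 / (1.0 + np.exp(-x))

    def gradient(self, a):
        # sigma(a) * (1 - sigma(a))
        s = self.eval(a)
        return s * (1.0 - s)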
def test_evaluateOnNumbers(self):
    sigmoid = Sigmoid()
    self.assertEqual(sigmoid(1), 1 / (1 + num.exp(-1)))
    self.assertEqual(sigmoid(0.73), 1 / (1 + num.exp(-0.73)))
def __init__(self, arg):
    super(LSTM, self).__init__()
    self.input_dim = arg["input_dim"]
    self.hidden_dim = arg["hidden_dim"]
    self.output_dim = arg["output_dim"]
    # Weights are initialised uniformly in [-1, 1); each gate acts on the
    # concatenation of x(t) and h(t-1), hence the (hidden, input+hidden) shapes.
    self.w_forget = 2 * np.random.random((self.hidden_dim, self.input_dim + self.hidden_dim)) - 1
    self.w_memory_weight = 2 * np.random.random((self.hidden_dim, self.input_dim + self.hidden_dim)) - 1
    self.w_memory_content = 2 * np.random.random((self.hidden_dim, self.input_dim + self.hidden_dim)) - 1
    self.w_output = 2 * np.random.random((self.hidden_dim, self.input_dim + self.hidden_dim)) - 1
    self.w_predict = 2 * np.random.random((self.output_dim, self.hidden_dim)) - 1
    # Gate activations default to Sigmoid, content activations to Tanh, unless overridden in arg.
    self.activation_forget = arg["activation_forget"]() if "activation_forget" in arg else Sigmoid()
    self.activation_memory_weight = arg["activation_memory_weight"]() if "activation_memory_weight" in arg else Sigmoid()
    self.activation_memory_content = arg["activation_memory_content"]() if "activation_memory_content" in arg else Tanh()
    self.activation_output_weight = arg["activation_output_weight"]() if "activation_output_weight" in arg else Sigmoid()
    self.activation_output_content = arg["activation_output_content"]() if "activation_output_content" in arg else Tanh()
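# A sketch of the single-step cell update these weights and activations suggest.
# These are the standard LSTM equations, not this class's actual forward method;
# the helper name lstm_step is illustrative, and the activation objects are
# assumed to be callable (substitute their real method, e.g. activate(), if not).
import numpy as np

def lstm_step(cell, x_t, h_prev, c_prev):
    z = np.concatenate([x_t, h_prev])                                    # [x(t), h(t-1)]
    f = cell.activation_forget(np.dot(cell.w_forget, z))                 # forget gate
    i = cell.activation_memory_weight(np.dot(cell.w_memory_weight, z))   # input gate
    g = cell.activation_memory_content(np.dot(cell.w_memory_content, z)) # candidate cell content
    o = cell.activation_output_weight(np.dot(cell.w_output, z))          # output gate
    c_t = f * c_prev + i * g                                             # new cell state
    h_t = o * cell.activation_output_content(c_t)                        # new hidden state
    y_t = np.dot(cell.w_predict, h_t)                                    # prediction from hidden state
    return h_t, c_t, y_t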
def main():
    # generate data and translate labels
    train_features, train_targets = generate_all_datapoints_and_labels()
    test_features, test_targets = generate_all_datapoints_and_labels()
    train_labels, test_labels = convert_labels(train_targets), convert_labels(test_targets)

    print('*************************************************************************')
    print('*************************************************************************')
    print('*************************************************************************')
    print('*************************************************************************')
    print('*************************************************************************')
    print('Model: Linear + ReLU + Linear + ReLU + Linear + ReLU + Linear + Tanh')
    print('Loss: MSE')
    print('Optimizer: SGD')
    print('*************************************************************************')
    print('Training')
    print('*************************************************************************')

    # build network, loss and optimizer for Model 1
    my_model_design_1 = [Linear(2, 25), ReLU(), Linear(25, 25), Dropout(p=0.5), ReLU(),
                         Linear(25, 25), ReLU(), Linear(25, 2), Tanh()]
    my_model_1 = Sequential(my_model_design_1)
    optimizer_1 = SGD(my_model_1, lr=1e-3)
    criterion_1 = LossMSE()

    # train Model 1
    batch_size = 1
    for epoch in range(50):
        temp_train_loss_sum = 0.
        temp_test_loss_sum = 0.
        num_train_correct = 0
        num_test_correct = 0

        # trained in batch-fashion: here batch size = 1
        for temp_batch in range(0, len(train_features), batch_size):
            temp_train_features = train_features.narrow(0, temp_batch, batch_size)
            temp_train_labels = train_labels.narrow(0, temp_batch, batch_size)
            for i in range(batch_size):
                # clean parameter gradient before each batch
                optimizer_1.zero_grad()
                temp_train_feature = temp_train_features[i]
                temp_train_label = temp_train_labels[i]

                # forward pass to compute loss
                temp_train_pred = my_model_1.forward(temp_train_feature)
                temp_train_loss = criterion_1.forward(temp_train_pred, temp_train_label)
                temp_train_loss_sum += temp_train_loss

                _, temp_train_pred_cat = torch.max(temp_train_pred, 0)
                _, temp_train_label_cat = torch.max(temp_train_label, 0)
                if temp_train_pred_cat == temp_train_label_cat:
                    num_train_correct += 1

                # calculate gradient according to loss gradient
                temp_train_loss_grad = criterion_1.backward(temp_train_pred, temp_train_label)
                # accumulate parameter gradient in each batch
                my_model_1.backward(temp_train_loss_grad)
                # update parameters by optimizer
                optimizer_1.step()

        # evaluate the current model on testing set
        # only forward pass is implemented
        for i_test in range(len(test_features)):
            temp_test_feature = test_features[i_test]
            temp_test_label = test_labels[i_test]

            temp_test_pred = my_model_1.forward(temp_test_feature)
            temp_test_loss = criterion_1.forward(temp_test_pred, temp_test_label)
            temp_test_loss_sum += temp_test_loss

            _, temp_test_pred_cat = torch.max(temp_test_pred, 0)
            _, temp_test_label_cat = torch.max(temp_test_label, 0)
            if temp_test_pred_cat == temp_test_label_cat:
                num_test_correct += 1

        temp_train_loss_mean = temp_train_loss_sum / len(train_features)
        temp_test_loss_mean = temp_test_loss_sum / len(test_features)
        temp_train_accuracy = num_train_correct / len(train_features)
        temp_test_accuracy = num_test_correct / len(test_features)

        print("Epoch: {}/{}..".format(epoch + 1, 50),
              "Training Loss: {:.4f}..".format(temp_train_loss_mean),
              "Training Accuracy: {:.4f}..".format(temp_train_accuracy),
              "Validation/Test Loss: {:.4f}..".format(temp_test_loss_mean),
              "Validation/Test Accuracy: {:.4f}..".format(temp_test_accuracy),
              )

    # visualize the classification performance of Model 1 on testing set
    test_pred_labels_1 = []
    for i in range(1000):
        temp_test_feature = test_features[i]
        temp_test_label = test_labels[i]
        temp_test_pred = my_model_1.forward(temp_test_feature)
        _, temp_train_pred_cat = torch.max(temp_test_pred, 0)
        if test_targets[i].int() == temp_train_pred_cat.int():
            test_pred_labels_1.append(int(test_targets[i]))
        else:
            test_pred_labels_1.append(2)

    fig, axes = plt.subplots(1, 1, figsize=(6, 6))
    axes.scatter(test_features[:, 0], test_features[:, 1], c=test_pred_labels_1)
    axes.set_title('Classification Performance of Model 1')
    plt.show()

    print('*************************************************************************')
    print('*************************************************************************')
    print('*************************************************************************')
    print('*************************************************************************')
    print('*************************************************************************')
    print('Model: Linear + ReLU + Linear + Dropout + SeLU + Linear + Dropout + ReLU + Linear + Sigmoid')
    print('Loss: Cross Entropy')
    print('Optimizer: Adam')
    print('*************************************************************************')
    print('Training')
    print('*************************************************************************')

    # build network, loss function and optimizer for Model 2
    my_model_design_2 = [Linear(2, 25), ReLU(), Linear(25, 25), Dropout(p=0.5), SeLU(),
                         Linear(25, 25), Dropout(p=0.5), ReLU(), Linear(25, 2), Sigmoid()]
    my_model_2 = Sequential(my_model_design_2)
    optimizer_2 = Adam(my_model_2, lr=1e-3)
    criterion_2 = CrossEntropy()

    # train Model 2
    batch_size = 1
    epoch = 0
    while epoch < 25:
        temp_train_loss_sum = 0.
        temp_test_loss_sum = 0.
        num_train_correct = 0
        num_test_correct = 0

        # trained in batch-fashion: here batch size = 1
        for temp_batch in range(0, len(train_features), batch_size):
            temp_train_features = train_features.narrow(0, temp_batch, batch_size)
            temp_train_labels = train_labels.narrow(0, temp_batch, batch_size)
            for i in range(batch_size):
                # clean parameter gradient before each batch
                optimizer_2.zero_grad()
                temp_train_feature = temp_train_features[i]
                temp_train_label = temp_train_labels[i]

                # forward pass to compute loss
                temp_train_pred = my_model_2.forward(temp_train_feature)
                temp_train_loss = criterion_2.forward(temp_train_pred, temp_train_label)
                temp_train_loss_sum += temp_train_loss

                _, temp_train_pred_cat = torch.max(temp_train_pred, 0)
                _, temp_train_label_cat = torch.max(temp_train_label, 0)
                if temp_train_pred_cat == temp_train_label_cat:
                    num_train_correct += 1

                # calculate gradient according to loss gradient
                temp_train_loss_grad = criterion_2.backward(temp_train_pred, temp_train_label)
                '''
                if (not temp_train_loss_grad[0] >= 0) and (not temp_train_loss_grad[0] < 0):
                    continue
                '''
                # accumulate parameter gradient in each batch
                my_model_2.backward(temp_train_loss_grad)
                # update parameters by optimizer
                optimizer_2.step()

        # evaluate the current model on testing set
        # only forward pass is implemented
        for i_test in range(len(test_features)):
            temp_test_feature = test_features[i_test]
            temp_test_label = test_labels[i_test]
            temp_test_pred = my_model_2.forward(temp_test_feature)
            temp_test_loss = criterion_2.forward(temp_test_pred, temp_test_label)
            temp_test_loss_sum += temp_test_loss

            _, temp_test_pred_cat = torch.max(temp_test_pred, 0)
            _, temp_test_label_cat = torch.max(temp_test_label, 0)
            if temp_test_pred_cat == temp_test_label_cat:
                num_test_correct += 1

        temp_train_loss_mean = temp_train_loss_sum / len(train_features)
        temp_test_loss_mean = temp_test_loss_sum / len(test_features)
        temp_train_accuracy = num_train_correct / len(train_features)
        temp_test_accuracy = num_test_correct / len(test_features)

        # in case there is a gradient explosion problem, initialize the model again and restart training
        # (but this situation seldom happens)
        if (not temp_train_loss_grad[0] >= 0) and (not temp_train_loss_grad[0] < 0):
            epoch = 0
            my_model_design_2 = [Linear(2, 25), ReLU(), Linear(25, 25), Dropout(p=0.5), ReLU(),
                                 Linear(25, 25), Dropout(p=0.5), ReLU(), Linear(25, 2), Sigmoid()]
            my_model_2 = Sequential(my_model_design_2)
            optimizer_2 = Adam(my_model_2, lr=1e-3)
            criterion_2 = CrossEntropy()
            print('--------------------------------------------------------------------------------')
            print('--------------------------------------------------------------------------------')
            print('--------------------------------------------------------------------------------')
            print('--------------------------------------------------------------------------------')
            print('--------------------------------------------------------------------------------')
            print('Restart training because of gradient explosion')
            continue

        print("Epoch: {}/{}..".format(epoch + 1, 25),
              "Training Loss: {:.4f}..".format(temp_train_loss_mean),
              "Training Accuracy: {:.4f}..".format(temp_train_accuracy),
              "Validation/Test Loss: {:.4f}..".format(temp_test_loss_mean),
              "Validation/Test Accuracy: {:.4f}..".format(temp_test_accuracy),
              )
        epoch += 1

    # visualize the classification performance of Model 2 on testing set
    test_pred_labels_2 = []
    for i in range(1000):
        temp_test_feature = test_features[i]
        temp_test_label = test_labels[i]
        temp_test_pred = my_model_2.forward(temp_test_feature)
        _, temp_train_pred_cat = torch.max(temp_test_pred, 0)
        if test_targets[i].int() == temp_train_pred_cat.int():
            test_pred_labels_2.append(int(test_targets[i]))
        else:
            test_pred_labels_2.append(2)

    fig, axes = plt.subplots(1, 1, figsize=(6, 6))
    axes.scatter(test_features[:, 0], test_features[:, 1], c=test_pred_labels_2)
    axes.set_title('Classification Performance of Model 2')
    plt.show()
# ######## #
X, y = Input(), Input()
W1, b1 = Input(), Input()
W2, b2 = Input(), Input()
W3, b3 = Input(), Input()

W1_ = np.random.randn(784, 500)
b1_ = np.random.randn(500)
W2_ = np.random.randn(500, 100)
b2_ = np.random.randn(100)
W3_ = np.random.randn(100, 10)
b3_ = np.random.randn(10)

l1 = Linear(X, W1, b1)
s1 = Sigmoid(l1)
l2 = Linear(s1, W2, b2)
s2 = Sigmoid(l2)
l3 = Linear(s2, W3, b3)
s3 = Sigmoid(l3)
cost = SSE(y, s3)

feed_dict = {
    X: train_x,
    y: train_y,
    W1: W1_,
    b1: b1_,
    W2: W2_,
    b2: b2_,
    W3: W3_,
W2, b2 = Input(), Input()

# Train dataset
X_ = np.reshape(np.array([[-1., -2., -3.], [1., 2., 3.]]), (2, 3))
W1_ = np.random.randn(3, 2)
b1_ = np.random.randn(2)
W2_ = np.random.randn(2, 1)
b2_ = np.random.randn(1)
y_ = np.reshape(np.array([[1.], [0.]]), (-1, 1))

# Test dataset
X_t_ = np.reshape(np.array([-1., -2.01, -2.8]), (1, 3))
y_t_ = np.array([1.])

l1 = Linear(X, W1, b1)
s1 = Sigmoid(l1)
l2 = Linear(s1, W2, b2)
cost = L2(y, l2)

feed_dict = {X: X_, y: y_, W1: W1_, b1: b1_, W2: W2_, b2: b2_}
hyper_parameters = [W1, b1, W2, b2]
graph = Network.topological_sort(feed_dict)

epoch = 1000000
for i in xrange(epoch):
    Network.forward_propagation(graph)
    Network.backward_propagation(graph)
    Update.stochastic_gradient_descent(hyper_parameters, learning_rate=1e-4)
    if cost.value < 1e-20:
def test_evaluateDerivativeOnNumbers(self):
    sigmoid = Sigmoid()
    self.assertEqual(sigmoid.derivative(2), 2 * (1 - 2))
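# The tests above pin down this project's Sigmoid interface: calling the object
# applies the logistic function, derivative(x) is expressed as x * (1 - x) on the
# given value, and description() returns 'Sigmoid'. A minimal sketch that would
# satisfy them (not necessarily the original implementation):
import numpy as num

class Sigmoid(object):
    def __call__(self, x):
        # element-wise logistic function 1 / (1 + e^{-x})
        return 1 / (1 + num.exp(-x))

    def derivative(self, x):
        # derivative written in terms of the given value: x * (1 - x)
        return x * (1 - x)

    def description(self):
        return 'Sigmoid'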