def forward_propagate(parameters, X, L, dropout=False):
    """Computes the forward propagation of the neural network."""
    caches = {}
    keep_prob = 0.8  # the dropout mask threshold and the rescale factor must match
    caches["Z1"] = parameters["W1"].dot(X) + parameters["b1"]
    caches["a1"] = a.relu(caches["Z1"])
    if dropout:
        # inverted dropout: mask with probability keep_prob, then rescale by keep_prob
        caches["D1"] = np.random.rand(caches["a1"].shape[0], caches["a1"].shape[1]) < keep_prob
        caches["a1"] *= caches["D1"]
        caches["a1"] /= keep_prob
    caches["Z2"] = parameters["W2"].dot(caches["a1"]) + parameters["b2"]
    caches["a2"] = a.relu(caches["Z2"])
    if dropout:
        caches["D2"] = np.random.rand(caches["a2"].shape[0], caches["a2"].shape[1]) < keep_prob
        caches["a2"] *= caches["D2"]
        caches["a2"] /= keep_prob
    # on the last layer we compute the sigmoid for each example
    caches["Z3"] = parameters["W3"].dot(caches["a2"]) + parameters["b3"]
    caches["a3"] = a.sigmoid(caches["Z3"])
    return caches["a3"], caches
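# Hypothetical usage sketch (not part of the original code): the layer sizes
# below are assumptions, and `a` is assumed to be the activations module this
# file already imports (providing a.relu and a.sigmoid).
import numpy as np

def _demo_forward_propagate():
    rng = np.random.default_rng(0)
    layer_dims = [4, 5, 3, 1]                     # assumed 3-layer network
    parameters = {}
    for l in range(1, len(layer_dims)):
        parameters["W" + str(l)] = rng.standard_normal((layer_dims[l], layer_dims[l - 1])) * 0.01
        parameters["b" + str(l)] = np.zeros((layer_dims[l], 1))
    X = rng.standard_normal((4, 10))              # 10 example columns
    y_hat, caches = forward_propagate(parameters, X, L=3, dropout=True)
    return y_hat                                  # shape (1, 10)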
def __init__(self, input, n_in, n_out, W=None, b=None, v_W=None, v_b=None,
             activation=a.relu):
    self.input = input
    if W is None:
        W = theano.shared(
            np.random.randn(n_out, n_in).astype(dtype=theano.config.floatX) / np.sqrt(n_in))
    if b is None:
        b = theano.shared(
            np.random.randn(n_out).astype(dtype=theano.config.floatX))
    if v_W is None:
        v_W = theano.shared(
            np.zeros((n_out, n_in)).astype(dtype=theano.config.floatX))
    if v_b is None:
        v_b = theano.shared(
            np.zeros(n_out).astype(dtype=theano.config.floatX))
    self.W = W
    self.b = b
    self.v_W = v_W
    self.v_b = v_b
    # keep the pre-activation output here; applying a.relu at this point as well
    # would activate the layer twice when `activation` is not None
    lin_output = T.dot(self.W, input) + self.b.dimshuffle(0, 'x')
    self.output = (lin_output if activation is None
                   else activation(lin_output))
    self.params = [self.W, self.b]
    self.velo = [self.v_W, self.v_b]
def activation_forward(self, A_prev, W, b, activation):
    if activation == 'sigmoid':
        Z, linear_cache = self.linear_forward(A_prev, W, b)
        A, activation_cache = sigmoid(Z)
    elif activation == "tanh":
        Z, linear_cache = self.linear_forward(A_prev, W, b)
        A, activation_cache = tanh(Z)
    elif activation == "relu":
        Z, linear_cache = self.linear_forward(A_prev, W, b)
        A, activation_cache = relu(Z)
    elif activation == "leaky_relu":
        Z, linear_cache = self.linear_forward(A_prev, W, b)
        A, activation_cache = leaky_relu(Z)
    else:
        # fail early instead of hitting a NameError on the assert below
        raise ValueError('Unsupported activation function: {}'.format(activation))

    assert A.shape == (W.shape[0], A_prev.shape[1])
    cache = (linear_cache, activation_cache)
    return A, cache
def add_activation_layer(self, input_layer_id, layer_name, activation_type='relu'):
    """
    Adds the activation layer.

    :param input_layer_id: The input layer identifier
    :param layer_name: The name of the layer. Type=string
    :param activation_type: 'relu' for ReLU and 'leaky-relu' for Leaky ReLU. Default = 'relu'
    :return: The identifier of the newly added layer
    """
    layer_id = self._get_layer_id(layer_name)
    assert self._layer_verifier(layer_id), 'Invalid: This layer is already present.'
    if activation_type == 'relu':
        self.layers[layer_id] = relu(self.layers[input_layer_id])
    elif activation_type == 'leaky-relu':
        self.layers[layer_id] = leaky_relu(self.layers[input_layer_id])
    else:
        raise ValueError(
            'The type of activation can only be one of ["relu", "leaky-relu"]')
    return layer_id
def add_layer(self, n_out, ini=Xavier(), acti=relu(), drop=None):
    # drop update: use the model-level dropout setting
    drop = self.drop
    # number of neurons feeding into the layer being added
    n_in = self.dims[-1]
    # create the layer with its two key parameters: output size and init method
    layer = hidden_layer(n_in, n_out, ini)
    # set optimizer
    if self.optimizer is not None:
        layer.setOptimizer(self.optimizer.clone())
    # set dropout
    layer.setDropout(drop=drop)
    # set batch normalization
    if self.norm is not None:
        layer.setBatchNormalizer(self.norm.clone())
    # set activation function
    layer.setActivation(acti)
    # update the model's dimension and layer lists
    self.dims.append(n_out)
    self.layers.append(layer)
    print('creating layer with {} neurons, '.format(n_out),
          'initialization: {}, '.format(ini.name),
          'activation: {}'.format(acti.name))
def activation_function(self, Z):
    if self.activation_function_name == 'relu':
        return relu(Z)
    elif self.activation_function_name == 'sigmoid':
        return sigmoid(Z)
    else:
        return tanh(Z)
def test_relu():
    """Test the relu activation function."""
    x = np.array([[0, 1, 3], [-1, 0, -5], [1, 0, 3], [10, -9, -7]])
    y = np.array([[0, 1, 3], [0, 0, 0], [1, 0, 3], [10, 0, 0]])
    assert np.array_equal(relu(x), y)
def test_relu_deriv():
    """Test the derivative of the relu activation function."""
    x = np.array([[0, 1, 3], [-1, 0, -5], [1, 0, 3], [10, -9, -7]])
    y = np.array([[0, 1, 1], [0, 0, 0], [1, 0, 1], [1, 0, 0]])
    assert np.array_equal(relu(x, deriv=True), y)
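# A minimal sketch of the relu helper these tests assume; the project's real
# implementation is not shown, so the signature and the convention relu'(0) == 0
# are inferred from the expected arrays above.
import numpy as np

def relu(x, deriv=False):
    x = np.asarray(x)
    if deriv:
        # subgradient: 1 where x > 0, otherwise 0
        return (x > 0).astype(x.dtype)
    return np.maximum(x, 0)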
def linear_activation_forward(A_prev, W, b, activation):
    Z = np.dot(W, A_prev) + b
    if activation == "sigmoid":
        A = sigmoid(Z)
    elif activation == "relu":
        A = relu(Z)
    elif activation == "tanh":
        A = tanh(Z)
    else:
        # avoid returning an undefined A for an unknown activation name
        raise ValueError("Unsupported activation: {}".format(activation))
    return A, Z
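# Hypothetical two-layer usage sketch: the shapes are assumptions, and the
# relu/sigmoid helpers are those assumed by linear_activation_forward above.
import numpy as np

def _demo_linear_activation_forward():
    rng = np.random.default_rng(0)
    X = rng.standard_normal((3, 5))               # 3 features, 5 examples
    W1, b1 = rng.standard_normal((4, 3)), np.zeros((4, 1))
    W2, b2 = rng.standard_normal((1, 4)), np.zeros((1, 1))
    A1, Z1 = linear_activation_forward(X, W1, b1, activation="relu")
    A2, Z2 = linear_activation_forward(A1, W2, b2, activation="sigmoid")
    return A2                                     # shape (1, 5): one probability per example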
def test_relu(self):
    rtol = 1e-5
    size = 10
    for _ in range(1000):
        x = np.random.uniform(low=-1000., high=1000., size=size).tolist()
        test_buffer = list_2_swig_float_pointer(x, size)
        y_numpy = np.array(
            tf.keras.activations.relu(tf.constant(x, dtype=tf.float32))).tolist()
        y_nn4mc = activation.relu(test_buffer.cast(), size)
        y_nn4mc = swig_py_object_2_list(y_nn4mc, size)
        assert np.allclose(y_nn4mc, y_numpy, rtol=rtol)
    print("relu passed")
def feedforward(self, inputs):
    '''
    Returns the output for a given set of inputs.

    y = A(f(X)), where A is an activation function and
    f(X) = x1*w1 + x2*w2 + ... + xn*wn + b
    for weights wn and bias b.
    '''
    total = self.linearsum(inputs)
    if self.activation == "sigmoid":
        return sigmoid(total)
    elif self.activation == "ReLU":
        return relu(total)
    elif self.activation == "leaky ReLU":
        return leakyrelu(total)
    else:
        raise Exception("Activation function not recognized")
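# Worked check with hypothetical numbers: for inputs [2, 4], weights [0.5, -0.25]
# and bias 0.1, linearsum gives 0.5*2 + (-0.25)*4 + 0.1 = 0.1, so the sigmoid
# branch of feedforward() would return roughly 0.525.
import math
assert abs(1.0 / (1.0 + math.exp(-0.1)) - 0.525) < 1e-3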
def single_layer_fp(X, W, b, activation="sigmoid"):
    # broadcast the bias across all examples (columns of X)
    A = np.dot(W, X) + np.outer(b, np.ones(X.shape[1]))
    # beta and delta are module-level hyperparameters used by some activations
    if activation == "linear":
        S = act_fun.linear(A)
    elif activation == "sigmoid":
        S = act_fun.sigmoid(beta, A)
    elif activation == "tanh":
        S = act_fun.tanh(beta, A)
    elif activation == "relu":
        S = act_fun.relu(A)
    elif activation == "softplus":
        S = act_fun.softplus(A)
    elif activation == "elu":
        S = act_fun.elu(delta, A)
    elif activation == "softmax":
        S = act_fun.softmax(A)
    else:
        raise ValueError("Activation function isn't supported")
    return (A, S)
def main(argv):
    # load and pre-process the data
    X, Predict_data, Y = preprocessed.data_preprocess(parameter.input_data_path)
    print('| Total train data | structure: {}'.format(X.shape))
    print('| Train Data label | structure: {}'.format(Y.shape))
    print('| Total test Data | structure: {}'.format(Predict_data.shape))

    # split the data into train, validation and test sets
    train_x, train_y, vali_x, vali_y, test_x, test_y = preprocessed.train_vali_test_split(
        X, Y, parameter.train_rate, parameter.vali_rate, parameter.test_rate)
    print("_______________________________________")
    print('after split\ntrain data shape:\t{}'.format(train_x.shape))
    print('train data label:\t{}'.format(train_y.shape))
    if vali_x is None:
        print("after data pre-process, validation set is none")
    else:
        print('validation data shape:\t{}'.format(vali_x.shape))
    if test_x is None:
        print("after data pre-process, test data is none")
    else:
        print('test data shape:\t{}'.format(test_x.shape))
    print("_______________________________________")

    # create the learning model; it accounts for batch size, batch normalization,
    # dropout rate, weight decay (regularizer) and the optimization method
    learn_model = model(train_x, train_y,
                        batch_size=get_batch_size(),
                        drop=get_dropout_rate(),
                        learning_rate=get_lr(),
                        regularizer=get_regularizer(),
                        norm=get_norm(),
                        optimizer=get_opt())
    # set the validation data on the model
    learn_model.validation(vali_x, vali_y)

    # hidden layers 1-3 and the output layer
    learn_model.add_layer(parameter.num_hide1, ini=He(), acti=relu())
    learn_model.add_layer(parameter.num_hide2, ini=He(), acti=relu())
    learn_model.add_layer(parameter.num_hide3, ini=He(), acti=relu())
    learn_model.add_last_layer(ini=Xavier(), acti=softmax())

    # start training
    x_rem = learn_model.fit(epoch=parameter.epoch, learning_rate=parameter.learning_rate)
    # start testing
    learn_model.test(test_x, test_y)
    # plot the results
    learn_model.plot(x_rem, True, True)

    # predict and save the labels
    predict = learn_model.predict(x=Predict_data).T
    predict = np.argmax(predict, axis=1)
    # print(predict)
    f = h5py.File(parameter.ouput_data_path + "/Predicted_labels.h5", 'a')
    f.create_dataset('/predict', data=predict, dtype=np.float32)
    f.close()
    print("---------- finished predicting, saved to Predicted_labels.h5 ----------")
def forward_prop(self, X):
    # compare the layer type by value; identity ('is') comparison of strings is unreliable
    if self.layer_type == "output":
        return softmax(np.dot(self.W.T, X))
    else:
        return relu(np.dot(self.W.T, X))
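# A sketch of the column-wise softmax such an output layer typically relies on;
# this is an illustrative assumption, not necessarily the project's own softmax.
import numpy as np

def softmax(Z):
    # subtract the per-column max for numerical stability, then normalize
    Z_shift = Z - np.max(Z, axis=0, keepdims=True)
    expZ = np.exp(Z_shift)
    return expZ / np.sum(expZ, axis=0, keepdims=True)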
    empty.fill(255)
    # keep each element of arr, capped at the corresponding element of 'empty'
    result = np.where(arr < empty, arr, empty)
    return result


# Convolution operation for edge detection on a grayscale image
for i in range(image_pad.shape[0] - 4):
    for j in range(image_pad.shape[1] - 4):
        input_image = image_pad[i:i + 3, j:j + 3]
        output_image1[i, j] = np.sum(input_image * kernel_1)
        output_image2[i, j] = np.sum(input_image * kernel_2)
        output_image3[i, j] = np.sum(input_image * kernel_3)

output_image1 = pixels(ac.relu(output_image1))
output_image2 = pixels(ac.relu(output_image2))
output_image3 = pixels(ac.relu(output_image3))


def Conv():
    """Plot the original image alongside the edge-detection responses."""
    f, axarr = plt.subplots(2, 2, figsize=(10, 10))
    # plt.set_cmap('gray')
    f.suptitle('Kernel 3x3', fontsize=16)
    axarr[0, 0].imshow(im.original(data))
def test_relu_2(self):
    self.assertEqual(list(a.relu([-3, -100, 0, 100, 1000, -2])),
                     [0, 0, 0, 100, 1000, 0])
def test_relu_1(self):
    self.assertEqual(list(a.relu([0.5, -0.2, 0.7])), [0.5, 0, 0.7])
# bin_img = np.append(bin_img, np.array(tmp_img), axis=0)
# bin_label = np.append(bin_label, tmp_label, axis=0)
#
# Python built-in types debugging
# print(unpack(len(tmp)*'B', tmp))
# print(type(unpack(len(tmp)*'B', tmp)))
# print(img_load)
# print(label_load)
# ----------------------------------------------------------------

# forward pass through three relu hidden layers and a softmax output
w_out = np.ones((784, 10))
z_1 = np.dot(layer_img.T, w_1).T + b_1
a_1 = activation.relu(z_1)
z_2 = np.dot(a_1.T, w_2).T + b_2
a_2 = activation.relu(z_2)
z_3 = np.dot(a_2.T, w_3).T + b_3
a_3 = activation.relu(z_3)
z_out = np.dot(a_3.T, w_out).T
output = loss.softmax(z_out)
t = loss.crossEntropy(output, label)

dw_3 = np.dot(a_2, (a_3 - label).T)
print(dw_3)
def RELU(x, derivative=False):
    return relu(0, x, derivative)
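# The wrapper above passes 0 as the first argument, which suggests the wrapped
# relu takes a leak slope, the input, and a derivative flag. A sketch consistent
# with that call; the real signature is an assumption and may differ.
import numpy as np

def relu(alpha, x, derivative=False):
    x = np.asarray(x, dtype=float)
    if derivative:
        return np.where(x > 0, 1.0, alpha)
    return np.where(x > 0, x, alpha * x)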
X_train, y_train = mnist_reader.load_mnist('data/fashion', kind='train')
X_test, y_test = mnist_reader.load_mnist('data/fashion', kind='t10k')
X_train = X_train.astype(np.float32) / 255
X_test = X_test.astype(np.float32) / 255

# shuffle the training set, then carve out a dev split
r = np.random.permutation(len(y_train))
X_train = X_train[r]
y_train = y_train[r]
X_dev = X_train[:12000]
y_dev = y_train[:12000]
X_train = X_train[10000:]
y_train = y_train[10000:]
LOG.info("finish data preprocessing.")

# four fully connected layers: 784 -> 256 -> 128 -> 64 -> 10
FCs = [
    FullyConnected(784, 256, opts.batch_size, relu()),
    FullyConnected(256, 128, opts.batch_size, relu()),
    FullyConnected(128, 64, opts.batch_size, relu()),
    FullyConnected(64, 10, opts.batch_size, softmax())
]
LOG.info("finish initialization.")

n_samples = len(y_train)
order = np.arange(n_samples)
best_precision, test_precision = 0, 0
for epochs in range(0, opts.epochs):
    np.random.shuffle(order)
    cost = 0.
    for batch_start in range(0, n_samples, opts.batch_size):
        batch_end = batch_start + opts.batch_size
        if batch_start \