def main():
    """Check ``Autoencoder.compute_loss`` against a numerical gradient.

    Builds a 5-4-3-4-5 sigmoid autoencoder with random weights, then
    compares the gradient returned by ``compute_loss`` with the values
    ``check_grad`` produces along each coordinate direction, and prints
    the smallest and largest absolute differences.
    """
    inputs = np.random.random((5, 5))
    layer_shapes = [(5, 4), (4, 3), (3, 4), (4, 5)]
    autoencoder = Autoencoder([
        FCLayer(shape, SigmoidActivationFunction(), True)
        for shape in layer_shapes
    ])

    w = np.random.normal(size=autoencoder.net.params_number)
    autoencoder.net.set_weights(w)
    _, loss_grad = autoencoder.compute_loss(inputs)

    num_params = autoencoder.net.params_number
    direction = np.zeros((num_params))
    check_loss_grad = np.zeros((num_params))

    def objective(x):
        return loss_func(autoencoder, x, inputs)

    for idx in range(num_params):
        # unit vector selecting the idx-th parameter
        direction.fill(0)
        direction[idx] = 1
        check_loss_grad[idx] = check_grad(objective, w, direction)

    abs_diff = np.abs(loss_grad - check_loss_grad)
    max_diff = abs_diff.max()
    min_diff = abs_diff.min()

    print("compute_loss")
    print("min_diff = ", min_diff)
    print("max_diff = ", max_diff)
def __init__(self, nodes, mu_dt, act_fn, use_bias=False, kaiming_init=False, pe_fn=minus, pe_fn_inverse=add):
    """Store the hyper-parameters and build the fully connected layers.

    One ``FCLayer`` is created for every pair of consecutive entries in
    ``nodes``. The last layer always gets ``utils.Linear()`` as its
    activation; every other layer gets ``act_fn``.
    """
    self.nodes = nodes
    self.mu_dt = mu_dt
    self.n_nodes = len(nodes)
    self.n_layers = len(nodes) - 1
    self.pe_fn = pe_fn
    self.pe_fn_inverse = pe_fn_inverse
    self.eps = 1e-6

    last_idx = self.n_layers - 1
    self.layers = [
        FCLayer(
            in_size=fan_in,
            out_size=fan_out,
            act_fn=utils.Linear() if idx == last_idx else act_fn,
            use_bias=use_bias,
            kaiming_init=kaiming_init,
        )
        for idx, (fan_in, fan_out) in enumerate(zip(nodes[:-1], nodes[1:]))
    ]
def _init_params(self, load_from):
    """
    Create the layer stack (one-hot encoder, recurrent layers, softmax
    output layer) and record learnable parameters and state dimensions.
    When `load_from` is not None, parameter values are then read from
    `load_from + '/params.npz'`.
    """
    opts = self._options
    self._params = OrderedDict()
    self._prev_dims = OrderedDict()  # not learnable

    def add_states(layer, state_dim):
        # layers reporting a non-positive state size carry no state
        if state_dim > 0:
            self._prev_dims[layer.pfx('prev')] = state_dim
            if opts['learn_init_states']:
                init_state = np.zeros(state_dim).astype('float32')
                self._params[layer.pfx('init')] = init_state

    # one-hot encoder
    encoder = OneHotLayer(self._pfx + "OneHot")
    self._layers = [encoder]
    encoder.add_param(params=self._params,
                      n_in=1,
                      n_out=opts['input_dim'],
                      options=opts)

    # main recurrent layers
    unit = opts['unit_type'].upper()
    D = opts['net_depth']
    assert D > 0
    # NOTE(review): eval() resolves the layer class from the 'unit_type'
    # option; that option must never come from untrusted input.
    layer_cls = eval(unit + 'Layer')
    for depth in range(1, D + 1):
        recurrent = layer_cls(self._pfx + unit + '_' + str(depth))
        self._layers.append(recurrent)
        # only the first recurrent layer is fed by the one-hot encoder
        fan_in = opts['net_width'] if depth > 1 else opts['input_dim']
        state_dim = recurrent.add_param(params=self._params,
                                        n_in=fan_in,
                                        n_out=opts['net_width'],
                                        options=opts)
        add_states(recurrent, state_dim)

    # softmax before cross-entropy loss
    assert opts['loss_type'] == 'crossentropy'
    output_layer = FCLayer(self._pfx + 'Softmax')
    self._layers.append(output_layer)
    state_dim = output_layer.add_param(params=self._params,
                                       n_in=opts['net_width'],
                                       n_out=opts['target_dim'],
                                       options=opts,
                                       act='lambda x: tt.nnet.softmax(x)')
    add_states(output_layer, state_dim)

    if load_from is None:
        return

    len_pfx = len(self._pfx)
    params = np.load(load_from + '/params.npz')  # NpzFile object
    for k in iterkeys(self._params):
        # saved parameter names carry no prefix
        self._params[k] = params[k[len_pfx:]]
def main():
    """Check ``Autoencoder.compute_hessvec`` against a numerical estimate.

    Builds a 5-4-3-4-5 sigmoid autoencoder with random weights, computes
    the Hessian-vector product for a random direction ``p`` and compares
    it with ``check_grad`` applied to the loss gradient along ``p``.
    Prints the smallest and largest absolute differences.
    """
    inputs = np.random.random((5, 5))
    autoencoder = Autoencoder([
        FCLayer((5, 4), SigmoidActivationFunction(), True),
        FCLayer((4, 3), SigmoidActivationFunction(), True),
        FCLayer((3, 4), SigmoidActivationFunction(), True),
        FCLayer((4, 5), SigmoidActivationFunction(), True)
    ])
    w = np.random.normal(size=autoencoder.net.params_number)
    autoencoder.net.set_weights(w)

    # The returned loss and gradient are not used here. The call itself is
    # kept: presumably compute_hessvec relies on state left behind by this
    # pass -- TODO confirm.
    autoencoder.compute_loss(inputs)

    p = np.random.normal(size=autoencoder.net.params_number)
    Rp_loss_grad = autoencoder.compute_hessvec(p)
    check_Rp_loss_grad = \
        check_grad(lambda x: loss_func_grad(autoencoder, x, inputs), w, p)

    abs_diff = np.abs(Rp_loss_grad - check_Rp_loss_grad)
    max_diff = abs_diff.max()
    min_diff = abs_diff.min()
    print("compute_hessvec")
    print("min_diff = ", min_diff)
    print("max_diff = ", max_diff)
def __init__(self, model_dir, args):
    """Build the hierarchical relation classifier on top of a pretrained encoder.

    Args:
        model_dir: accepted for interface compatibility; not used here.
        args: namespace providing ``num_o_labels``, ``num_m_labels``,
            ``model_type``, ``model_name_or_path``, ``o_label_dim``,
            ``trans_dim``, ``dropout_rate`` and ``is_muti_label``.
    """
    self.args = args
    self.num_o_labels = args.num_o_labels
    self.num_m_labels = args.num_m_labels

    self.config_class, _, config_model = MODEL_CLASSES[args.model_type]
    bert_config = self.config_class.from_pretrained(args.model_name_or_path)
    super(LanguageHierarchicalRelationClassification, self).__init__(bert_config)
    self.bert = config_model.from_pretrained(args.model_name_or_path, config=bert_config)  # Load pretrained bert

    # Sub-modules kept in a plain Python list are invisible to
    # parameters(), .to(device) and state_dict(); nn.ModuleList registers
    # them while still supporting indexing and iteration.
    self.trans_layer = nn.ModuleList()
    self.trans_weight_layer = nn.ModuleList()
    self.o_label_emb = nn.Embedding(self.num_o_labels, args.o_label_dim)
    for _ in range(self.num_o_labels):
        # one transform and one weighting layer per o-label; created in
        # the same interleaved order as before
        self.trans_layer.append(KongJianTrans(bert_config.hidden_size, args.trans_dim))
        self.trans_weight_layer.append(
            FCLayer(args.trans_dim, args.o_label_dim, dropout_rate=0, use_activation=False))

    # The FC heads operate on trans_dim features (an earlier revision
    # used bert_config.hidden_size for both sizes).
    self.cls_fc_layer = FCLayer(args.trans_dim, args.trans_dim, args.dropout_rate)
    self.e1_fc_layer = FCLayer(args.trans_dim, args.trans_dim, args.dropout_rate)
    self.e2_fc_layer = FCLayer(args.trans_dim, args.trans_dim, args.dropout_rate)

    # the classifier takes three concatenated trans_dim-sized vectors
    if self.args.is_muti_label:
        self.fc = FCLayerSigmoid(args.trans_dim * 3, self.num_m_labels)
    else:
        self.fc = FCLayerSoftmax(args.trans_dim * 3, self.num_m_labels)
    self.softmax = nn.Softmax(-1)

    # loss
    self.loss_fct_bce = nn.BCELoss()
    self.init_weights()
sgd = SGD(learning_rate_SGD, weight_decay) # ## 1.1 MLP with Euclidean Loss and Sigmoid Activation Function # Build and train a MLP contraining one hidden layer with 128 units using Sigmoid activation function and Euclidean loss function. # # ### TODO # Before executing the following code, you should complete **layers/fc_layer.py** and **layers/sigmoid_layer.py**. # In[7]: from layers import FCLayer, SigmoidLayer sigmoidMLP = Network() # Build MLP with FCLayer and SigmoidLayer # 128 is the number of hidden units, you can change by your own sigmoidMLP.add(FCLayer(784, 128)) sigmoidMLP.add(SigmoidLayer()) sigmoidMLP.add(FCLayer(128, 10)) # In[15]: sigmoidMLP, sigmoid_loss, sigmoid_acc = train(sigmoidMLP, criterion, sgd, data_train, max_epoch, batch_size, disp_freq) # In[16]: test(sigmoidMLP, criterion, data_test, batch_size, disp_freq) # ## 1.2 MLP with Euclidean Loss and ReLU Activation Function # Build and train a MLP contraining one hidden layer with 128 units using ReLU activation function and Euclidean loss function.
def main():
    '''
    Train an autoencoder on MNIST and plot its test loss.

    Command-line options (read through parse_args):
    param path: path to folder where you are loading MNIST
    param type: type of gradient function (sgd, sgd_momentum, rmsprop, adam)
    param train_size: train data size
    param test_size: test data size
    param num_epoch: number of epochs
    param minibatch_size: minibatch size
    param momentum: momentum
    param display: print to display

    Raises ValueError when ``type`` is not one of the supported optimizers.
    '''
    options = parse_args()

    # Fail early on an unsupported optimizer. Previously no branch matched
    # and the run ended with a NameError on ``res`` after the data was loaded.
    optimizer_type = options['type']
    if optimizer_type not in ('sgd', 'sgd_momentum', 'rmsprop', 'adam'):
        raise ValueError(
            "unknown optimizer type %r; expected one of "
            "'sgd', 'sgd_momentum', 'rmsprop', 'adam'" % (optimizer_type,))

    # NOTE(review): fetch_mldata was removed from recent scikit-learn
    # releases; fetch_openml is the replacement but returns the data in a
    # different layout, so it is not swapped in here.
    mnist = fetch_mldata('MNIST original', data_home=options['path'])
    data = mnist.data.astype('float64')

    # NOTE(review): train and test rows are drawn independently from the
    # same array, so the two sets can overlap -- confirm this is intended.
    train_size = options['train_size']
    train_data = data[np.random.choice(data.shape[0], train_size, False), :]
    test_size = options['test_size']
    test_data = data[np.random.choice(data.shape[0], test_size, False), :]

    autoencoder = Autoencoder([
        FCLayer((784, 250), SigmoidActivationFunction(), True),
        FCLayer((250, 50), SigmoidActivationFunction(), True),
        FCLayer((50, 2), SigmoidActivationFunction(), True),
        FCLayer((2, 50), LinearActivationFunction(), True),
        FCLayer((50, 250), SigmoidActivationFunction(), True),
        FCLayer((250, 784), SigmoidActivationFunction(), True)
    ])

    # the optimizers receive the data transposed
    train_inputs = train_data.transpose()
    # keyword arguments shared by every optimizer entry point
    shared_kwargs = dict(
        step_size=1.0,
        num_epoch=options['num_epoch'],
        minibatch_size=options['minibatch_size'],
        l2_coef=1e-4,
        test_inputs=test_data.transpose(),
        display=options['display'],
    )

    if optimizer_type in ('sgd', 'sgd_momentum'):
        # plain SGD is run_sgd with zero momentum
        momentum = options['momentum'] if optimizer_type == 'sgd_momentum' else 0
        res = autoencoder.run_sgd(train_inputs, momentum=momentum,
                                  **shared_kwargs)
    elif optimizer_type == 'rmsprop':
        res = autoencoder.run_rmsprop(train_inputs, **shared_kwargs)
    else:  # 'adam'
        res = autoencoder.run_adam(train_inputs, **shared_kwargs)

    print(res)
    plt.title('test loss')
    plt.scatter(np.arange(len(res['test_loss'])), res['test_loss'])
    plt.show()
def _init_params(self, load_from):
    """
    Create the layers (optional ID embedder, recurrent stack, final
    fully connected output layer) and record learnable parameters and
    state dimensions.
    When `load_from` is not None, parameter values are then read from
    `load_from + '/params.npz'`.
    """
    opts = self._options
    self._params = OrderedDict()
    self._prev_dims = OrderedDict()  # not learnable

    def add_states(layer, state_dim):
        # layers reporting a non-positive state size carry no state
        if state_dim > 0:
            self._prev_dims[layer.pfx('prev')] = state_dim
            if opts['learn_init_states']:
                init_state = np.zeros(state_dim).astype('float32')
                self._params[layer.pfx('init')] = init_state

    # optional ID embedder; its output width is added to the input width
    # of every later layer
    if opts['learn_id_embedding']:
        self._id_embedder = FCLayer(self._pfx + 'FC_id_embedder')
        state_dim = self._id_embedder.add_param(
            params=self._params,
            n_in=opts['id_count'],
            n_out=opts['id_embedding_dim'],
            options=opts,
            act='lambda x: x')
        add_states(self._id_embedder, state_dim)
        extra_in = opts['id_embedding_dim']
    else:
        extra_in = 0

    # main recurrent layers
    unit = opts['unit_type'].upper()
    self._layers = []
    D = opts['net_depth']
    assert D > 0
    # NOTE(review): eval() resolves the layer class from the 'unit_type'
    # option; that option must never come from untrusted input.
    layer_cls = eval(unit + 'Layer')
    for depth in range(D):
        recurrent = layer_cls(self._pfx + unit + '_' + str(depth))
        self._layers.append(recurrent)
        base_in = opts['net_width'] if depth > 0 else opts['input_dim']
        state_dim = recurrent.add_param(params=self._params,
                                        n_in=extra_in + base_in,
                                        n_out=opts['net_width'],
                                        options=opts)
        add_states(recurrent, state_dim)

    # final FCLayer for dimension compression
    output_layer = FCLayer(self._pfx + 'FC_output')
    self._layers.append(output_layer)
    state_dim = output_layer.add_param(params=self._params,
                                       n_in=extra_in + opts['net_width'],
                                       n_out=opts['target_dim'],
                                       options=opts,
                                       act='lambda x: x')
    add_states(output_layer, state_dim)

    if load_from is None:
        return

    len_pfx = len(self._pfx)
    params = np.load(load_from + '/params.npz')  # NpzFile object
    for k in iterkeys(self._params):
        # saved parameter names carry no prefix
        self._params[k] = params[k[len_pfx:]]
import numpy as np
from layers import FCLayer, ActivationLayer
from functions import Activation, ActivationPrime
from functions import Loss, LossPrime
from network import Network

# Training data: the four input pairs and their XOR targets, each sample
# stored as a 1-row matrix
x_train = np.array([[[0, 0]], [[0, 1]], [[1, 0]], [[1, 1]]])
y_train = np.array([[[0]], [[1]], [[1]], [[0]]])

# Network architecture: 2 -> 3 -> 1 with tanh after each fully connected layer
net = Network()
net.add(FCLayer(2, 3))
net.add(ActivationLayer(Activation.tanh, ActivationPrime.tanh_prime))
net.add(FCLayer(3, 1))
net.add(ActivationLayer(Activation.tanh, ActivationPrime.tanh_prime))

# Train your network (mean squared error loss and its derivative)
net.use(Loss.mse, LossPrime.mse_prime)
net.fit(x_train, y_train, epochs=1000, alpha=0.1)

# Test: predict on the training inputs and print the result
out = net.predict(x_train)
print(out)
def __init__(self, config):
    """Assemble the symbolic AlexNet graph.

    Reads ``batch_size``, ``use_data_layer`` and ``lib_conv`` from
    ``config`` (plus ``rand_crop`` when the data layer is enabled),
    builds five conv/pool layers, two fully connected layers with
    dropout and a 1000-way softmax, and stores the cost, the error
    expressions, the parameters and the input variables on ``self``.
    """
    self.config = config

    batch_size = config['batch_size']
    flag_datalayer = config['use_data_layer']
    lib_conv = config['lib_conv']

    # ##################### BUILD NETWORK ##########################
    # symbolic variables for the data
    # 'rand' is a random array used for random cropping/mirroring of data
    x = T.ftensor4('x')
    y = T.ivector('y')
    rand = T.fvector('rand')

    print('... building the model')
    self.layers = []
    params = []
    weight_types = []

    def _track(layer):
        # register a layer that owns parameters and hand it back
        self.layers.append(layer)
        params.extend(layer.params)
        weight_types.extend(layer.weight_type)
        return layer

    if flag_datalayer:
        data_layer = DataLayer(input=x,
                               image_shape=(3, 256, 256, batch_size),
                               cropsize=227,
                               rand=rand,
                               mirror=True,
                               flag_rand=config['rand_crop'])
        layer1_input = data_layer.output
    else:
        layer1_input = x

    # one row per conv/pool layer, in network order
    conv_fields = ('image_shape', 'filter_shape', 'convstride', 'padsize',
                   'group', 'poolsize', 'poolstride', 'bias_init', 'lrn')
    conv_rows = (
        ((3, 227, 227, batch_size), (3, 11, 11, 96), 4, 0, 1, 3, 2, 0.0, True),
        ((96, 27, 27, batch_size), (96, 5, 5, 256), 1, 2, 2, 3, 2, 0.1, True),
        ((256, 13, 13, batch_size), (256, 3, 3, 384), 1, 1, 1, 1, 0, 0.0, False),
        ((384, 13, 13, batch_size), (384, 3, 3, 384), 1, 1, 2, 1, 0, 0.1, False),
        ((384, 13, 13, batch_size), (384, 3, 3, 256), 1, 1, 2, 3, 2, 0.0, False),
    )
    conv_out = layer1_input
    for row in conv_rows:
        conv_layer = _track(ConvPoolLayer(input=conv_out,
                                          lib_conv=lib_conv,
                                          **dict(zip(conv_fields, row))))
        conv_out = conv_layer.output

    # move the batch axis to the front and flatten the rest for the FC layers
    fc_layer6_input = T.flatten(conv_out.dimshuffle(3, 0, 1, 2), 2)
    fc_layer6 = _track(FCLayer(input=fc_layer6_input, n_in=9216, n_out=4096))
    # dropout layers are not tracked
    dropout_layer6 = DropoutLayer(fc_layer6.output, n_in=4096, n_out=4096)

    fc_layer7 = _track(
        FCLayer(input=dropout_layer6.output, n_in=4096, n_out=4096))
    dropout_layer7 = DropoutLayer(fc_layer7.output, n_in=4096, n_out=4096)

    softmax_layer8 = _track(
        SoftmaxLayer(input=dropout_layer7.output, n_in=4096, n_out=1000))

    # #################### NETWORK BUILT #######################
    self.cost = softmax_layer8.negative_log_likelihood(y)
    self.errors = softmax_layer8.errors(y)
    self.errors_top_5 = softmax_layer8.errors_top_x(y, 5)
    self.params = params
    self.x = x
    self.y = y
    self.rand = rand
    self.weight_types = weight_types
    self.batch_size = batch_size
def __init__(self, config):
    """Assemble the symbolic AlexNet graph for either convolution backend.

    Reads ``batch_size``, ``lib_conv``, ``grouping`` and ``LRN`` from
    ``config``. Tensor shapes are declared in one layout when
    ``lib_conv == 'cudaconvnet'`` and in another otherwise. Builds five
    conv/pool layers, two fully connected layers with dropout and a
    1000-way softmax, and stores the cost, the error expressions, the
    parameters and the input variables on ``self``.
    """
    self.config = config

    batch_size = config.batch_size
    lib_conv = config.lib_conv
    group = 2 if config.grouping else 1
    LRN = bool(config.LRN)

    print(' '.join(['LRN, group', str(LRN), str(group)]))

    # ##################### BUILD NETWORK ##########################
    # symbolic variables for the data
    x = T.ftensor4('x')
    y = T.lvector('y')

    print('... building the model with ConvLib %s, LRN %s, grouping %i '
          % (lib_conv, LRN, group))
    self.layers = []
    params = []
    weight_types = []

    def _track(layer):
        # register a layer that owns parameters and hand it back
        self.layers.append(layer)
        params.extend(layer.params)
        weight_types.extend(layer.weight_type)
        return layer

    cuda_layout = (lib_conv == 'cudaconvnet')

    def _pick(for_cudaconvnet, otherwise):
        # choose the value matching the active convolution backend
        return for_cudaconvnet if cuda_layout else otherwise

    # NOTE(review): the cudaconvnet branch declares a 224x224 input while
    # the other branch declares 227x227 -- confirm this is intended.
    conv_fields = ('image_shape', 'filter_shape', 'convstride', 'padsize',
                   'group', 'poolsize', 'poolstride', 'bias_init', 'lrn')
    conv_rows = (
        (_pick((3, 224, 224, batch_size), (batch_size, 3, 227, 227)),
         _pick((3, 11, 11, 96), (96, 3, 11, 11)),
         4, _pick(0, 3), 1, 3, 2, 0.0, LRN),
        (_pick((96, 27, 27, batch_size), (batch_size, 96, 27, 27)),
         _pick((96, 5, 5, 256), (256, 96, 5, 5)),
         1, 2, group, 3, 2, 0.1, LRN),
        (_pick((256, 13, 13, batch_size), (batch_size, 256, 13, 13)),
         _pick((256, 3, 3, 384), (384, 256, 3, 3)),
         1, 1, 1, 1, 0, 0.0, False),
        (_pick((384, 13, 13, batch_size), (batch_size, 384, 13, 13)),
         _pick((384, 3, 3, 384), (384, 384, 3, 3)),
         1, 1, group, 1, 0, 0.1, False),
        (_pick((384, 13, 13, batch_size), (batch_size, 384, 13, 13)),
         _pick((384, 3, 3, 256), (256, 384, 3, 3)),
         1, 1, group, 3, 2, 0.0, False),
    )
    conv_out = x
    for row in conv_rows:
        conv_layer = _track(ConvPoolLayer(input=conv_out,
                                          lib_conv=lib_conv,
                                          **dict(zip(conv_fields, row))))
        conv_out = conv_layer.output

    # flatten to two dimensions; the cudaconvnet output first gets its
    # last axis moved to the front
    if cuda_layout:
        fc_layer6_input = T.flatten(conv_out.dimshuffle(3, 0, 1, 2), 2)
    else:
        fc_layer6_input = conv_out.flatten(2)

    fc_layer6 = _track(FCLayer(input=fc_layer6_input, n_in=9216, n_out=4096))
    # dropout layers are not tracked
    dropout_layer6 = DropoutLayer(fc_layer6.output)

    fc_layer7 = _track(
        FCLayer(input=dropout_layer6.output, n_in=4096, n_out=4096))
    dropout_layer7 = DropoutLayer(fc_layer7.output)

    softmax_layer8 = _track(
        SoftmaxLayer(input=dropout_layer7.output, n_in=4096, n_out=1000))

    # #################### NETWORK BUILT #######################
    self.cost = softmax_layer8.negative_log_likelihood(y)
    self.errors = softmax_layer8.errors(y)
    self.errors_top_5 = softmax_layer8.errors_top_x(y, 5)
    self.params = params
    self.x = x
    self.y = y
    # self.rand = rand
    self.weight_types = weight_types
    self.batch_size = batch_size
def __init__(self, config):
    """Build a two-input fully connected classifier graph.

    Reads ``batch_size``, ``use_data_layer`` and ``lib_conv`` from
    ``config`` (plus ``rand_crop`` when the data layer is enabled). The
    two image inputs are optionally passed through a ``DataLayer`` each,
    flattened, concatenated and fed through two fully connected layers
    with dropout and a 2-way softmax. The cost, error expressions,
    parameters and input variables are stored on ``self``.
    """
    self.config = config

    batch_size = config['batch_size']
    flag_datalayer = config['use_data_layer']
    lib_conv = config['lib_conv']  # read but not used in this constructor

    layers = []
    params = []
    weight_types = []

    # ##################### BUILD NETWORK ##########################
    # allocate symbolic variables for the data
    # 'rand1'/'rand2' are random arrays used for random cropping/mirroring
    # of the first/second input
    x1 = T.ftensor4('x1')
    x2 = T.ftensor4('x2')
    y = T.lvector('y')  # The ground truth to be compared with will go here
    rand1 = T.fvector('rand1')
    rand2 = T.fvector('rand2')

    print('... building the model')

    if flag_datalayer:
        # Each data layer gets its own random vector. The previous code
        # passed an undefined name `rand` here.
        data_layerA = DataLayer(input=x1,
                                image_shape=(3, 256, 256, batch_size),
                                cropsize=227,
                                rand=rand1,
                                mirror=True,
                                flag_rand=config['rand_crop'])
        layer1A_input = data_layerA.output

        data_layerB = DataLayer(input=x2,
                                image_shape=(3, 256, 256, batch_size),
                                cropsize=227,
                                rand=rand2,
                                mirror=True,
                                flag_rand=config['rand_crop'])
        layer1B_input = data_layerB.output
    else:
        layer1A_input = x1
        layer1B_input = x2

    # flatten both inputs and place them side by side
    fc_layer2_input = T.concatenate(
        (T.flatten(layer1A_input.dimshuffle(3, 0, 1, 2), 2),
         T.flatten(layer1B_input.dimshuffle(3, 0, 1, 2), 2)),
        axis=1)

    # 154587 = 3 * 227 * 227, one flattened cropped image
    # (assumes 227x227 inputs when the data layer is off -- TODO confirm)
    fc_layer2 = FCLayer(input=fc_layer2_input, n_in=154587 * 2, n_out=4096)
    layers.append(fc_layer2)
    params += fc_layer2.params
    weight_types += fc_layer2.weight_type

    dropout_layer2 = DropoutLayer(fc_layer2.output, n_in=4096, n_out=4096)

    fc_layer3 = FCLayer(input=dropout_layer2.output, n_in=4096, n_out=4096)
    layers.append(fc_layer3)
    params += fc_layer3.params
    weight_types += fc_layer3.weight_type

    dropout_layer3 = DropoutLayer(fc_layer3.output, n_in=4096, n_out=4096)

    # Final softmax layer
    softmax_layer3 = SoftmaxLayer(
        input=dropout_layer3.output, n_in=4096, n_out=2)  # Only a single binary output is required!
    layers.append(softmax_layer3)
    params += softmax_layer3.params
    weight_types += softmax_layer3.weight_type

    # #################### NETWORK BUILT #######################
    self.cost = softmax_layer3.negative_log_likelihood(y)
    self.errors = softmax_layer3.errors(y)
    # NOTE(review): a top-5 error over a 2-way softmax looks unintended --
    # confirm before relying on it.
    self.errors_top_5 = softmax_layer3.errors_top_x(y, 5)
    self.x1 = x1
    self.x2 = x2
    self.y = y
    self.rand1 = rand1
    self.rand2 = rand2
    self.layers = layers
    self.params = params
    self.weight_types = weight_types
    self.batch_size = batch_size
# # training data # x_train = np.array([[[0,0]], [[0,1]], [[1,0]], [[1,1]]]) # one_hot_encoded_y_train = np.array([[[1, 0]], [[0, 1]], [[0, 1]], [[0, 1]]]) # y_train =[0, 1, 1, 1] # this line is used to catch the errors arising from numpy. np.seterr(all='raise') input_number = x_train.shape[2] output_number = 6 size_of_hidden_layer = 10 neural_network = NeuralNetwork(cross_entropy, cross_entropy_prime) neural_network.add_layer( FCLayer(input_number, size_of_hidden_layer, diminishing_factor=10)) neural_network.add_layer(ActivationLayer(swish, swish_prime)) neural_network.add_layer(FCLayer(size_of_hidden_layer, output_number)) neural_network.add_layer(ActivationLayer(softmax, softmax_prime)) neural_network.fit(x_train, one_hot_encoded_y_train, epoch_number=10, initial_learning_rate=0.5, decay=0.01) out = neural_network.predict(x_train) predictions = argmax(out) print("confusion matrix:", confusion_matrix(y_train, predictions), sep="\n") print("accuracy: ", accuracy_score(y_train, predictions)) print("end")
def __init__(self, config, testMode):
    """Build the first ``config['useLayers']`` layers of AlexNet and compile a forward function.

    Args:
        config: mapping providing ``batch_size``, ``lib_conv``,
            ``useLayers``, ``initWeights``, ``prob_drop`` and
            ``mean_file``; ``weightsDir`` and ``weightFileTag`` are read
            when ``initWeights`` is truthy.
        testMode: when truthy, the dropout layers after the fully
            connected layers are skipped and the FC layer feeds the next
            layer directly.

    The compiled function is stored as ``self.forwardFunction``. It takes
    the input batch (the mean defaults to the tiled contents of
    ``mean_file``) and returns ``y_pred`` of the softmax layer when all
    eight layers are built, otherwise the ``output`` of the last layer.
    """
    self.config = config

    batch_size = config['batch_size']
    lib_conv = config['lib_conv']
    useLayers = config['useLayers']
    #imgWidth = config['imgWidth']
    #imgHeight = config['imgHeight']
    initWeights = config['initWeights']  # if we wish to initialize alexnet with some weights.
    # need to make changes in layers.py to accept initializing weights

    # Defaults keep the names bound when no weights are loaded; the
    # weight file names built below would otherwise hit an unbound name.
    weightsDir = None
    weightFileTag = ''
    if initWeights:
        weightsDir = config['weightsDir']
        weightFileTag = config['weightFileTag']
    prob_drop = config['prob_drop']

    # ##################### BUILD NETWORK ##########################
    x = T.ftensor4('x')
    mean = T.ftensor4('mean')
    #y = T.lvector('y')

    print('... building the model')
    self.layers = []
    params = []
    weight_types = []

    def _track(layer):
        # register a layer that owns parameters and hand it back
        self.layers.append(layer)
        params.extend(layer.params)
        weight_types.extend(layer.weight_type)
        return layer

    def _weight_files(layer_idx, grouped):
        # File names for layer `layer_idx`: grouped conv layers store two
        # weight and two bias arrays, the others one of each.
        if grouped:
            stems = ('W0_', 'W1_', 'b0_', 'b1_')
        else:
            stems = ('W_', 'b_')
        return [stem + str(layer_idx) + weightFileTag for stem in stems]

    if useLayers >= 1:
        convpool_layer1 = _track(ConvPoolLayer(
            input=x - mean,
            image_shape=(3, None, None, batch_size),
            filter_shape=(3, 11, 11, 96),
            convstride=4, padsize=0, group=1,
            poolsize=3, poolstride=2,
            bias_init=0.0, lrn=True,
            lib_conv=lib_conv,
            initWeights=initWeights, weightsDir=weightsDir,
            weightFiles=_weight_files(0, grouped=False)))

    if useLayers >= 2:
        convpool_layer2 = _track(ConvPoolLayer(
            input=convpool_layer1.output,
            # spatial size left open; it depends on conv1's output
            image_shape=(96, None, None, batch_size),
            filter_shape=(96, 5, 5, 256),
            convstride=1, padsize=2, group=2,
            poolsize=3, poolstride=2,
            bias_init=0.1, lrn=True,
            lib_conv=lib_conv,
            initWeights=initWeights, weightsDir=weightsDir,
            weightFiles=_weight_files(1, grouped=True)))

    if useLayers >= 3:
        convpool_layer3 = _track(ConvPoolLayer(
            input=convpool_layer2.output,
            image_shape=(256, None, None, batch_size),
            filter_shape=(256, 3, 3, 384),
            convstride=1, padsize=1, group=1,
            poolsize=1, poolstride=0,
            bias_init=0.0, lrn=False,
            lib_conv=lib_conv,
            initWeights=initWeights, weightsDir=weightsDir,
            weightFiles=_weight_files(2, grouped=False)))

    if useLayers >= 4:
        convpool_layer4 = _track(ConvPoolLayer(
            input=convpool_layer3.output,
            image_shape=(384, None, None, batch_size),
            filter_shape=(384, 3, 3, 384),
            convstride=1, padsize=1, group=2,
            poolsize=1, poolstride=0,
            bias_init=0.1, lrn=False,
            lib_conv=lib_conv,
            initWeights=initWeights, weightsDir=weightsDir,
            weightFiles=_weight_files(3, grouped=True)))

    if useLayers >= 5:
        convpool_layer5 = _track(ConvPoolLayer(
            input=convpool_layer4.output,
            image_shape=(384, None, None, batch_size),
            filter_shape=(384, 3, 3, 256),
            convstride=1, padsize=1, group=2,
            poolsize=3, poolstride=2,
            bias_init=0.0, lrn=False,
            lib_conv=lib_conv,
            initWeights=initWeights, weightsDir=weightsDir,
            weightFiles=_weight_files(4, grouped=True)))

    if useLayers >= 6:
        fc_layer6_input = T.flatten(convpool_layer5.output.dimshuffle(3, 0, 1, 2), 2)
        fc_layer6 = _track(FCLayer(
            input=fc_layer6_input, n_in=9216, n_out=4096,
            initWeights=initWeights, weightsDir=weightsDir,
            weightFiles=_weight_files(5, grouped=False)))
        if testMode:
            # no dropout in test mode: the FC layer feeds the next layer
            dropout_layer6 = fc_layer6
        else:
            dropout_layer6 = DropoutLayer(fc_layer6.output, n_in=4096, n_out=4096, prob_drop=prob_drop)

    if useLayers >= 7:
        fc_layer7 = _track(FCLayer(
            input=dropout_layer6.output, n_in=4096, n_out=4096,
            initWeights=initWeights, weightsDir=weightsDir,
            weightFiles=_weight_files(6, grouped=False)))
        if testMode:
            # Was assigned to dropout_layer6, which left dropout_layer7
            # unbound for the softmax layer below.
            dropout_layer7 = fc_layer7
        else:
            dropout_layer7 = DropoutLayer(fc_layer7.output, n_in=4096, n_out=4096, prob_drop=prob_drop)

    if useLayers >= 8:
        softmax_layer8 = _track(SoftmaxLayer(
            input=dropout_layer7.output, n_in=4096, n_out=1000,
            initWeights=initWeights, weightsDir=weightsDir,
            weightFiles=_weight_files(7, grouped=False)))

    # #################### NETWORK BUILT #######################
    # dropout layers are never tracked, so index useLayers-1 is the last
    # layer that was built
    self.output = self.layers[useLayers-1]
    self.params = params
    self.x = x
    self.mean = mean
    self.weight_types = weight_types
    self.batch_size = batch_size
    self.useLayers = useLayers
    self.outLayer = self.layers[useLayers-1]

    meanVal = np.load(config['mean_file'])
    # x is 4d with `batch_size` images, so the mean is given a trailing
    # batch axis and tiled along it before being subtracted
    meanVal = meanVal[:, :, :, np.newaxis].astype('float32')
    meanVal = np.tile(meanVal, (1, 1, 1, batch_size))
    self.meanVal = meanVal
    #meanVal = np.zeros([3,imgHeight,imgWidth,2], dtype='float32')

    if useLayers >= 8:  # if last layer is softmax, then its output is y_pred
        finalOut = self.outLayer.y_pred
    else:
        finalOut = self.outLayer.output
    self.forwardFunction = theano.function([self.x, In(self.mean, value=meanVal)], [finalOut])
from layers import FCLayer
import numpy as np
from utils.check_grads import check_grads_layer

# Gradient check for FCLayer on random data.
batch = 10
# `batch` also sets the number of input rows, so inputs and in_grads stay
# consistent when it is changed (the row count was a separate literal 10).
inputs = np.random.uniform(size=(batch, 20))
out_features = 100
layer = FCLayer(in_features=inputs.shape[1], out_features=out_features)
# random gradient with one row per sample and one column per output feature
in_grads = np.random.uniform(size=(batch, out_features))
check_grads_layer(layer, inputs, in_grads)
import numpy as np
from layers import FCLayer
from utils.tools import rel_error

import keras
from keras import layers
from keras import models
from keras import optimizers
from keras import backend as K
import warnings
warnings.filterwarnings('ignore')

# Compare the forward pass of the local FCLayer with a Keras Dense layer
# that is given the same weights and bias.
inputs = np.random.uniform(size=(10, 20))
layer = FCLayer(in_features=inputs.shape[1], out_features=100)
out = layer.forward(inputs)

keras_model = models.Sequential()
keras_layer = layers.Dense(100, input_shape=inputs.shape[1:], use_bias=True, kernel_initializer='random_uniform', bias_initializer='zero')
# print (len(keras_layer.get_weights()))
keras_model.add(keras_layer)
sgd = optimizers.SGD(lr=0.01)
# Pass the configured optimizer instance; it was created but the string
# 'sgd' was handed to compile() instead, leaving `sgd` unused.
keras_model.compile(loss='mean_squared_error', optimizer=sgd)
# overwrite the Keras initialisation with the local layer's parameters
keras_layer.set_weights([layer.weights, layer.bias])
keras_out = keras_model.predict(inputs, batch_size=inputs.shape[0])
print('Relative error (<1e-6 will be fine): ', rel_error(out, keras_out))
# Build ConvNet with ConvLayer and PoolingLayer with open('params.npy', 'rb') as f: conv1 = ConvLayer(in_channels=3, out_channels=8, kernel_size=11) conv1.W = np.load(f) conv1.b = np.load(f) convNet.add(conv1) convNet.add(ReLULayer()) convNet.add(MaxPoolingLayer(kernel_size=10)) conv2 = ConvLayer(in_channels=8, out_channels=16, kernel_size=6) conv2.W = np.load(f) conv2.b = np.load(f) convNet.add(conv2) convNet.add(ReLULayer()) convNet.add(MaxPoolingLayer(kernel_size=3)) convNet.add(ReshapeLayer((batch_size, 16, 6, 6), (batch_size, 576))) fc1 = FCLayer(576, 64) fc1.W = np.load(f) fc1.b = np.load(f) convNet.add(fc1) convNet.add(ReLULayer()) fc2 = FCLayer(64, 2) fc2.W = np.load(f) fc2.b = np.load(f) convNet.add(fc2) img = Image.open('./ImageRecognition/trainingset_image/d_f18.jpg') width, height = (img.size[0], img.size[0]) if img.size[0] < img.size[1] else ( img.size[1], img.size[1]) # Get dimensions left = (img.size[0] - width) / 2
def __init__(self, config):
    """Assemble the symbolic AlexNet graph and its training state.

    Reads ``verbose``, ``batch_size``, ``use_data_layer``, ``lib_conv``,
    ``n_softmax_out``, ``input_width``, ``input_height``,
    ``file_batch_size``, ``learning_rate``, ``momentum`` and
    ``weight_decay`` from ``config`` (plus ``rand_crop`` when the data
    layer is enabled). Builds five conv/pool layers, two fully connected
    layers with dropout and a softmax, then stores the outputs, cost,
    errors, gradients, shared data buffers and sub-batch slices on
    ``self``.
    """
    ModelBase.__init__(self)

    self.config = config
    self.verbose = self.config['verbose']
    self.name = 'alexnet'

    batch_size = config['batch_size']
    flag_datalayer = config['use_data_layer']
    lib_conv = config['lib_conv']
    n_softmax_out = config['n_softmax_out']

    # ##################### BUILD NETWORK ##########################
    # symbolic variables for the data
    # 'rand' is a random array used for random cropping/mirroring of data
    x = T.ftensor4('x')
    y = T.lvector('y')
    rand = T.fvector('rand')
    lr = T.scalar('lr')

    if self.verbose:
        print('AlexNet 2/16')
    self.layers = []
    params = []
    weight_types = []

    def _track(layer):
        # register a layer that owns parameters and hand it back
        self.layers.append(layer)
        params.extend(layer.params)
        weight_types.extend(layer.weight_type)
        return layer

    if flag_datalayer:
        data_layer = DataLayer(input=x,
                               image_shape=(3, 256, 256, batch_size),
                               cropsize=227,
                               rand=rand,
                               mirror=True,
                               flag_rand=config['rand_crop'])
        layer1_input = data_layer.output
    else:
        layer1_input = x

    # one row per conv/pool layer, in network order
    conv_fields = ('image_shape', 'filter_shape', 'convstride', 'padsize',
                   'group', 'poolsize', 'poolstride', 'bias_init', 'lrn')
    conv_rows = (
        ((3, 227, 227, batch_size), (3, 11, 11, 96), 4, 0, 1, 3, 2, 0.0, True),
        ((96, 27, 27, batch_size), (96, 5, 5, 256), 1, 2, 2, 3, 2, 0.1, True),
        ((256, 13, 13, batch_size), (256, 3, 3, 384), 1, 1, 1, 1, 0, 0.0, False),
        ((384, 13, 13, batch_size), (384, 3, 3, 384), 1, 1, 2, 1, 0, 0.1, False),
        ((384, 13, 13, batch_size), (384, 3, 3, 256), 1, 1, 2, 3, 2, 0.0, False),
    )
    conv_out = layer1_input
    for row in conv_rows:
        conv_layer = _track(ConvPoolLayer(input=conv_out,
                                          lib_conv=lib_conv,
                                          verbose=self.verbose,
                                          **dict(zip(conv_fields, row))))
        conv_out = conv_layer.output

    # move the batch axis to the front and flatten the rest for the FC layers
    fc_layer6_input = T.flatten(conv_out.dimshuffle(3, 0, 1, 2), 2)
    fc_layer6 = _track(FCLayer(input=fc_layer6_input,
                               n_in=9216,
                               n_out=4096,
                               verbose=self.verbose))
    # dropout layers are not tracked
    dropout_layer6 = DropoutLayer(fc_layer6.output,
                                  n_in=4096,
                                  n_out=4096,
                                  verbose=self.verbose)

    fc_layer7 = _track(FCLayer(input=dropout_layer6.output,
                               n_in=4096,
                               n_out=4096,
                               verbose=self.verbose))
    dropout_layer7 = DropoutLayer(fc_layer7.output,
                                  n_in=4096,
                                  n_out=4096,
                                  verbose=self.verbose)

    softmax_layer8 = _track(SoftmaxLayer(input=dropout_layer7.output,
                                         n_in=4096,
                                         n_out=n_softmax_out,
                                         verbose=self.verbose))

    # #################### NETWORK BUILT #######################
    self.p_y_given_x = softmax_layer8.p_y_given_x
    self.y_pred = softmax_layer8.y_pred
    self.output = self.p_y_given_x

    self.cost = softmax_layer8.negative_log_likelihood(y)
    self.error = softmax_layer8.errors(y)
    # with fewer than five classes, "top 5" uses the number of classes
    top_k = n_softmax_out if n_softmax_out < 5 else 5
    self.error_top_5 = softmax_layer8.errors_top_x(y, top_k)

    self.params = params

    # inputs
    self.x = x
    self.y = y
    self.rand = rand
    self.lr = lr

    # buffers sized for a whole file batch (for loading large batch)
    file_batch_size = config['file_batch_size']
    shared_x_shape = (3, config['input_width'], config['input_height'],
                      file_batch_size)
    self.shared_x = theano.shared(
        np.zeros(shared_x_shape, dtype=theano.config.floatX), borrow=True)
    self.shared_y = theano.shared(
        np.zeros((file_batch_size, ), dtype=int), borrow=True)
    self.shared_lr = theano.shared(np.float32(config['learning_rate']))

    # training related
    self.base_lr = np.float32(config['learning_rate'])
    self.step_idx = 0
    self.mu = config['momentum']  # def: 0.9 # momentum
    self.eta = config['weight_decay']  # 0.0002 # weight decay
    self.weight_types = weight_types
    self.batch_size = batch_size

    self.grads = T.grad(self.cost, self.params)

    subb_ind = T.iscalar('subb')  # sub batch index
    #print self.shared_x[:,:,:,subb_ind*self.batch_size:(subb_ind+1)*self.batch_size].shape.eval()
    self.subb_ind = subb_ind
    # window of batch_size samples selected by the sub-batch index
    start = subb_ind * self.batch_size
    stop = (subb_ind + 1) * self.batch_size
    self.shared_x_slice = self.shared_x[:, :, :, start:stop]
    self.shared_y_slice = self.shared_y[start:stop]
# Tail of a function whose `def` line lies above the visible chunk; left as is.
    return 0.5 * np.sum((inputs - outputs)**2, axis=0).mean()


def loss_function_grad(net, w, inputs):
    """Return the loss gradient of ``net`` at weights ``w``.

    Sets the weights of ``net`` to ``w``, runs ``net.compute_outputs`` on
    ``inputs`` and returns ``net.compute_loss_grad(outputs - inputs)``.
    Note that the weights of ``net`` are overwritten as a side effect.
    """
    net.set_weights(w)
    outputs = net.compute_outputs(inputs)
    exact_grad = net.compute_loss_grad(outputs - inputs)
    return exact_grad


print('>>> Testing basic functionality...')
inputs = np.random.normal(size=(5, 50))
# two sigmoid layers with biases: 5 -> 2 -> 5
hidden_layer_1 = FCLayer(shape=(5, 2),
                         afun=SigmoidActivationFunction(),
                         use_bias=True)
hidden_layer_2 = FCLayer(shape=(2, 5),
                         afun=SigmoidActivationFunction(),
                         use_bias=True)
# total parameter count across both layers
num_params = hidden_layer_1.get_params_number(
) + hidden_layer_2.get_params_number()
w0 = np.random.normal(size=num_params)
net = FFNet([hidden_layer_1, hidden_layer_2])
# buffers with the same shape as the weight vector
approx_grad = np.zeros_like(w0)
p = np.zeros_like(w0)