def train_loss(*args):
    # Forward pass over a flat parameter list: args[0] is X, args[1] is y, and the
    # remaining entries are the weights/biases (and batchnorm gamma/beta) located
    # via the self.*_idx helpers. `self` and `mode` are closed over from the
    # enclosing method.
    X = args[0]
    y = args[1]

    res = X
    for l in range(self.num_layers):
        prev_res = res
        res = affine_forward(prev_res, args[self.w_idx(l)], args[self.b_idx(l)])
        # Batchnorm, ReLU and dropout are applied to every layer except the last.
        if l < (self.num_layers - 1):
            if self.use_batchnorm:
                res = batchnorm_forward(res, args[self.bn_ga_idx(l)],
                                        args[self.bn_bt_idx(l)], self.bn_params[l])
            res = relu_forward(res)
            if self.use_dropout:
                res = dropout_forward(res, self.dropout_param)

    scores = res
    if mode == 'test':
        return scores

    # loss, _ = softmax_loss(scores, y)
    loss = svm_loss(scores, y)
    return loss
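# The self.w_idx / self.b_idx / self.bn_ga_idx / self.bn_bt_idx helpers used above
# are referenced but not shown. A minimal sketch of one possible packing of the
# flat `args` list follows; these would be methods on the same class, and the
# exact layout is an assumption, not taken from the source.
def w_idx(self, l):
    # Weights start at args[2] (after X and y), two slots per affine layer.
    return 2 + 2 * l

def b_idx(self, l):
    return 2 + 2 * l + 1

def bn_ga_idx(self, l):
    # Batchnorm gammas/betas packed after all weights and biases (assumed layout).
    return 2 + 2 * self.num_layers + 2 * l

def bn_bt_idx(self, l):
    return 2 + 2 * self.num_layers + 2 * l + 1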
def affine_batchnorm_relu_forward(x, w, b, gamma, beta, bn_param):
    """
    Convenience layer that performs Affine -> BatchNorm -> ReLU.

    Inputs:
    - x: Input to the affine layer
    - w, b: Weights for the affine layer
    - gamma, beta: Scale and shift parameters for batch normalization
    - bn_param: Dictionary of parameters for the batchnorm layer

    Returns a tuple of:
    - out: Output from the ReLU
    - cache: Object to give to the backward pass
    """
    a, fc_cache = affine_forward(x, w, b)
    a_bn, bn_cache = batchnorm_forward(a, gamma, beta, bn_param)  # avoid shadowing the bias b
    out, relu_cache = relu_forward(a_bn)
    cache = (fc_cache, bn_cache, relu_cache)
    return out, cache
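# The cache returned above is meant for a matching backward helper. A minimal
# sketch, assuming the standard affine_backward / batchnorm_backward /
# relu_backward layer functions used elsewhere in this code.
def affine_batchnorm_relu_backward(dout, cache):
    # Unwind the three caches in reverse order: ReLU -> BatchNorm -> Affine.
    fc_cache, bn_cache, relu_cache = cache
    da_bn = relu_backward(dout, relu_cache)
    da, dgamma, dbeta = batchnorm_backward(da_bn, bn_cache)
    dx, dw, db = affine_backward(da, fc_cache)
    return dx, dw, db, dgamma, dbeta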
def affine_bn_relu_forward(x, w, b, gamma, beta, bn_param):
    """
    Convenience layer that performs an affine transform, batch normalization,
    and then a ReLU activation.

    Inputs:
    - x: Input to the affine layer
    - w, b: Weights for the affine layer
    - gamma, beta: Scale and shift parameters for batch normalization
    - bn_param: Dictionary of parameters for the batchnorm layer

    Returns a tuple of:
    - out: Output from the ReLU
    - cache: Object to give to the backward pass
    """
    out, fc_cache = layers.affine_forward(x, w, b)
    out, bn_cache = layers.batchnorm_forward(out, gamma, beta, bn_param)
    out, relu_cache = layers.relu_forward(out)
    cache = (fc_cache, bn_cache, relu_cache)
    return out, cache
def combo_forward(x, w, b, gamma, beta, bn_param):
    """
    Combo layer forward: FC -> BN -> ReLU.

    Batch normalization is skipped when bn_param is None.

    Inputs:
    - x: Input to the affine layer
    - w, b: Weights for the affine layer
    - gamma, beta: Scale and shift parameters for batch normalization
    - bn_param: Dictionary of batchnorm parameters, or None to disable batchnorm

    Returns a tuple of:
    - out: Output from the ReLU
    - cache: Object to give to the backward pass
    """
    bn_cache = None
    a, fc_cache = affine_forward(x, w, b)
    if bn_param is not None:
        a, bn_cache = batchnorm_forward(a, gamma, beta, bn_param)
    out, relu_cache = relu_forward(a)
    cache = (fc_cache, bn_cache, relu_cache)
    return out, cache
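# A matching backward helper would need to mirror the optional batchnorm above.
# A minimal sketch, assuming the usual *_backward layer functions and returning
# dgamma/dbeta as None when batchnorm was skipped.
def combo_backward(dout, cache):
    # Backward pass for combo_forward; bn_cache is None when batchnorm was skipped.
    fc_cache, bn_cache, relu_cache = cache
    dgamma, dbeta = None, None
    da = relu_backward(dout, relu_cache)
    if bn_cache is not None:
        da, dgamma, dbeta = batchnorm_backward(da, bn_cache)
    dx, dw, db = affine_backward(da, fc_cache)
    return dx, dw, db, dgamma, dbeta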
# Simulate the forward pass for a two-layer network
np.random.seed(231)
N, D1, D2, D3 = 200, 50, 60, 3
X = np.random.randn(N, D1)
W1 = np.random.randn(D1, D2)
W2 = np.random.randn(D2, D3)
a = np.maximum(0, X.dot(W1)).dot(W2)

print('Before batch normalization:')
print('  means: ', a.mean(axis=0))
print('  stds:  ', a.std(axis=0))

# Means should be close to zero and stds close to one
print('After batch normalization (gamma=1, beta=0)')
a_norm, _ = batchnorm_forward(a, np.ones(D3), np.zeros(D3), {'mode': 'train'})
print('  mean: ', a_norm.mean(axis=0))
print('  std:  ', a_norm.std(axis=0))

# Now means should be close to beta and stds close to gamma
gamma = np.asarray([1.0, 2.0, 3.0])
beta = np.asarray([11.0, 12.0, 13.0])
a_norm, _ = batchnorm_forward(a, gamma, beta, {'mode': 'train'})
print('After batch normalization (nontrivial gamma, beta)')
print('  means: ', a_norm.mean(axis=0))
print('  stds:  ', a_norm.std(axis=0))

# Check the test-time forward pass by running the training-time
# forward pass many times to warm up the running averages, and then
# checking the means and variances of activations after a test-time
# forward pass.
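# The check described in the comment above is not shown; a minimal sketch of it,
# assuming the same batchnorm_forward interface and reusing W1/W2 from the
# snippet above. The number of warm-up iterations (50) is an arbitrary choice.
np.random.seed(231)
bn_param = {'mode': 'train'}
gamma = np.ones(D3)
beta = np.zeros(D3)

# Warm up the running mean/variance with many training-time forward passes.
for t in range(50):
    X = np.random.randn(N, D1)
    a = np.maximum(0, X.dot(W1)).dot(W2)
    batchnorm_forward(a, gamma, beta, bn_param)

# Test-time pass: means should be near zero and stds near one
# (noisier than the training-time statistics).
bn_param['mode'] = 'test'
X = np.random.randn(N, D1)
a = np.maximum(0, X.dot(W1)).dot(W2)
a_norm, _ = batchnorm_forward(a, gamma, beta, bn_param)
print('After batch normalization (test-time):')
print('  means: ', a_norm.mean(axis=0))
print('  stds:  ', a_norm.std(axis=0))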
def loss(self, X, y=None):
    """
    Compute loss and gradient for the fully-connected net.

    Input / output: Same as TwoLayerNet above.
    """
    X = X.astype(self.dtype)
    mode = 'test' if y is None else 'train'

    # Set train/test mode for batchnorm params and dropout param since they
    # behave differently during training and testing.
    if self.use_dropout:
        self.dropout_param['mode'] = mode
    if self.use_batchnorm:
        for bn_param in self.bn_params:
            bn_param['mode'] = mode

    scores = None
    ############################################################################
    # TODO: Implement the forward pass for the fully-connected net, computing  #
    # the class scores for X and storing them in the scores variable.          #
    #                                                                          #
    # When using dropout, you'll need to pass self.dropout_param to each       #
    # dropout forward pass.                                                    #
    #                                                                          #
    # When using batch normalization, you'll need to pass self.bn_params[0] to #
    # the forward pass for the first batch normalization layer, pass           #
    # self.bn_params[1] to the forward pass for the second batch normalization #
    # layer, etc.                                                              #
    ############################################################################
    X_temp = X
    affine_Input = list()     # caches from affine_forward, one per layer
    relu_input = list()       # caches from relu_forward, one per hidden layer
    batchnorm_input = list()  # caches from batchnorm_forward, if enabled
    dropout_input = list()    # caches from dropout_forward, if enabled
    score_tmp = None

    # Hidden layers: affine -> [batchnorm] -> ReLU -> [dropout]
    for i in range(self.num_layers - 1):
        tmp, affine_input_tmp = affine_forward(
            X_temp, self.params['W' + str(i + 1)], self.params['b' + str(i + 1)])
        if self.use_batchnorm:
            tmp, batchnorm_cache = batchnorm_forward(
                tmp, self.params['gamma' + str(i + 1)],
                self.params['beta' + str(i + 1)], self.bn_params[i])
            batchnorm_input.append(batchnorm_cache)
        score_tmp, relu_input_tmp = relu_forward(tmp)
        if self.use_dropout:
            score_tmp, dropout_cache = dropout_forward(
                score_tmp, self.dropout_param)
            dropout_input.append(dropout_cache)
        affine_Input.append(affine_input_tmp)
        relu_input.append(relu_input_tmp)
        X_temp = score_tmp

    # Final layer: affine only (no batchnorm / ReLU / dropout)
    scores, last_input_tmp = affine_forward(
        score_tmp, self.params['W' + str(self.num_layers)],
        self.params['b' + str(self.num_layers)])
    affine_Input.append(last_input_tmp)
    ############################################################################
    #                             END OF YOUR CODE                             #
    ############################################################################

    if mode == 'test':
        return scores

    loss, grads = 0.0, {}
    ############################################################################
    # TODO: Implement the backward pass for the fully-connected net. Store the #
    # loss in the loss variable and gradients in the grads dictionary. Compute #
    # data loss using softmax, and make sure that grads[k] holds the gradients #
    # for self.params[k]. Don't forget to add L2 regularization!               #
    #                                                                          #
    # When using batch normalization, you don't need to regularize the scale   #
    # and shift parameters.                                                    #
    #                                                                          #
    # NOTE: To ensure that your implementation matches ours and you pass the   #
    # automated tests, make sure that your L2 regularization includes a factor #
    # of 0.5 to simplify the expression for the gradient.
    #                                                                          #
    ############################################################################
    num_trains = X.shape[0]

    # Data loss plus L2 regularization on the weights (not the biases or
    # batchnorm parameters).
    loss, dscores = softmax_loss(scores, y)
    weight_decay_sum = 0
    for i in range(self.num_layers):
        tmp = np.sum(self.params['W' + str(i + 1)] *
                     self.params['W' + str(i + 1)])
        weight_decay_sum = weight_decay_sum + tmp
    loss = loss + 0.5 * self.reg * weight_decay_sum

    # softmax_output = np.exp(scores) / np.sum(np.exp(scores), axis=1).reshape(-1, 1)
    # softmax_output[range(num_trains), list(y)] = softmax_output[range(num_trains), list(y)] - 1

    # Backward pass, from the last affine layer down to the first. For each
    # hidden layer the order is dropout <- ReLU <- batchnorm, i.e. the reverse
    # of the forward pass.
    dout = dscores
    for i in range(self.num_layers):
        dx, dw, db = affine_backward(dout, affine_Input[-(i + 1)])
        grads['W' + str(self.num_layers - i)] = dw + self.reg * self.params[
            'W' + str(self.num_layers - i)]
        grads['b' + str(self.num_layers - i)] = db
        if self.use_dropout and i != self.num_layers - 1:
            dx = dropout_backward(dx, dropout_input[-(i + 1)])
        if i != self.num_layers - 1:
            dout = relu_backward(dx, relu_input[-(i + 1)])
        if i != self.num_layers - 1 and self.use_batchnorm:
            dout, dgamma, dbeta = batchnorm_backward(
                dout, batchnorm_input[-(i + 1)])
            grads['gamma' + str(self.num_layers - i - 1)] = dgamma
            grads['beta' + str(self.num_layers - i - 1)] = dbeta

    return loss, grads
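# A quick way to sanity-check loss() is to compare its analytic gradients
# against centered-difference numerical gradients on a tiny network. This is
# only a sketch: the constructor arguments below (hidden_dims, input_dim,
# num_classes, reg, use_batchnorm) are assumed, not taken from the source.
np.random.seed(231)
N, D, H, C = 5, 10, 7, 3
X = np.random.randn(N, D)
y = np.random.randint(C, size=N)

model = FullyConnectedNet(hidden_dims=[H, H], input_dim=D, num_classes=C,
                          reg=0.1, use_batchnorm=True)
loss, grads = model.loss(X, y)

h = 1e-5
for name, param in model.params.items():
    # Numerical gradient at one randomly chosen entry of each parameter.
    ix = tuple(np.random.randint(d) for d in param.shape)
    old = param[ix]
    param[ix] = old + h
    loss_plus, _ = model.loss(X, y)
    param[ix] = old - h
    loss_minus, _ = model.loss(X, y)
    param[ix] = old
    num_grad = (loss_plus - loss_minus) / (2 * h)
    print('%s: analytic %.6f, numeric %.6f' % (name, grads[name][ix], num_grad))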
def affine_norm_relu_forward(x, w, b, gamma, beta, bn_param):
    """
    Convenience layer: affine -> batch normalization -> ReLU.

    Returns the ReLU output and the caches needed for the backward pass.
    """
    a, fc_cache = affine_forward(x, w, b)
    a1, norm_cache = batchnorm_forward(a, gamma, beta, bn_param)
    out, relu_cache = relu_forward(a1)
    cache = (fc_cache, norm_cache, relu_cache)
    return out, cache