def get_subwindow_tracking(z, pos_x, pos_y, model_sz, original_sz, avgChans, ctx=mx.cpu()):
    # Crop a padded, mean-filled square context window around (pos_x, pos_y) and
    # resize it to model_sz; returns both the resized patch and the raw crop.
    if original_sz is None:
        original_sz = model_sz
    sz = original_sz
    im_sz = np.shape(z)
    cen = (sz - 1) / 2
    context_xmin = np.floor(pos_x - cen)
    context_xmax = context_xmin + sz - 1
    context_ymin = np.floor(pos_y - cen)
    context_ymax = context_ymin + sz - 1
    left_pad = nd.maximum(0, 1 - context_xmin)
    top_pad = nd.maximum(0, 1 - context_ymin)
    right_pad = nd.maximum(0, context_xmax - im_sz[1])
    bottom_pad = nd.maximum(0, context_ymax - im_sz[0])
    context_xmin = context_xmin + left_pad
    context_xmax = context_xmax + left_pad
    context_ymin = context_ymin + top_pad
    context_ymax = context_ymax + top_pad
    paddings = [0, 0, 0, 0, int(top_pad), int(bottom_pad), int(left_pad), int(right_pad)]

    if avgChans is not None:
        im_padded_ = z - avgChans
    else:
        im_padded_ = z  # guard: keep im_padded_ defined when no channel means are given
    im_padded_ = nd.expand_dims(im_padded_, axis=0)           # B H W C
    im_padded_ = nd.transpose(im_padded_, axes=(0, 3, 1, 2))  # B C H W
    im_padded_ = nd.pad(im_padded_, pad_width=paddings, mode='constant')
    im_padded_ = nd.transpose(im_padded_, axes=(0, 2, 3, 1))  # B H W C
    if avgChans is not None:
        im_padded_ = im_padded_ + avgChans
    im_padded = im_padded_[0]

    im_patch_original = im_padded[int(context_ymin - 1):int(context_ymax),
                                  int(context_xmin - 1):int(context_xmax), :]
    if int(model_sz) != int(original_sz):
        sz_dst_w = np.round(im_patch_original.shape[1] / original_sz * model_sz)
        sz_dst_h = np.round(im_patch_original.shape[0] / original_sz * model_sz)
        im_patch = image.fixed_crop(im_patch_original, x0=0, y0=0,
                                    w=im_patch_original.shape[1], h=im_patch_original.shape[0],
                                    size=[int(sz_dst_w), int(sz_dst_h)], interp=1)
        if im_patch.shape[0] != model_sz:
            im_patch = image.fixed_crop(im_patch_original, x0=0, y0=0,
                                        w=im_patch_original.shape[1], h=im_patch_original.shape[0],
                                        size=[int(model_sz), int(model_sz)], interp=1)
    else:
        im_patch = im_patch_original
    return im_patch, im_patch_original
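# --- Usage sketch (not from the source): exercises the padding path of
# get_subwindow_tracking on a synthetic frame near the image corner, assuming the
# snippet's module imports mxnet as mx, numpy as np, mxnet.ndarray as nd and
# mxnet.image as image. Passing original_sz=None makes it default to model_sz,
# so the resize branch is skipped and the returned patch is the padded crop itself.
import mxnet as mx
from mxnet import nd

frame = nd.random.uniform(0, 255, shape=(360, 480, 3))   # H x W x C, float32
avg_chans = nd.array([117.0, 117.0, 117.0])               # illustrative per-channel means
patch, patch_original = get_subwindow_tracking(
    frame, pos_x=30, pos_y=40, model_sz=127, original_sz=None,
    avgChans=avg_chans, ctx=mx.cpu())
print(patch.shape)   # expected (127, 127, 3)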
def forward(self, feat):
    # Channel-wise L2 normalization followed by a learned per-channel scale.
    square_sum = nd.sum(nd.square(feat), axis=self.axis, keepdims=True)
    inv_norm = nd.rsqrt(nd.maximum(square_sum, self.epsilon))
    l2_res = nd.multiply(feat, inv_norm)
    # Move channels last so the (C,) scale broadcasts, then restore NCHW.
    return nd.multiply(l2_res.transpose([0, 2, 3, 1]), self.scale.data()).transpose([0, 3, 1, 2])
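# --- Quick check (assumed, not from the source): with axis=1 and a scale of ones,
# the normalization above should agree with the built-in channel-mode
# nd.L2Normalization up to the epsilon handling.
from mxnet import nd

x = nd.random.uniform(shape=(2, 4, 8, 8))
manual = x * nd.rsqrt(nd.maximum(nd.sum(nd.square(x), axis=1, keepdims=True), 1e-10))
builtin = nd.L2Normalization(x, mode='channel', eps=1e-10)
print(nd.max(nd.abs(manual - builtin)))   # should be close to zero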
def activation(X, act_type='relu'):
    if act_type == 'relu':
        return nd.maximum(X, nd.zeros_like(X))
    elif act_type == 'elu':
        return nd.LeakyReLU(X, act_type=act_type)
    else:
        raise ValueError('Unsupported act_type: %s' % act_type)
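# --- Hypothetical usage of the activation helper above.
from mxnet import nd

x = nd.array([[-2.0, -0.5, 0.0, 1.5]])
print(activation(x, act_type='relu'))   # negatives clamped to zero
print(activation(x, act_type='elu'))    # smooth negative saturation via LeakyReLU's 'elu' mode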
def forward(self, nodes):
    h = nodes.data['h']
    h_agg = nodes.data['h_agg']
    deg = nodes.data['deg'].expand_dims(1)
    # Average the summed neighbour features; clamp the degree to avoid division by zero.
    h_concat = nd.concat(h, h_agg / nd.maximum(deg, 1e-6), dim=1)
    h_new = self.dropout(self.leakyrelu(self.W(h_concat)))
    return {'h': h_new}
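# --- Standalone sketch (assumed, not from the source) of the degree-normalized
# aggregation performed above, without the DGL node-batch plumbing; shapes are illustrative.
from mxnet import nd

h = nd.random.uniform(shape=(5, 16))        # node features
h_agg = nd.random.uniform(shape=(5, 16))    # summed neighbour features
deg = nd.array([3, 0, 1, 2, 4]).expand_dims(1)
# Clamping the degree keeps isolated nodes (deg == 0) from dividing by zero.
h_concat = nd.concat(h, h_agg / nd.maximum(deg, 1e-6), dim=1)
print(h_concat.shape)   # (5, 32)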
def update(self, index, weight, grad, state):
    assert isinstance(weight, NDArray)
    assert isinstance(grad, NDArray)
    self._update_count(index)
    lr = self._get_lr(index)
    wd = self._get_wd(index)
    t = self._index_update_count[index]
    with bulk(self._bulk):
        # preprocess grad
        grad *= self.rescale_grad
        if self.clip_gradient is not None:
            grad = clip(grad, -self.clip_gradient, self.clip_gradient)

        mean, var = state
        mean *= self.beta1
        mean += (1. - self.beta1) * grad
        var *= self.beta2
        var += (1. - self.beta2) * square(grad)

        r1 = weight.norm()
        if not self.bias_correction:
            r1 = minimum(maximum(r1, self.lower_bound), self.upper_bound)
            sqrt_var = sqrt(var)
            sqrt_var += self.epsilon
            g = mean / sqrt_var
            g += wd * weight
        else:
            # apply bias correction
            mean_hat = mean / (1. - power(self.beta1, t))
            var_hat = var / (1. - power(self.beta2, t))
            if self._eps_after_sqrt:
                sqrt(var_hat, out=var_hat)
                var_hat += self.epsilon
            else:
                var_hat += self.epsilon
                sqrt(var_hat, out=var_hat)
            mean_hat /= var_hat
            mean_hat += wd * weight
            g = mean_hat

        r2 = g.norm()
        # calculate lamb_trust_ratio
        ratio = r1 / r2
        # becomes NaN if ratio == NaN or 0, otherwise 0
        nan_or_zero = 1 - ratio / ratio
        r = where(nan_or_zero, ones_like(ratio), ratio)
        lr *= r
        # update weight
        g *= lr
        weight[:] -= g
def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
    if ReluOp.guided_backprop:
        # Get output and gradients of output
        y = out_data[0]
        dy = out_grad[0]
        # Zero out the negatives in the gradients of the output
        dy_positives = nd.maximum(dy, nd.zeros_like(dy))
        # What output values were greater than 0?
        y_ones = y.__gt__(0)
        # Mask out the values for which at least one of dy or y is negative
        dx = dy_positives * y_ones
        self.assign(in_grad[0], req[0], dx)
    else:
        # Regular backward for ReLU
        x = in_data[0]
        x_gt_zero = x.__gt__(0)
        dx = out_grad[0] * x_gt_zero
        self.assign(in_grad[0], req[0], dx)
def update(self, index, weight, grad, state):
    assert isinstance(weight, NDArray)
    assert isinstance(grad, NDArray)
    self._update_count(index)
    lr = self._get_lr(index)
    wd = self._get_wd(index)
    t = self._index_update_count[index]

    # preprocess grad
    grad *= self.rescale_grad
    if self.clip_gradient is not None:
        grad = clip(grad, -self.clip_gradient, self.clip_gradient)

    mean, var = state
    mean[:] = self.beta1 * mean + (1. - self.beta1) * grad
    var[:] = self.beta2 * var + (1. - self.beta2) * square(grad)

    r1 = weight.norm()
    if not self.bias_correction:
        r1 = minimum(maximum(r1, self.lower_bound), self.upper_bound)
        g = mean / (sqrt(var) + self.epsilon) + wd * weight
    else:
        # apply bias correction
        mean_hat = mean / (1. - power(self.beta1, t))
        var_hat = var / (1. - power(self.beta2, t))
        g = mean_hat / sqrt(var_hat + self.epsilon) + wd * weight

    r2 = g.norm()
    # calculate lamb_trust_ratio
    r = 1. if r1 == 0. or r2 == 0. else r1 / r2
    lr *= r
    # update weight
    weight[:] -= lr * g
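# --- Hypothetical driver for a LAMB-style update like the one above: assuming an
# MXNet build that ships the built-in 'lamb' optimizer (1.6+), a Gluon Trainer can
# select it by name; a custom Optimizer subclass registered via mx.optimizer.register
# would be driven the same way. This does not exercise the snippet's class directly.
import mxnet as mx
from mxnet import autograd, gluon, nd

net = gluon.nn.Dense(1)
net.initialize()
trainer = gluon.Trainer(net.collect_params(), 'lamb',
                        {'learning_rate': 0.01, 'wd': 0.01})
x, y = nd.random.uniform(shape=(4, 3)), nd.random.uniform(shape=(4, 1))
loss_fn = gluon.loss.L2Loss()
with autograd.record():
    loss = loss_fn(net(x), y)
loss.backward()
trainer.step(batch_size=4)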
def relu(X):
    return nd.maximum(X, nd.zeros_like(X))
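# --- Quick check (assumed): comparing against an explicit zeros tensor, as above,
# and against the scalar 0, as in the variants below, gives the same result.
from mxnet import nd

X = nd.array([[-1.5, 0.0, 2.0]])
print(nd.maximum(X, nd.zeros_like(X)))
print(nd.maximum(X, 0))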
def relu(self, X):
    return nd.maximum(X, 0)
def GD(classifier, alpha, beta, gamma, max_iterations=100, learning_rate=50.0,
       learning_rate_decay=0.9, momentum=0.5):
    # Random initialization
    X = nd.abs(nd.random_normal(scale=1, shape=(1, *classifier.input_shape)))
    X = X.as_in_context(classifier.model_ctx)

    # GD with momentum
    eta = -1.0 * learning_rate
    prev_grad = nd.zeros(shape=X.shape)
    losses = []
    cls_losses = []
    sty_losses = []
    pct_losses = []
    l1s = []
    for t in range(max_iterations):
        # Projection onto [0, 1]
        X = nd.maximum(X, 0.0)
        X = nd.minimum(X, 1.0)

        # Save as .csv
        img = X[0, 0, :, :].asnumpy()
        np.savetxt('./temp/iter%d.csv' % t, img)

        # Calculate losses and gradients
        cls_loss = classifier_loss(X, classifier)
        sty_loss = style_loss(X)
        pct_loss = perceptual_loss(X)
        l1 = l1_regularization(X)

        # Weighting
        loss = cls_loss[0] + alpha * sty_loss[0] + beta * pct_loss[0] + gamma * l1[0]
        grad = cls_loss[1] + alpha * sty_loss[1] + beta * pct_loss[1] + gamma * l1[1]

        # Store losses
        print("Iteration %d: %.2f | (%.2f, %.2f, %.2f, %.2f)"
              % (t, loss, cls_loss[0], sty_loss[0], pct_loss[0], l1[0]))
        losses.append(loss)
        cls_losses.append(cls_loss[0])
        sty_losses.append(sty_loss[0])
        pct_losses.append(pct_loss[0])
        l1s.append(l1[0])

        # Update
        X = X - eta * (nd.array(grad) + momentum * prev_grad)
        eta = eta * learning_rate_decay
        prev_grad = nd.array(grad)  # keep prev_grad an NDArray for the momentum term
def relu(X):
    return nd.maximum(X, 0)
def relu(x):
    return nd.maximum(0, x)
def RELU(X):
    return nd.maximum(X, 0)
def logsigmoid(val):
    # Numerically stable log(sigmoid(val)): avoids overflow in exp for large |val|.
    max_elem = nd.maximum(0., -val)
    z = nd.exp(-max_elem) + nd.exp(-val - max_elem)
    return -(max_elem + nd.log(z))
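# --- Quick check (assumed, not from the source): the stable form above matches a
# naive log(sigmoid(v)) for moderate inputs and stays finite where the naive form underflows.
from mxnet import nd

v = nd.array([-120.0, -1.0, 0.0, 1.0, 20.0])
print(logsigmoid(v))           # finite everywhere, roughly [-120, -1.31, -0.69, -0.31, ~0]
print(nd.log(nd.sigmoid(v)))   # the naive form returns -inf for the -120 entry in float32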
def forward(self, is_train, req, in_data, out_data, aux):
    x = in_data[0]
    y = nd.maximum(x, nd.zeros_like(x))
    self.assign(out_data[0], req[0], y)
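# --- Minimal sketch (assumed, not from the source) of how forward/backward methods
# like those above plug into MXNet's CustomOp machinery; the class names and the
# op name "my_relu" are illustrative.
import mxnet as mx
from mxnet import nd

class SimpleReluOp(mx.operator.CustomOp):
    def forward(self, is_train, req, in_data, out_data, aux):
        x = in_data[0]
        self.assign(out_data[0], req[0], nd.maximum(x, nd.zeros_like(x)))

    def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
        # Standard ReLU gradient: pass the upstream gradient where the input was positive.
        self.assign(in_grad[0], req[0], out_grad[0] * (in_data[0] > 0))

@mx.operator.register("my_relu")
class SimpleReluProp(mx.operator.CustomOpProp):
    def __init__(self):
        super(SimpleReluProp, self).__init__(need_top_grad=True)

    def list_arguments(self):
        return ['data']

    def list_outputs(self):
        return ['output']

    def infer_shape(self, in_shape):
        return in_shape, [in_shape[0]], []

    def create_operator(self, ctx, shapes, dtypes):
        return SimpleReluOp()

y = nd.Custom(nd.array([-1.0, 0.5, 2.0]), op_type="my_relu")
print(y)   # [0.  0.5 2. ]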
def relu(x):
    return nd.maximum(x, 0)  # custom ReLU implementation
def relu(X):
    # ReLU activation function
    return nd.maximum(X, 0)
def relu(X):
    # Broadcast first, then take the element-wise maximum of the two to form the returned matrix.
    return nd.maximum(X, 0)
def backward(self):
    """Run backward on the current executor."""
    # softmax
    self.get_each_gpu_label()
    self.logit = nd.exp(self.fc_output)[:]
    self.logit /= self.global_sum_fc.reshape((self.batchsize, 1))[:]
    self.grad[:] = self.logit[:]
    if self.data_of_cur_gpu.size > 0:
        self.grad[self.data_of_cur_gpu, self.label_of_cur_gpu] -= 1.0
        self.loss[self.data_of_cur_gpu] = -nd.log(
            nd.maximum(self.logit[self.data_of_cur_gpu, self.label_of_cur_gpu], 1e-32))[:]

    # margin
    if self.data_of_cur_gpu.size > 0:
        grad_fc = self.pick_fc_of_cur_gpu
        grad_fc.attach_grad()
        with autograd.record():
            s = self.margin_loss(grad_fc)
        s.backward(self.grad[self.data_of_cur_gpu, self.label_of_cur_gpu])
        self.grad[self.data_of_cur_gpu, self.label_of_cur_gpu] = grad_fc.grad.copy()
        self.pick_fc_of_cur_gpu = None

    # fc
    self.data_batch.attach_grad()
    self.weight_norm.attach_grad()
    self.bias.attach_grad()
    no_bias = True
    with autograd.record():
        if no_bias:
            nd.FullyConnected(data=self.data_batch, weight=self.weight_norm,
                              no_bias=True, num_hidden=self.classes, out=self.fc_output)
        else:
            nd.FullyConnected(data=self.data_batch, weight=self.weight_norm,
                              bias=self.bias, num_hidden=self.classes, out=self.fc_output)
    self.fc_output.backward(self.grad)
    self.return_feature_grad = self.data_batch.grad.copy()
    self.weight_temp_grad[:] = self.weight_norm.grad[:]

    # allreduce grad
    self.return_feature_grad = self.allreduce('return_feature_grad', self.return_feature_grad)
    assert len(self.return_feature_grad), "rank:{}, grad".format(self.rank)
    self.return_each_gpu_grad = self.return_feature_grad[
        self.each_gpu_batchsize * self.rank:self.each_gpu_batchsize * (self.rank + 1)]

    # l2-norm
    self.weight.attach_grad()
    with autograd.record():
        s2 = nd.L2Normalization(self.weight, mode='instance')
    s2.backward(self.weight_temp_grad)
    self.weight_grad += self.weight.grad
def relu(x):
    return nd.maximum(x, 0)