def update(self, index, weight, grad, state):
    assert isinstance(weight, NDArray)
    assert isinstance(grad, NDArray)
    self._update_count(index)
    lr = self._get_lr(index)
    wd = self._get_wd(index)
    t = self._index_update_count[index]
    with bulk(self._bulk):
        # preprocess grad
        grad *= self.rescale_grad
        if self.clip_gradient is not None:
            grad = clip(grad, -self.clip_gradient, self.clip_gradient)

        mean, var = state
        mean *= self.beta1
        mean += (1. - self.beta1) * grad
        var *= self.beta2
        var += (1. - self.beta2) * square(grad)

        r1 = weight.norm()
        if not self.bias_correction:
            r1 = minimum(maximum(r1, self.lower_bound), self.upper_bound)
            sqrt_var = sqrt(var)
            sqrt_var += self.epsilon
            g = mean / sqrt_var
            g += wd * weight
        else:
            # apply bias correction
            mean_hat = mean / (1. - power(self.beta1, t))
            var_hat = var / (1. - power(self.beta2, t))
            if self._eps_after_sqrt:
                sqrt(var_hat, out=var_hat)
                var_hat += self.epsilon
            else:
                var_hat += self.epsilon
                sqrt(var_hat, out=var_hat)
            mean_hat /= var_hat
            mean_hat += wd * weight
            g = mean_hat

        r2 = g.norm()

        # calculate lamb_trust_ratio
        ratio = r1 / r2
        # becomes NaN if ratio == NaN or 0, otherwise 0
        nan_or_zero = 1 - ratio / ratio
        r = where(nan_or_zero, ones_like(ratio), ratio)
        lr *= r

        # update weight
        g *= lr
        weight[:] -= g
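
# A minimal NumPy sketch (illustrative, not the MXNet code above) of the layer-wise
# LAMB trust ratio r = ||weight|| / ||update||, with the ratio forced back to 1 when
# it would be 0, inf, or NaN, the same effect as the where(nan_or_zero, ...) guard.
# All names here (lamb_trust_ratio, w, g) are made up for the example.
import numpy as np

def lamb_trust_ratio(w, g):
    r1 = np.linalg.norm(w)   # norm of the weights
    r2 = np.linalg.norm(g)   # norm of the Adam-style update direction
    if r1 == 0.0 or r2 == 0.0:
        return 1.0           # fall back to the plain learning rate
    return r1 / r2

print(lamb_trust_ratio(np.ones(4), np.full(4, 0.5)))   # 2.0: step rescaled by ||w||/||g||
print(lamb_trust_ratio(np.zeros(4), np.full(4, 0.5)))  # 1.0: guard handles a zero norm
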
def __getitem__(self, index):
    img_path, gt_path = self.img_paths[index], self.gt_paths[index]
    in_exposure = float(img_path.split('/')[-1][9:-5])
    gt_exposure = float(gt_path.split('/')[-1][9:-5])
    ratio = min(gt_exposure / in_exposure, 300)

    img = process_img(os.path.join(self.dataset_dir, img_path), self.dataset)
    img = img[np.newaxis, :] * ratio
    gt = process_gt(os.path.join(self.dataset_dir, gt_path))
    gt = gt[np.newaxis, :]

    if self.transform is not None:
        img, gt = self.transform(img, gt)
    img = F.minimum(img, 1.0)
    return img, gt
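
# A small illustration of the filename parsing assumed in __getitem__ above. The
# [9:-5] slice presumes SID-style names such as '10003_00_0.04s.ARW' (hypothetical
# example), where the exposure time in seconds sits between character 9 and the
# 's.ARW' suffix; the amplification ratio is gt_exposure / in_exposure capped at 300.
img_name = '10003_00_0.04s.ARW'    # short-exposure input (assumed naming scheme)
gt_name = '10003_00_10s.ARW'       # long-exposure ground truth (assumed naming scheme)
in_exposure = float(img_name[9:-5])           # -> 0.04
gt_exposure = float(gt_name[9:-5])            # -> 10.0
ratio = min(gt_exposure / in_exposure, 300)   # -> 250.0
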
def update(self, index, weight, grad, state):
    assert isinstance(weight, NDArray)
    assert isinstance(grad, NDArray)
    self._update_count(index)
    lr = self._get_lr(index)
    wd = self._get_wd(index)
    t = self._index_update_count[index]

    # preprocess grad
    grad *= self.rescale_grad
    if self.clip_gradient is not None:
        grad = clip(grad, -self.clip_gradient, self.clip_gradient)

    mean, var = state
    mean[:] = self.beta1 * mean + (1. - self.beta1) * grad
    var[:] = self.beta2 * var + (1. - self.beta2) * square(grad)

    r1 = weight.norm()
    if not self.bias_correction:
        r1 = minimum(maximum(r1, self.lower_bound), self.upper_bound)
        g = mean / (sqrt(var) + self.epsilon) + wd * weight
    else:
        # apply bias correction
        mean_hat = mean / (1. - power(self.beta1, t))
        var_hat = var / (1. - power(self.beta2, t))
        g = mean_hat / sqrt(var_hat + self.epsilon) + wd * weight

    r2 = g.norm()

    # calculate lamb_trust_ratio
    r = 1. if r1 == 0. or r2 == 0. else r1 / r2
    lr *= r

    # update weight
    weight[:] -= lr * g
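
# A brief numeric sketch (illustrative values only) of the bias correction applied
# above: on the first steps, mean and var are biased toward their zero initialization,
# and dividing by (1 - beta ** t) rescales them to unbiased estimates.
beta1, beta2, t = 0.9, 0.999, 1
grad_val = 0.5
mean_val = (1. - beta1) * grad_val         # first update from zero: 0.05
var_val = (1. - beta2) * grad_val ** 2     # 0.00025
mean_hat = mean_val / (1. - beta1 ** t)    # -> 0.5, recovers the gradient scale
var_hat = var_val / (1. - beta2 ** t)      # -> 0.25, recovers grad ** 2
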
def GD(classifier, alpha, beta, gamma, max_iterations=100, learning_rate=50.0,
       learning_rate_decay=0.9, momentum=0.5):
    # Random initialization
    X = nd.abs(nd.random_normal(scale=1, shape=(1, *classifier.input_shape)))
    #audio_path_label_pairs = load_audio_path_label_pairs()
    #shuffle(audio_path_label_pairs)
    #audio_path, actual_label_id = audio_path_label_pairs[0]
    #mg = classifier.compute_melgram(audio_path)
    #X = nd.array(np.expand_dims(mg, axis=0), ctx=classifier.model_ctx)
    X = X.as_in_context(classifier.model_ctx)

    # GD with momentum
    eta = -1.0 * learning_rate
    prev_grad = nd.zeros(shape=X.shape)

    losses = []
    cls_losses = []
    sty_losses = []
    pct_losses = []
    l1s = []

    for t in range(max_iterations):
        # Projection
        X = nd.maximum(X, 0.0)
        X = nd.minimum(X, 1.0)

        # Save as .csv
        img = X[0, 0, :, :].asnumpy()
        np.savetxt('./temp/iter%d.csv' % t, img)

        # Calculate losses and gradients
        cls_loss = classifier_loss(X, classifier)
        sty_loss = style_loss(X)
        pct_loss = perceptual_loss(X)
        l1 = l1_regularization(X)

        # Weighting
        loss = cls_loss[0] + alpha * sty_loss[0] + beta * pct_loss[0] + gamma * l1[0]
        grad = cls_loss[1] + alpha * sty_loss[1] + beta * pct_loss[1] + gamma * l1[1]

        # Store losses
        print("Iteration %d: %.2f | (%.2f, %.2f, %.2f, %.2f)" %
              (t, loss, cls_loss[0], sty_loss[0], pct_loss[0], l1[0]))
        #print("Iteration %d: %.2f | (%.2f, %.2f, %.2f)" % (t, loss, cls_loss[0], sty_loss[0], pct_loss[0]))
        losses.append(loss)
        cls_losses.append(cls_loss[0])
        sty_losses.append(sty_loss[0])
        pct_losses.append(pct_loss[0])
        l1s.append(l1[0])

        # Update
        X = X - eta * (nd.array(grad) + momentum * prev_grad)
        eta = eta * learning_rate_decay
        prev_grad = grad
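
# A minimal NumPy sketch (toy gradient and values, not taken from GD() above) of the
# per-iteration pattern used there: project X into [0, 1], take a heavy-ball momentum
# step, and decay the step size. Here eta is kept positive and subtracted; GD() instead
# initializes eta to -learning_rate, which flips the sign of its step.
import numpy as np

X = np.random.rand(1, 1, 4, 4)
eta, decay, momentum = 0.1, 0.9, 0.5
prev_grad = np.zeros_like(X)
for t in range(3):
    X = np.clip(X, 0.0, 1.0)                     # projection onto [0, 1]
    grad = 2 * (X - 0.5)                         # toy gradient of a placeholder loss
    X = X - eta * (grad + momentum * prev_grad)  # momentum update
    eta *= decay                                 # step-size decay
    prev_grad = grad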