def closure():
    # Compute Style Loss
    target_features_style = vgg(mean_shift(target_img))
    target_gram_style = [gram_matrix(y) for y in target_features_style]
    blend_features_style = vgg(mean_shift(first_pass_img))
    blend_gram_style = [gram_matrix(y) for y in blend_features_style]
    style_loss = 0
    for layer in range(len(blend_gram_style)):
        style_loss += mse(blend_gram_style[layer], target_gram_style[layer])
    style_loss /= len(blend_gram_style)
    style_loss *= style_weight

    # Compute Content Loss
    content_features = vgg(mean_shift(first_pass_img))
    content_loss = content_weight * mse(blend_features_style.relu2_2,
                                        content_features.relu2_2)

    # Compute Total Loss and Update Image
    loss = style_loss + content_loss
    optimizer.zero_grad()
    loss.backward()

    # Print Loss
    if run[0] % 1 == 0:
        print("run {}:".format(run))
        print('  style : {:4f}, content: {:4f}'.format(
            style_loss.item(), content_loss.item()))
        print()

    run[0] += 1
    return loss
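# None of the snippets here define gram_matrix itself. A minimal PyTorch
# sketch, assuming the usual (batch, channels, h, w) feature layout; the
# division by ch * h * w follows the fast-neural-style convention, though
# individual repos normalize differently (or not at all).
import torch

def gram_matrix(y):
    # y: feature maps of shape (b, ch, h, w)
    (b, ch, h, w) = y.size()
    features = y.view(b, ch, w * h)
    features_t = features.transpose(1, 2)
    # batched outer product of channel vectors, normalized by layer size
    gram = features.bmm(features_t) / (ch * h * w)
    return gram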
def train_step(self, source, style, optimizer):
    with tf.GradientTape() as tape:
        st = self.encode(style)
        y = self(source)

        # reduce_mean computes the mean of elements across dimensions of a
        # tensor; add_n adds all input tensors element-wise.
        style_loss = tf.add_n([
            tf.reduce_mean(
                (utils.gram_matrix(st) - utils.gram_matrix(self.encode(y)))**2)
        ])
        style_loss *= style_weight

        prediction = self(source)
        loss_identity = identity_lr * learning_rate * (
            0.2 * tf.reduce_mean(tf.reduce_sum(cross_entropy(source, prediction)))
            + tf.reduce_mean(tf.reduce_sum((source - prediction)**2)))

        # KL term currently disabled:
        # kl_loss = -0.5 * (1 + w_log_var - tf.square(w_mean) - tf.exp(w_log_var))
        # kl_loss = kl_lr * tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1)) * learning_rate
        kl_loss = 0

        loss = (loss_identity + kl_loss) + style_loss

    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return loss_identity, kl_loss, style_loss
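# A TensorFlow counterpart for the utils.gram_matrix used in the
# tf.GradientTape snippets; a sketch assuming channels-last (b, h, w, c)
# activations, in the einsum form used by the TF style-transfer tutorial.
import tensorflow as tf

def gram_matrix(input_tensor):
    # Sum of outer products of channel vectors over all spatial positions,
    # averaged by the number of locations.
    result = tf.linalg.einsum('bijc,bijd->bcd', input_tensor, input_tensor)
    input_shape = tf.shape(input_tensor)
    num_locations = tf.cast(input_shape[1] * input_shape[2], tf.float32)
    return result / num_locations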
def feature_selection(self):
    logging.debug('Getting feature scores')
    kernel_linear = kernels.kernel_wrapper(kernels.linear)
    kernel_gaussian = kernels.kernel_wrapper(kernels.gaussian, gamma=0.01)
    train_indices = []
    best_features = []
    for line in open('../results/feature_selection/feature_score'):
        if ':' in line:
            best_features.append(int(line.split(':')[0]))
        else:
            line = line.strip()
            if line.startswith('['):
                line = line[1:]
            elif line.endswith(']'):
                line = line[:-1]
            for s in line.split():
                train_indices.append(s == 'True')
    train_indices = np.array(train_indices)
    best_features = np.array(best_features)
    with open('%s/best_features' % self.tmp_folder, 'w') as fout:
        for subset_size in range(10, min(best_features.shape[0], 1001), 10):
            logging.debug("Doing %s", subset_size)
            self.read_data(best_features[:subset_size])
            self.preprocess_data()
            K_linear = utils.gram_matrix(self.X, kernel_linear)
            K_gaussian = utils.gram_matrix(self.X, kernel_gaussian)
            models = []
            for C in range(self.C_steps):
                models.append(MulticlassSVM(kernel_linear, 0.001 * 2**C, K_linear))
            models.append(MulticlassSVM(kernel_gaussian, 8.192, K_gaussian))
            # NOTE: the list above is discarded; only the cached-linear-gram
            # models built below are actually evaluated.
            models = []
            K_linear = self.get_gram_matrix('linear_gram', kernel_linear)
            for C in range(self.C_steps):
                models.append(MulticlassSVM(kernel_linear, 0.001 * 2**C, K_linear))
            train_idx = np.arange(self.X.shape[0])[train_indices]
            test_idx = np.arange(self.X.shape[0])[~train_indices]
            best_score = 0.0
            best_model = None
            for model in models:
                model.fit(train_idx, self.y[train_idx])
                score = model.score(test_idx, self.y[test_idx])
                logging.debug('%s: (%s %s) - %s', subset_size, model.kernel,
                              model.C, score)
                if score > best_score:
                    best_score = score
                    best_model = model
            logging.info('%s: (%s, %s) - %s', subset_size, best_model.kernel,
                         best_model.C, best_score)
            fout.write('%s: %s\n' % (subset_size, best_score))
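# In the SVM snippets, utils.gram_matrix means the kernel Gram matrix
# K[i, j] = kernel(x_i, x_j), not the feature-correlation matrix from the
# style-transfer code. A minimal numpy sketch under the assumption that the
# wrapped kernel is a callable on two sample vectors:
import numpy as np

def gram_matrix(X, kernel):
    # X: (n_samples, n_features)
    n = X.shape[0]
    K = np.empty((n, n))
    for i in range(n):
        for j in range(i, n):
            K[i, j] = K[j, i] = kernel(X[i], X[j])  # kernels are symmetric
    return K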
def closure():
    # Composite Foreground and Background to Make Blended Image
    blend_img = torch.zeros(target_img.shape).to(gpu_id)
    blend_img = input_img * canvas_mask + target_img * (canvas_mask - 1) * (-1)

    # Compute Laplacian Gradient of Blended Image
    pred_gradient = laplacian_filter_tensor(blend_img, gpu_id)

    # Compute Gradient Loss
    grad_loss = 0
    for c in range(len(pred_gradient)):
        grad_loss += mse(pred_gradient[c], gt_gradient[c])
    grad_loss /= len(pred_gradient)
    grad_loss *= grad_weight

    # Compute Style Loss
    target_features_style = vgg(mean_shift(target_img))
    target_gram_style = [gram_matrix(y) for y in target_features_style]
    blend_features_style = vgg(mean_shift(input_img))
    blend_gram_style = [gram_matrix(y) for y in blend_features_style]
    style_loss = 0
    for layer in range(len(blend_gram_style)):
        style_loss += mse(blend_gram_style[layer], target_gram_style[layer])
    style_loss /= len(blend_gram_style)
    style_loss *= style_weight

    # Compute Content Loss (crop the pasted object region out of the blend)
    blend_obj = blend_img[:, :,
                          int(x_start - source_img.shape[2] * 0.5):int(x_start + source_img.shape[2] * 0.5),
                          int(y_start - source_img.shape[3] * 0.5):int(y_start + source_img.shape[3] * 0.5)]
    source_object_features = vgg(mean_shift(source_img * mask_img))
    blend_object_features = vgg(mean_shift(blend_obj * mask_img))
    content_loss = content_weight * mse(blend_object_features.relu2_2,
                                        source_object_features.relu2_2)

    # Compute TV Reg Loss
    tv_loss = torch.sum(torch.abs(blend_img[:, :, :, :-1] - blend_img[:, :, :, 1:])) + \
              torch.sum(torch.abs(blend_img[:, :, :-1, :] - blend_img[:, :, 1:, :]))
    tv_loss *= tv_weight

    # Compute Total Loss and Update Image
    loss = grad_loss + style_loss + content_loss + tv_loss
    optimizer.zero_grad()
    loss.backward()

    # Print Loss
    if run[0] % 1 == 0:
        print("run {}:".format(run))
        print('grad : {:4f}, style : {:4f}, content: {:4f}, tv: {:4f}'.format(
            grad_loss.item(), style_loss.item(), content_loss.item(),
            tv_loss.item()))
        print()

    run[0] += 1
    return loss
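# laplacian_filter_tensor and mean_shift come from the surrounding repo. A
# minimal sketch of the Laplacian helper, under the assumption that it
# convolves each image channel with the standard 4-neighbour Laplacian
# kernel and returns the per-channel responses as a list:
import torch
import torch.nn.functional as F

def laplacian_filter_tensor(img, gpu_id):
    # img: (1, 3, H, W); gpu_id: device for the kernel
    kernel = torch.tensor([[0., 1., 0.],
                           [1., -4., 1.],
                           [0., 1., 0.]]).view(1, 1, 3, 3).to(gpu_id)
    # one conv per channel, 'same' padding so gradients align with the image
    return [F.conv2d(img[:, c:c + 1], kernel, padding=1)
            for c in range(img.shape[1])]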
def optimize(args):
    """
    Gatys et al. CVPR 2016
    ref: Image Style Transfer Using Convolutional Neural Networks
    """
    # load the content and style target
    content_image = utils.tensor_load_rgbimage(args.content_image,
                                               size=args.content_size,
                                               keep_asp=True)
    content_image = content_image.unsqueeze(0)
    content_image = Variable(utils.preprocess_batch(content_image),
                             requires_grad=False)
    content_image = utils.subtract_imagenet_mean_batch(content_image)
    style_image = utils.tensor_load_rgbimage(args.style_image, size=args.style_size)
    style_image = style_image.unsqueeze(0)
    style_image = Variable(utils.preprocess_batch(style_image), requires_grad=False)
    style_image = utils.subtract_imagenet_mean_batch(style_image)

    # load the pre-trained vgg-16 and extract features
    vgg = Vgg16()
    utils.init_vgg16(args.vgg_model_dir)
    vgg.load_state_dict(torch.load(os.path.join(args.vgg_model_dir, "vgg16.weight")))
    if args.cuda:
        content_image = content_image.cuda()
        style_image = style_image.cuda()
        vgg.cuda()
    features_content = vgg(content_image)
    f_xc_c = Variable(features_content[1].data, requires_grad=False)
    features_style = vgg(style_image)
    gram_style = [utils.gram_matrix(y) for y in features_style]

    # init optimizer
    output = Variable(content_image.data, requires_grad=True)
    optimizer = Adam([output], lr=args.lr)
    mse_loss = torch.nn.MSELoss()

    # optimize the image
    for e in range(args.iters):
        utils.imagenet_clamp_batch(output, 0, 255)
        optimizer.zero_grad()
        features_y = vgg(output)
        content_loss = args.content_weight * mse_loss(features_y[1], f_xc_c)
        style_loss = 0.
        for m in range(len(features_y)):
            gram_y = utils.gram_matrix(features_y[m])
            gram_s = Variable(gram_style[m].data, requires_grad=False)
            style_loss += args.style_weight * mse_loss(gram_y, gram_s)
        total_loss = content_loss + style_loss
        if (e + 1) % args.log_interval == 0:
            print(total_loss.data.cpu().numpy()[0])
        total_loss.backward()
        optimizer.step()

    # save the image
    output = utils.add_imagenet_mean_batch(output)
    utils.tensor_save_bgrimage(output.data[0], args.output_image, args.cuda)
def forward(self, output, style):
    loss = 0
    features_output = self.loss_network(output)
    features_style = self.loss_network(style)
    for layer in features_output.keys():
        loss += F.mse_loss(gram_matrix(features_output[layer]),
                           gram_matrix(features_style[layer]))
    return loss
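# forward() above expects loss_network to return a dict of named layer
# activations. One way to build such an extractor with torchvision's feature
# extraction API; a sketch, and the node-name-to-relu mapping is an
# assumption that depends on the torchvision/VGG version:
import torch
from torchvision.models import vgg16
from torchvision.models.feature_extraction import create_feature_extractor

features = vgg16(pretrained=True).features.eval()
return_nodes = {'3': 'relu1_2', '8': 'relu2_2', '15': 'relu3_3', '22': 'relu4_3'}
loss_network = create_feature_extractor(features, return_nodes=return_nodes)
for p in loss_network.parameters():
    p.requires_grad_(False)  # loss network stays frozen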
def main(args):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    style_transform = transforms.Compose([transforms.ToTensor()])
    content_transform = transforms.Compose([transforms.ToTensor()])

    content_image = utils.load_image(args.content_image, size=args.image_size)
    style_image = utils.load_image(args.style_image, size=args.image_size)
    content = content_transform(content_image).unsqueeze(0).to(device)
    style = style_transform(style_image).unsqueeze(0).to(device)

    target = content.clone().to(device)
    target.requires_grad = True
    optimizer = torch.optim.Adam([target], lr=args.lr)

    vgg = Vgg19().to(device)
    content_features = vgg(content)
    style_features = vgg(style)

    for step in range(args.total_step):
        target_features = vgg(target)
        content_loss = 0.0
        style_loss = 0.0
        for t_f, c_f, s_f in zip(target_features, content_features, style_features):
            content_loss += args.content_weight * ((t_f - c_f)**2).mean()
            t_gram = utils.gram_matrix(t_f)
            s_gram = utils.gram_matrix(s_f)
            style_loss += args.style_weight * ((t_gram - s_gram)**2).mean()
        loss = content_loss + style_loss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (step + 1) % args.log_step == 0:
            print('Step [%d/%d], Content Loss: %.4f, Style Loss: %.4f, Total Loss: %.4f'
                  % (step + 1, args.total_step,
                     content_loss.item(), style_loss.item(), loss.item()))

        if (step + 1) % args.sample_step == 0:
            img = target.clone().cpu().squeeze()
            img = img.data.clamp_(0, 1)
            torchvision.utils.save_image(img, r'.\images\out\output-%d.png' % (step + 1))
def style_loss(style, combination, img_ncols, img_nrows):
    # Gram matrix of the style reference image
    S = utils.gram_matrix(style)
    # Gram matrix of the combination image
    C = utils.gram_matrix(combination)
    # Number of channels and feature-map size
    channels = 3
    size = img_nrows * img_ncols
    # Return the style loss
    return tf.reduce_sum(tf.square(S - C)) / (4.0 * (channels**2) * (size**2))
def style_reconstruction_loss(self, pred, target, weights=[1, 1, 1, 1]):
    if target.shape[0] != pred.shape[0]:
        target = torch.cat([target for _ in range(pred.shape[0])], 0)
    phi_pred = self.style(pred)
    phi_target = self.style(target)
    s = torch.empty(pred.shape[0]).fill_(0.0).requires_grad_(True).to(self.device)
    for w, p, t in zip(weights, phi_pred, phi_target):
        gm_pred = gram_matrix(p)
        gm_target = gram_matrix(t)
        squared_error = w * torch.sum((gm_pred - gm_target)**2, dim=(1, 2))
        s = torch.add(s, squared_error)
    return s
def evaluate(args):
    content_image = utils.tensor_load_rgbimage(args.content_image,
                                               size=args.content_size,
                                               keep_asp=True)
    content_image = content_image.unsqueeze(0)
    style = utils.tensor_load_rgbimage(args.style_image, size=args.style_size)
    style = style.unsqueeze(0)
    style = utils.preprocess_batch(style)

    vgg = Vgg16()
    utils.init_vgg16(args.vgg_model_dir)
    vgg.load_state_dict(torch.load(os.path.join(args.vgg_model_dir, "vgg16.weight")))
    style_model = HangSNetV1()
    style_model.load_state_dict(torch.load(args.model))
    if args.cuda:
        style_model.cuda()
        vgg.cuda()
        content_image = content_image.cuda()
        style = style.cuda()

    style_v = Variable(style, volatile=True)
    utils.subtract_imagenet_mean_batch(style_v)
    features_style = vgg(style_v)
    gram_style = [utils.gram_matrix(y) for y in features_style]

    content_image = Variable(utils.preprocess_batch(content_image))
    target = Variable(gram_style[2].data, requires_grad=False)
    style_model.setTarget(target)

    output = style_model(content_image)
    utils.tensor_save_bgrimage(output.data[0], args.output_image, args.cuda)
def get_feature_scores(self):
    logging.debug('Getting feature scores')
    self.read_data()
    self.preprocess_data()
    feature_score = []
    train_indices = self.train_test_split()
    train_X = self.X[train_indices]
    train_y = self.y[train_indices]
    test_X = self.X[~train_indices]
    test_y = self.y[~train_indices]
    # Score each feature on its own with a Gaussian-kernel SVM
    for i in range(self.X.shape[1]):
        K = utils.gram_matrix(train_X[:, i].reshape(train_X.shape[0], 1),
                              kernels.gaussian)
        svm = MulticlassSVM(kernels.gaussian, 1.0, K)
        model, score = self.select_model(np.arange(train_X.shape[0]), train_y, [svm])
        feature_score.append((score, i))
    feature_score.sort(reverse=True)
    logging.info('Sorted feature scores: %s' % feature_score)
    with open('%s/feature_score' % self.tmp_folder, 'w') as fout:
        fout.write('%s\n' % train_indices)
        for score in feature_score:
            fout.write('%s: %s\n' % (score[1], score[0]))
def call(self, inputs):
    """
    :param inputs: An image. Expects float input in [0, 1]
    :return: Style and content tensors
    """
    inputs = inputs * 255.0
    preprocessed_input = tf.keras.applications.vgg19.preprocess_input(inputs)
    outputs = self.vgg(preprocessed_input)
    style_outputs, content_outputs = (
        outputs[:self.num_style_layers],
        outputs[self.num_style_layers:],
    )
    style_outputs = [gram_matrix(style_output) for style_output in style_outputs]
    content_dict = {
        content_name: value
        for content_name, value in zip(self.content_layers, content_outputs)
    }
    style_dict = {
        style_name: value
        for style_name, value in zip(self.style_layers, style_outputs)
    }
    return {"content": content_dict, "style": style_dict, "raw": inputs}
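# call() above relies on self.vgg being a Model that emits the style layers
# followed by the content layers. A sketch of that constructor, following the
# TF style-transfer tutorial; the layer names are the standard Keras VGG19
# ones and the layer choice is an assumption:
import tensorflow as tf

def vgg_layers(layer_names):
    # Frozen ImageNet-pretrained VGG19 with selected intermediate outputs
    vgg = tf.keras.applications.VGG19(include_top=False, weights='imagenet')
    vgg.trainable = False
    outputs = [vgg.get_layer(name).output for name in layer_names]
    return tf.keras.Model([vgg.input], outputs)

# e.g. style_layers = ['block1_conv1', 'block2_conv1', 'block3_conv1',
#                      'block4_conv1', 'block5_conv1']
#      content_layers = ['block5_conv2']
#      self.vgg = vgg_layers(style_layers + content_layers)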
def loss(self, images, stylized, inplace=True):
    stylized = utils.normalize_batch(stylized, inplace=inplace)
    images = utils.normalize_batch(images, inplace=inplace)

    features_stylized = self.vgg_extractor(stylized)
    features_images = self.vgg_extractor(images)

    content_loss = self.hparams.content_weight * F.mse_loss(
        features_stylized.relu2_2, features_images.relu2_2)

    # Per-layer style weights (currently unused; see the commented scaling below)
    style_weights = [1.0, 1.0, 1.4, 1.0, 1.0]
    style_loss = 0.
    for i, (ft_stylized, gm_s) in enumerate(zip(features_stylized, self.gram_style)):
        gm_stylized = utils.gram_matrix(ft_stylized)
        gm_s = gm_s.type_as(ft_stylized)
        c, h, w = gm_stylized.shape
        style_loss += F.mse_loss(gm_stylized, gm_s[:len(images), :, :])
        # style_loss *= style_weights[i] / (c * h * w)
    style_loss *= self.hparams.style_weight

    total_loss = content_loss + style_loss
    return total_loss, content_loss, style_loss
def losses(content_maps, content_output, content_weight,
           style_maps, style_output, style_weight):
    def mse(y, x):
        return tf.losses.mean_squared_error(labels=y, predictions=x)

    # TODO: per-layer content weights
    loss_content = [
        mse(pred, label) for label, pred in zip(content_maps, content_output)
    ]
    loss_content_red = tf.reduce_sum(loss_content)
    loss_content_red *= content_weight

    style_weights = [0.2, 0.2, 0.2, 0.2, 0.2]
    style_weights = tf.unstack(tf.constant(style_weights), axis=0)
    loss_style_raw = [
        mse(utils.gram_matrix(pred), label)
        for label, pred in zip(style_maps, style_output)
    ]
    loss_style = [
        tf.multiply(weight, layer_loss)
        for weight, layer_loss in zip(style_weights, loss_style_raw)
    ]
    loss_style_red = tf.reduce_sum(loss_style)
    loss_style_red *= style_weight

    return loss_content_red, loss_style_red
def load(cont_path, style_path, device, img_size):
    '''
    A small function for loading and preparing the necessities.
    `cont_path`: Path/Link to the content image.
    `style_path`: Path/Link to the style image.
    `device`: The device for the model and the images.
    `img_size`: The desired size for the image.
    '''
    content_image = load_image(cont_path, device, img_size)
    _, _, w, h = content_image.shape
    style_image = load_image(style_path, device, (w, h))
    target = content_image.clone().requires_grad_(True).to(device)

    vgg = models.vgg19(pretrained=True).features.eval().to(device)
    content_features = get_features(content_image, vgg, layers)
    style_features = get_features(style_image, vgg, layers)
    style_grams = {
        layer: gram_matrix(style_features[layer])
        for layer in style_features
    }
    return content_features, style_grams, target, vgg
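# load() above depends on get_features and a module-level `layers` mapping
# that are not shown. A common sketch of both; the index-to-name mapping
# follows the usual Gatys-style layer picks for vgg19.features and is an
# assumption:
layers = {'0': 'conv1_1', '5': 'conv2_1', '10': 'conv3_1',
          '19': 'conv4_1', '21': 'conv4_2', '28': 'conv5_1'}

def get_features(image, model, layers):
    # Run the image through vgg.features, recording the requested layers
    features = {}
    x = image
    for name, layer in model._modules.items():
        x = layer(x)
        if name in layers:
            features[layers[name]] = x
    return features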
def step(engine, batch):
    x, _ = batch
    x = x.to(device)
    n_batch = len(x)

    optimizer.zero_grad()
    y = transformer(x)
    x = utils.normalize_batch(x)
    y = utils.normalize_batch(y)

    features_x = vgg(x)
    features_y = vgg(y)

    content_loss = args.content_weight * mse_loss(features_y.relu2_2,
                                                  features_x.relu2_2)

    style_loss = 0.0
    for ft_y, gm_s in zip(features_y, gram_style):
        gm_y = utils.gram_matrix(ft_y)
        style_loss += mse_loss(gm_y, gm_s[:n_batch, :, :])
    style_loss *= args.style_weight

    total_loss = content_loss + style_loss
    total_loss.backward()
    optimizer.step()

    return {
        "content_loss": content_loss.item(),
        "style_loss": style_loss.item(),
        "total_loss": total_loss.item()
    }
def perceptual_loss(features=None, style_grams=None, content_features=None, cfg=None):
    """
    Computes the loss of the network, accounting for style and content loss
    (total variation loss handled separately).

    Args:
    --features: <dict> The vgg activations of the current output of the image transformation net
    --style_grams: <list> The gram matrix of each vgg activation for the style image
    --content_features: <dict> The vgg activations of the content image
    --cfg: <dict> The config dictionary

    Returns:
    <tuple> The content loss and style loss of the image transformation network.
    """
    # Content loss
    content_loss = nn.MSELoss(reduction='mean')(features['relu2_2'],
                                                content_features['relu2_2'])
    content_loss *= cfg['content_weight']

    # Style loss
    style_loss = 0
    grams = [gram_matrix(act) for act in features.values()]
    for gram, style_gram in zip(grams, style_grams):
        style_loss += nn.MSELoss(reduction='mean')(gram, style_gram)
    style_loss *= (cfg['style_weight'] / len(grams))

    return content_loss, style_loss
def train_step(batch):
    with tf.GradientTape() as tape:
        output_batch = it_network(batch, training=True)
        output_batch = 255 * (output_batch + 1.0) / 2.0  # float deprocess

        # Feed target and output batch through loss_network
        target_batch_feature_maps = loss_network(batch)
        output_batch_feature_maps = loss_network(output_batch)

        c_loss = content_loss(
            target_batch_feature_maps[hparams['content_layer_index']],
            output_batch_feature_maps[hparams['content_layer_index']])
        c_loss *= hparams['content_weight']

        # Get output Gram matrices
        output_gram_matrices = [gram_matrix(x) for x in output_batch_feature_maps]
        s_loss = style_loss(target_gram_matrices, output_gram_matrices)
        s_loss *= hparams['style_weight'] / num_style_layers

        total_loss = c_loss + s_loss
        scaled_loss = optimizer.get_scaled_loss(total_loss)

    # mixed-precision path; the non-scaled alternative would be:
    # gradients = tape.gradient(total_loss, it_network.trainable_variables)
    scaled_gradients = tape.gradient(scaled_loss, it_network.trainable_variables)
    gradients = optimizer.get_unscaled_gradients(scaled_gradients)
    optimizer.apply_gradients(zip(gradients, it_network.trainable_variables))

    total_loss_avg(total_loss)
    content_loss_avg(c_loss)
    style_loss_avg(s_loss)
def get_style_grams(style_im, cfg):
    """
    Computes the style grams for the vgg activations of the given style image.

    Args:
    --style_im: <tensor> The pre-processed style image. Should have shape (ch, h, w)
    --cfg: <dict> The training config dictionary

    Returns:
    <list> The gram matrix for each layer in the vgg activations of the style image.
    """
    assert len(style_im.shape) == 3, \
        f'Style image expected to have 3 dimensions but has {len(style_im.shape)}. Is it already a batch?'

    # First, turn the style image into a batch
    style_batch = torch.stack([style_im] * cfg['batch_size'], dim=0)

    # Get vgg activations
    style_features = vgg_activations(style_batch)

    # Get grams
    style_grams = [gram_matrix(act) for act in style_features.values()]
    return style_grams
def _style_loss(self, features_y):
    style_loss = 0.
    for ft_y, gm_s in zip(features_y.values(), self._gram_style):
        gm_y = utils.gram_matrix(ft_y)
        style_loss += self._mse_loss(gm_y, gm_s)
    style_loss *= self._style_weight
    return style_loss
def closure():
    optimizer.zero_grad()
    y.data.clamp_(0, 1)
    features_y = vgg(y)  # feature maps of y extracted from VGG
    # gram matrices of features_y at relu1_2, relu2_2, relu3_3, relu4_3
    gram_style_y = [utils.gram_matrix(i) for i in features_y]

    fc = features_content.relu4_3  # content target in relu4_3
    fy = features_y.relu4_3        # y in relu4_3

    # sum the style losses over relu1_2, relu2_2, relu3_3, relu4_3
    style_loss = 0
    for fy_gm, fs_gm in zip(gram_style_y, gram_style):
        style_loss += mse_loss(fy_gm, fs_gm)
    style_loss = STYLE_WEIGHT * style_loss
    # single-layer alternative:
    # fy_gm = gram_style_y[3]
    # fs_gm = gram_style[3]
    # style_loss = STYLE_WEIGHT * mse_loss(fy_gm, fs_gm)

    content_loss = CONTENT_WEIGHT * mse_loss(fc, fy)  # content loss
    tv_loss = TV_WEIGHT * (
        torch.sum(torch.abs(y[:, :, :, :-1] - y[:, :, :, 1:])) +
        torch.sum(torch.abs(y[:, :, :-1, :] - y[:, :, 1:, :])))

    total_loss = content_loss + style_loss + tv_loss
    total_loss.backward(retain_graph=True)

    if epoch % 100 == 0:
        print("Epoch {}: Style Loss : {:4f} Content Loss: {:4f} TV Loss: {:4f}"
              .format(epoch, style_loss, content_loss, tv_loss))
    if epoch % 1000 == 0:
        utils.save_image_epoch(y, './outputs/', epoch)
    return total_loss
def compare_classifiers(names, classifiers):
    scores = {name: [0, 0, 0] for name in names}
    for test in range(100):
        logging.debug('Running test %s', test)
        X, y = make_classification(n_features=2, n_redundant=0, n_informative=2,
                                   random_state=1, n_clusters_per_class=1)
        rng = np.random.RandomState(2)
        X += 2 * rng.uniform(size=X.shape)
        linearly_separable = (X, y)
        datasets = [
            make_moons(noise=0.3, random_state=0),
            make_circles(noise=0.2, factor=0.5, random_state=1),
            linearly_separable
        ]
        for i, ds in enumerate(datasets):
            X, y = ds
            X = StandardScaler().fit_transform(X)
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.4)
            x_values = ['%s:%s' % (x[0], x[1]) for x in X_train]
            idx = np.array([False] * X.shape[0])
            indices = np.arange(X.shape[0])
            for j, x in enumerate(X):
                if '%s:%s' % (x[0], x[1]) in x_values:
                    idx[j] = True
            K_linear = utils.gram_matrix(X, kernels.linear)
            K_gaussian = utils.gram_matrix(X, kernels.gaussian)
            classifiers[2].K = K_linear
            classifiers[3].K = K_gaussian
            for name, clf in zip(names, classifiers):
                if name.startswith("sklearn"):
                    clf.fit(X_train, y_train)
                    score = clf.score(X_test, y_test)
                else:
                    clf.fit(indices[idx], y[idx])
                    score = clf.score(indices[~idx], y[~idx])
                scores[name][i] += score / 100.0
    return scores
def train_ofb(args):
    train_dataset = dataset.DAVISDataset(args.dataset, use_flow=True)
    train_loader = DataLoader(train_dataset, batch_size=1)

    transformer = transformer_net.TransformerNet(args.pad_type)
    transformer.train()
    optimizer = torch.optim.Adam(transformer.parameters(), args.lr)
    mse_loss = torch.nn.MSELoss()

    vgg = Vgg16()
    vgg.load_state_dict(torch.load(os.path.join(args.vgg_model_dir, "vgg16.weight")))
    vgg.eval()

    if args.cuda:
        transformer.cuda()
        vgg.cuda()
        mse_loss.cuda()

    style = utils.tensor_load_resize(args.style_image, args.style_size)
    style = style.unsqueeze(0)
    print("=> Style image size: " + str(style.size()))
    print("=> Pixel OFB loss weight: %f" % args.time_strength)

    style = utils.preprocess_batch(style)
    if args.cuda:
        style = style.cuda()
    style = utils.subtract_imagenet_mean_batch(style)
    features_style = vgg(style)
    gram_style = [utils.gram_matrix(y).detach() for y in features_style]

    train_loader.dataset.reset()
    transformer.train()
    transformer.cuda()
    agg_content_loss = agg_style_loss = agg_pixelofb_loss = 0.
    iters = 0
    anormaly = False
    elapsed_time = 0
    # NOTE: this variant only times the pixel OFB loss; optimizer.step()
    # is never called, so no weights are updated.
    for batch_id, (x, flow, conf) in enumerate(tqdm(train_loader)):
        x, flow, conf = x[0], flow[0], conf[0]
        iters += 1
        optimizer.zero_grad()
        x = utils.preprocess_batch(x)  # (N, 3, 256, 256)
        if args.cuda:
            x = x.cuda()
            flow = flow.cuda()
            conf = conf.cuda()
        y = transformer(x)  # (N, 3, 256, 256)

        begin_time = time.time()
        warped_y, warped_y_mask = warp(y[1:], flow)
        warped_y = warped_y.detach()
        warped_y_mask *= conf
        pixel_ofb_loss = args.time_strength * weighted_mse(
            y[:-1], warped_y, warped_y_mask)
        pixel_ofb_loss.backward()
        elapsed_time += time.time() - begin_time

        if batch_id > 1000:
            break
    print(elapsed_time / float(batch_id + 1))
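# The warp() used above comes from the surrounding repo. A minimal sketch of
# the standard grid_sample-based backward warp, assuming flow holds pixel
# offsets with shape (N, 2, H, W); the occlusion mask threshold is a common
# convention, not this repo's confirmed choice:
import torch
import torch.nn.functional as F

def warp(x, flow):
    # x: (N, C, H, W) frames; flow: (N, 2, H, W) pixel offsets
    n, _, h, w = x.size()
    ys, xs = torch.meshgrid(torch.arange(h), torch.arange(w), indexing='ij')
    grid = torch.stack((xs, ys), dim=0).float().to(x.device)  # (2, H, W)
    vgrid = grid.unsqueeze(0) + flow
    # normalize sampling coordinates to [-1, 1] for grid_sample
    vgrid_x = 2.0 * vgrid[:, 0] / max(w - 1, 1) - 1.0
    vgrid_y = 2.0 * vgrid[:, 1] / max(h - 1, 1) - 1.0
    vgrid = torch.stack((vgrid_x, vgrid_y), dim=3)  # (N, H, W, 2)
    output = F.grid_sample(x, vgrid, align_corners=True)
    # mask out positions that sampled outside the frame
    mask = F.grid_sample(torch.ones_like(x), vgrid, align_corners=True)
    mask = (mask >= 0.9999).float()
    return output, mask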
def get_style_loss(style_gram_list, output_features, style_id):
    style_loss = 0
    for i in range(len(output_features)):
        for j in range(len(style_id)):
            gram_o = gram_matrix(output_features[i][j].unsqueeze(0))
            gram_s = style_gram_list[style_id[j]][i]
            style_loss += mse_loss(gram_o, gram_s)
    return style_loss
def gramLayer(self, image):
    conv_out = tf.layers.conv2d(image, 32, (7, 7), padding='same',
                                activation=tf.nn.relu)
    gramTensor = gram_matrix(conv_out)
    # Added: an additional layer taking our input tensors and reshaping them
    gram_out = tf.reshape(gramTensor, [-1, 1024])
    gram_out = tf.layers.dense(gram_out, 224, activation=tf.nn.relu)
    return tf.reshape(gram_out, [-1, 224, 1, 1])
def train():
    cfg = Config()
    vgg = Vgg16().to(cfg.device).eval()
    for param in vgg.parameters():
        param.requires_grad = False  # freeze the network's parameters

    content = utils.get_image_data(cfg.content_path, cfg.image_size).to(cfg.device)
    style = utils.get_image_data(cfg.style_path, cfg.image_size).to(cfg.device)
    target = content.clone().requires_grad_(True)

    content_features = vgg(content)
    style_features = vgg(style)
    # the style grams must have requires_grad False, as F.mse_loss requires
    gram_styles = [
        utils.gram_matrix(x).requires_grad_(False) for x in style_features
    ]
    # feature-map dimensions for the Gatys normalization term
    batches, channels, h, w = style_features[-1].size()

    optimizer = t.optim.Adam([target], lr=cfg.lr)
    for epoch in range(cfg.epoches):
        target_features = vgg(target)
        content_loss = F.mse_loss(
            target_features.relu3_3,
            content_features.relu3_3.requires_grad_(False))
        style_loss = 0.
        for tar, gram_style in zip(target_features, gram_styles):
            tar_gram = utils.gram_matrix(tar)
            style_loss += F.mse_loss(tar_gram, gram_style)
        style_loss = style_loss / (2 * channels * h * w)**2
        total_loss = cfg.content_weight * content_loss + cfg.style_weight * style_loss

        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        if epoch % 100 == 0:
            print("iteration:{} Content loss:{:.4f}, Style loss:{:.4f}, Total loss:{:.4f}"
                  .format(epoch + 1, content_loss.item(), style_loss.item(),
                          total_loss.item()))

    denorm = tv.transforms.Normalize([-2.12, -2.04, -1.80], [4.37, 4.46, 4.44])
    target = denorm(target.squeeze().to('cpu')).clamp_(min=0, max=1)
    tv.utils.save_image(
        target,
        cfg.combined_path + '/output '
        + str(cfg.content_weight / cfg.style_weight) + '.png')
def call(self, inputs):
    preprocessed = preprocess_input(inputs)
    outputs = self.vgg(preprocessed)
    style_outputs = [gram_matrix(features)
                     for features in outputs[:self.style_layers_len]]
    content_outputs = outputs[self.style_layers_len:]
    return style_outputs, content_outputs
def get_feature(self, sess, layers, image, gram=False):
    assert self.net is not None, "ERROR!!!! Please build model first"
    features = {}
    for l in layers:
        feat = sess.run(self.net[l], feed_dict={self.net['input']: image})
        if gram:
            # gram_matrix returns a tensor here, so evaluate it back to an array
            feat = utils.gram_matrix(feat, -1, feat.shape[3])
            feat = feat.eval()
        features[l] = feat
    return features
def forward(self, x):
    G = gram_matrix(x)
    if self.mode == "capture":
        self.target = G.detach()
    elif self.mode == "loss":
        self.loss = self.crit(G, self.target)
    return x
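# This capture/loss module follows the neural-style pattern: the module is
# inserted after a conv layer, the style image is run once in "capture" mode
# to record Gram targets, then the mode is flipped to "loss" so later forward
# passes populate self.loss. A usage sketch; `style_module`, `net`,
# `style_img`, and `generated_img` are hypothetical names:
style_module.mode = "capture"
net(style_img)           # records the Gram target via G.detach()
style_module.mode = "loss"
net(generated_img)       # fills style_module.loss for the optimizer to sum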
def engine():
    global epoch

    # compute content loss
    content_G = model.features[:20](G)
    content_C = model.features[:20](C)
    loss_content = F.mse_loss(content_G, content_C)

    # get intermediate representations for style loss
    layers_G = [model.features[0](G)]
    layers_S = [model.features[0](S)]
    for idx in model_layers:
        layers_G.append(model.features[idx[0]:idx[1]](layers_G[-1]))
        layers_S.append(model.features[idx[0]:idx[1]](layers_S[-1]))

    # compute style loss using gram matrices
    style_losses = []
    for g, s in zip(layers_G, layers_S):
        num_channels = g.shape[1]
        num_pixels = g.shape[2]  # assumes square feature maps
        factor = 4 * num_channels * num_channels * num_pixels * num_pixels
        style_losses.append(
            F.mse_loss(utils.gram_matrix(g), utils.gram_matrix(s)) / factor)
    loss_style = sum(style_losses) / len(style_losses)  # equal weights for each layer

    loss = loss_content + args.alpha * loss_style
    optimizer.zero_grad()
    loss.backward()

    # log metrics
    losses["content"].append(loss_content.item())
    losses["style"].append(args.alpha * loss_style.item())
    losses["total"].append(loss.item())
    if (epoch + 1) % args.log_interval == 0:
        utils.save_image(G.cpu().detach(), args.output_folder, epoch)

    pbar.update()
    epoch += 1
    return loss
def get_gram_matrix(self, file_base, kernel):
    file_path = None
    if self.gram_folder is not None:
        file_path = '%s/%s' % (self.gram_folder, file_base)
    if file_path is not None and os.path.exists(file_path):
        K = self.read_gram_matrix(file_path)
    else:
        K = utils.gram_matrix(self.X, kernel)
        self.write_gram_matrix(K, '%s/%s' % (self.tmp_folder, file_base))
    return K
def train(args):
    device = torch.device("cuda" if args.cuda else "cpu")

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    transform = transforms.Compose([
        transforms.Resize(args.image_size),
        transforms.CenterCrop(args.image_size),
        transforms.ToTensor(),
        transforms.Lambda(lambda x: x.mul(255))
    ])
    train_dataset = datasets.ImageFolder(args.dataset, transform)
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size)

    transformer = TransformerNet().to(device)
    optimizer = Adam(transformer.parameters(), args.lr)
    mse_loss = torch.nn.MSELoss()

    vgg = Vgg16(requires_grad=False).to(device)
    style_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Lambda(lambda x: x.mul(255))
    ])
    style = utils.load_image(args.style_image, size=args.style_size)
    style = style_transform(style)
    style = style.repeat(args.batch_size, 1, 1, 1).to(device)

    features_style = vgg(utils.normalize_batch(style))
    gram_style = [utils.gram_matrix(y) for y in features_style]

    for e in range(args.epochs):
        transformer.train()
        agg_content_loss = 0.
        agg_style_loss = 0.
        count = 0
        for batch_id, (x, _) in enumerate(train_loader):
            n_batch = len(x)
            count += n_batch
            optimizer.zero_grad()

            x = x.to(device)
            y = transformer(x)

            y = utils.normalize_batch(y)
            x = utils.normalize_batch(x)

            features_y = vgg(y)
            features_x = vgg(x)

            content_loss = args.content_weight * mse_loss(features_y.relu2_2,
                                                          features_x.relu2_2)

            style_loss = 0.
            for ft_y, gm_s in zip(features_y, gram_style):
                gm_y = utils.gram_matrix(ft_y)
                style_loss += mse_loss(gm_y, gm_s[:n_batch, :, :])
            style_loss *= args.style_weight

            total_loss = content_loss + style_loss
            total_loss.backward()
            optimizer.step()

            agg_content_loss += content_loss.item()
            agg_style_loss += style_loss.item()

            if (batch_id + 1) % args.log_interval == 0:
                mesg = "{}\tEpoch {}:\t[{}/{}]\tcontent: {:.6f}\tstyle: {:.6f}\ttotal: {:.6f}".format(
                    time.ctime(), e + 1, count, len(train_dataset),
                    agg_content_loss / (batch_id + 1),
                    agg_style_loss / (batch_id + 1),
                    (agg_content_loss + agg_style_loss) / (batch_id + 1)
                )
                print(mesg)

            if args.checkpoint_model_dir is not None and (batch_id + 1) % args.checkpoint_interval == 0:
                transformer.eval().cpu()
                ckpt_model_filename = "ckpt_epoch_" + str(e) + "_batch_id_" + str(batch_id + 1) + ".pth"
                ckpt_model_path = os.path.join(args.checkpoint_model_dir, ckpt_model_filename)
                torch.save(transformer.state_dict(), ckpt_model_path)
                transformer.to(device).train()

    # save model
    transformer.eval().cpu()
    save_model_filename = "epoch_" + str(args.epochs) + "_" + str(time.ctime()).replace(' ', '_') + "_" + str(
        args.content_weight) + "_" + str(args.style_weight) + ".model"
    save_model_path = os.path.join(args.save_model_dir, save_model_filename)
    torch.save(transformer.state_dict(), save_model_path)
    print("\nDone, trained model saved at", save_model_path)
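# Several snippets above call utils.normalize_batch before feeding the VGG
# loss network. A sketch matching the convention in the pytorch/examples
# fast-neural-style code: scale [0, 255] inputs to [0, 1], then apply the
# ImageNet channel statistics.
def normalize_batch(batch):
    # normalize using ImageNet mean and std
    mean = batch.new_tensor([0.485, 0.456, 0.406]).view(-1, 1, 1)
    std = batch.new_tensor([0.229, 0.224, 0.225]).view(-1, 1, 1)
    batch = batch.div_(255.0)
    return (batch - mean) / std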
def train(**kwargs):
    opt = Config()
    for k_, v_ in kwargs.items():
        setattr(opt, k_, v_)

    vis = utils.Visualizer(opt.env)

    # data loading
    transfroms = tv.transforms.Compose([
        tv.transforms.Scale(opt.image_size),
        tv.transforms.CenterCrop(opt.image_size),
        tv.transforms.ToTensor(),
        tv.transforms.Lambda(lambda x: x * 255)
    ])
    dataset = tv.datasets.ImageFolder(opt.data_root, transfroms)
    dataloader = data.DataLoader(dataset, opt.batch_size)

    # transformer network
    transformer = TransformerNet()
    if opt.model_path:
        transformer.load_state_dict(
            t.load(opt.model_path, map_location=lambda _s, _: _s))

    # loss network: Vgg16
    vgg = Vgg16().eval()

    # optimizer
    optimizer = t.optim.Adam(transformer.parameters(), opt.lr)

    # load the style image
    style = utils.get_style_data(opt.style_path)
    vis.img('style', (style[0] * 0.225 + 0.45).clamp(min=0, max=1))

    if opt.use_gpu:
        transformer.cuda()
        style = style.cuda()
        vgg.cuda()

    # gram matrices of the style image
    style_v = Variable(style, volatile=True)
    features_style = vgg(style_v)
    gram_style = [Variable(utils.gram_matrix(y.data)) for y in features_style]

    # loss meters
    style_meter = tnt.meter.AverageValueMeter()
    content_meter = tnt.meter.AverageValueMeter()

    for epoch in range(opt.epoches):
        content_meter.reset()
        style_meter.reset()
        for ii, (x, _) in tqdm.tqdm(enumerate(dataloader)):
            # train
            optimizer.zero_grad()
            if opt.use_gpu:
                x = x.cuda()
            x = Variable(x)
            y = transformer(x)
            y = utils.normalize_batch(y)
            x = utils.normalize_batch(x)

            features_y = vgg(y)
            features_x = vgg(x)

            # content loss
            content_loss = opt.content_weight * F.mse_loss(features_y.relu2_2,
                                                           features_x.relu2_2)

            # style loss
            style_loss = 0.
            for ft_y, gm_s in zip(features_y, gram_style):
                gram_y = utils.gram_matrix(ft_y)
                style_loss += F.mse_loss(gram_y, gm_s.expand_as(gram_y))
            style_loss *= opt.style_weight

            total_loss = content_loss + style_loss
            total_loss.backward()
            optimizer.step()

            # smooth the losses
            content_meter.add(content_loss.data[0])
            style_meter.add(style_loss.data[0])

            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # visualization
                vis.plot('content_loss', content_meter.value()[0])
                vis.plot('style_loss', style_meter.value()[0])
                # x and y were standardized by utils.normalize_batch,
                # so undo the normalization before display
                vis.img('output', (y.data.cpu()[0] * 0.225 + 0.45).clamp(min=0, max=1))
                vis.img('input', (x.data.cpu()[0] * 0.225 + 0.45).clamp(min=0, max=1))

        # save visdom state and the model
        vis.save([opt.env])
        t.save(transformer.state_dict(), 'checkpoints/%s_style.pth' % epoch)