x_test_normalised = x_test_normalised / train_sd[:-1] ### FULL GP ### # hyperparameters no_inputs = 13 BFGS = False learning_rate = 0.1 no_iters = 500 # initialise model model = GP(no_inputs) # optimize hyperparameters if BFGS == True: optimizer = optim.LBFGS(model.parameters(), lr=learning_rate) else: optimizer = optim.Adam(model.parameters(), lr=learning_rate) with trange(no_iters) as t: for i in t: if BFGS == True: def closure(): optimizer.zero_grad() NLL = -model.get_LL(x_train_normalised, y_train_normalised) NLL.backward() return NLL optimizer.step(closure) NLL = -model.get_LL(x_train_normalised, y_train_normalised)
def get_input_optimizer(self, input_img):
    """Create an L-BFGS optimizer that treats the image pixels as parameters.

    ``requires_grad_()`` marks ``input_img`` as a trainable leaf tensor so the
    optimizer can update it directly.
    """
    return optim.LBFGS([input_img.requires_grad_()])
def run_style_transfer(
        content_img, style_img, input_img, first_pass_img, style_aligned_img,
        mask, learning_rate, content_layers, style_layers, n_iter,
        style_weight=0.007, content_weight=1.0, phase=0, pass_=1):
    """Optimise ``input_img`` (via L-BFGS on the pixels) to match the content
    of ``content_img`` and the style of ``style_img``, using a two-pass
    patch-mapping scheme; returns the optimised image tensor.
    """
    # extract content features
    content_features = Net(content_layers=content_layers)(
        content_img, phase=phase).content_features
    style_aligned_features = Net(content_layers=content_layers, mask=mask)(
        style_aligned_img, phase=phase).content_features

    # modify the content features through the use of gain maps (style transfer
    # for head portraits)
    if use_gain_maps:
        for i, (c, s) in enumerate(
                zip(content_features, style_aligned_features)):
            content_features[i] = c * gain_map(c, s)

    # extract style features
    style_features = Net(style_layers=style_layers, mask=mask)(
        style_img, phase=phase).style_features
    input_features = Net(style_layers=style_layers, mask=mask)(
        first_pass_img, phase=phase).style_features

    # first pass: map every style layer independently
    if pass_ == 1:
        maps = mapping(input_features, style_features)
        modified_style_features = align(style_features, maps)
    # second pass: one robust mapping at a reference layer, then propagate
    else:
        # index of the reference layer
        ref = 2
        # determine the matching between content and style patches
        # (NOTE(review): `map` shadows the builtin of the same name)
        map = mapping([input_features[ref]], [style_features[ref]])[0]
        mask = nn.Upsample(size=style_features[ref].shape[2:4],
                           mode='nearest')(mask)
        # make the mapping more robust
        map = refined_mapping(map, style_features[ref][0], mask.reshape(-1))
        # propagate the mapping obtained at the reference layer to other style layers
        mappings = [
            propagate_mapping(map, style_features[ref].shape[2:4],
                              sf.shape[2:4]) for sf in style_features
        ]
        # align the style features based on the mapping
        modified_style_features = align(style_features, mappings)

    net = Net(content_layers=content_layers, style_layers=style_layers,
              mask=mask)
    features = {}  # NOTE(review): never used below
    # the image pixels themselves are the parameters being optimised
    optimizer = optim.LBFGS([input_img.requires_grad_()], lr=learning_rate)

    run = [0]  # list so the closure can mutate the iteration counter
    while run[0] <= n_iter:

        def closure():
            # keep pixel values in a valid range before each evaluation
            input_img.data.clamp_(0, 1)
            optimizer.zero_grad()
            model = net(input_img, content_features=content_features,
                        style_features=modified_style_features, phase=phase)
            content_score = model.content_loss
            style_score = model.style_loss
            # normalise the weights by the number of layers they cover
            content_score = content_weight / len(content_layers) * content_score
            style_score = style_weight / len(style_layers) * style_score
            tv_loss = 0.000001 * total_variation_loss(input_img)
            loss = content_score + style_score + tv_loss
            loss.backward(retain_graph=True)
            run[0] += 1
            if run[0] % 50 == 0:
                print("run {}:".format(run))
                # NOTE(review): four arguments for three placeholders; the
                # trailing `loss` is silently ignored by str.format
                print('Style Loss : {:4f} Content Loss: {:4f} TV Loss: {:4f}'.format(
                    style_score, content_score, tv_loss, loss))
                Image.from_tensor(input_img).save(
                    "./frames/frame-{}-{}.png".format(phase, run[0]))
            # TV loss drives the gradient but is excluded from the reported value
            return style_score + content_score

        optimizer.step(closure)

    # a last clamp so the returned image is in the valid pixel range
    input_img.data.clamp_(0, 1)
    return input_img
def B3_step(B, F, Z, inv_A, lambda_1, lambda_2, rho1, rho2, gamma):
    """
    Update B : F^t * W
    :param B: current binary codes, k X n (numpy array)
    :param F: output of network as the real-valued embeddings n X k
    :param Z: anchor graph, n X m Affinity matrix W: Z * inv_A * Z^t
    :param inv_A: inverse anchor normalisation matrix, m X m
    :param lambda_1: weight of the orthogonality (decorrelation) term
    :param lambda_2: weight of the bit-balance term
    :param rho1, rho2, gamma: ADMM penalty / step parameters
    :return: the updated B, k X n (element-wise sign of the relaxed solution)
    """
    bit, num_train = B.shape
    ini_B = B  # keep the incoming codes to report how many entries flipped
    # ADMM dual variables, randomly initialised
    Y1 = Variable(torch.randn(bit, num_train))
    Y2 = Variable(torch.randn(bit, num_train))
    loss_old = 0
    # relaxed (continuous) version of B that L-BFGS optimises
    B = Variable(torch.from_numpy(ini_B).type(torch.FloatTensor),
                 requires_grad=True)
    # ADMM auxiliary variables, initialised from the incoming codes
    Z1 = Variable(torch.from_numpy(ini_B).type(torch.FloatTensor))
    Z2 = Variable(torch.from_numpy(ini_B).type(torch.FloatTensor))
    optimizer_B = optim.LBFGS([B], lr=0.1)
    F = Variable(torch.from_numpy(F).type(torch.FloatTensor))
    inv_A = Variable(torch.from_numpy(inv_A).type(torch.FloatTensor))
    Z = Variable(torch.from_numpy(Z).type(torch.FloatTensor))
    Z_T = Z.t()  # m X n
    nI_K = Variable(num_train * torch.eye(bit, bit))  # n * I_k target for B*F

    for iter_B in range(20):

        # the closure is re-created each outer iteration so it captures the
        # freshly updated Z1/Z2/Y1/Y2 tensors
        def closure():
            optimizer_B.zero_grad()
            temp = torch.mm(B, torch.mm(Z, inv_A))  # k X m
            temp2 = torch.mm(temp, Z_T)  # k X n
            BAF = torch.mm(temp2, F)  # k X k
            # trace term of the graph-smoothness objective
            loss_BLF = torch.trace(torch.mm(B, F) - BAF)
            reg_loss = (B - F.t())**2  # keep codes close to the embeddings
            oth_loss = lambda_1 * ((torch.mm(B, F) - nI_K)**2)  # decorrelation
            bla_loss = lambda_2 * (B.sum(0)**2)  # bit balance
            # augmented-Lagrangian terms from the two ADMM splits
            G = Y1 + Y2 - rho1 * Z1 - rho2 * Z2
            rho_loss = ((rho1 + rho2) / 2) * (B**2).sum() + torch.trace(
                B.mm(G.t()))
            loss = (rho_loss + loss_BLF + 0.5 *
                    (reg_loss.sum() + oth_loss.sum() +
                     bla_loss.sum())) / num_train
            loss.backward()
            return loss

        optimizer_B.step(closure)
        # fraction of positive (i.e. would-be +1) entries in the relaxed codes
        count = (B.data.numpy() > 0).sum()
        print('+1, -1: %.2f%%\n' % (float(count) / num_train / bit * 100))
        print('res(init_B and Bk): %d\n' %
              ((np.sign(B.data.numpy()) - ini_B)).sum())
        # ADMM updates for the auxiliary (Z1, Z2) and dual (Y1, Y2) variables
        Z1_k = H1_step(B.data.numpy(), Y1.data.numpy(), rho1)
        Z2_k = H2_step(B.data.numpy(), Y2.data.numpy(), rho2)
        Y1_k, Y2_k = Y_step(Y1.data.numpy(), Y2.data.numpy(), Z1_k, Z2_k,
                            B.data.numpy(), rho1, rho2, gamma)
        Z1 = Variable(torch.from_numpy(Z1_k).type(torch.FloatTensor))
        Z2 = Variable(torch.from_numpy(Z2_k).type(torch.FloatTensor))
        Y1 = Variable(torch.from_numpy(Y1_k).type(torch.FloatTensor))
        Y2 = Variable(torch.from_numpy(Y2_k).type(torch.FloatTensor))
        loss = calc_all_loss(B.data.numpy(), F.data.numpy(), Z.data.numpy(),
                             inv_A.data.numpy(), Z1.data.numpy(),
                             Z2.data.numpy(), Y1.data.numpy(),
                             Y2.data.numpy(), rho1, rho2, lambda_1, lambda_2)
        # relative change of the full objective.
        # NOTE(review): loss_old is 0 on the first iteration, so this divides
        # by zero unless calc_all_loss returns a numpy float (-> inf) -- verify.
        res_error = (loss - loss_old) / loss_old
        loss_old = loss
        print('loss is %.4f, residual error is %.5f\n' % (loss, res_error))
        if (np.abs(res_error) <= 1e-4):
            break
    return np.sign(B.data.numpy())
def run_neural_style_transfer(content_image_name=content_image_name,
                              style_image_name=style_image_name,
                              content_layers=content_layers,
                              content_weights=content_weights,
                              style_layers=style_layers,
                              style_weights=style_weights,
                              max_iter=max_iter,
                              show_iter=show_iter,
                              swap_content_style=False,
                              add_index=False,
                              output_dir=output_dir):
    """Run Gatys-style neural style transfer with L-BFGS on the pixels and
    save the stylised image to ``output_dir``; returns the saved file path.

    Defaults are bound to module-level globals at definition time.
    """
    global cnt  # running index used to name output files when add_index=True
    # load images, ordered as [style_image, content_image]
    img_dirs = [image_dir, image_dir]
    if swap_content_style:
        img_names = [content_image_name, style_image_name]
    else:
        img_names = [style_image_name, content_image_name]
    imgs = [Image.open(img_dirs[i] + name) for i, name in enumerate(img_names)]
    imgs_torch = [prep(img) for img in imgs]
    if torch.cuda.is_available():
        imgs_torch = [Variable(img.unsqueeze(0).cuda()) for img in imgs_torch]
    else:
        imgs_torch = [Variable(img.unsqueeze(0)) for img in imgs_torch]
    style_image, content_image = imgs_torch
    # opt_img = Variable(torch.randn(content_image.size()).type_as(content_image.data), requires_grad=True) #random init
    # initialise the optimisation from a copy of the content image
    opt_img = Variable(content_image.data.clone(), requires_grad=True)
    # one loss per layer: Gram-matrix MSE for style, plain MSE for content
    loss_layers = style_layers + content_layers
    loss_fns = [GramMSELoss()] * len(style_layers) + [nn.MSELoss()
                                                      ] * len(content_layers)
    if torch.cuda.is_available():
        loss_fns = [loss_fn.cuda() for loss_fn in loss_fns]
    weights = style_weights + content_weights
    #compute optimization targets
    style_targets = [
        GramMatrix()(A).detach() for A in vgg(style_image, style_layers)
    ]
    content_targets = [A.detach() for A in vgg(content_image, content_layers)]
    targets = style_targets + content_targets
    #run style transfer
    if add_index:
        print("Running neural style transfer %d on " % cnt, os.uname()[1])
    else:
        print("Running neural style transfer on ", os.uname()[1])
    print("Content image name:", content_image_name)
    print("Style image name:", style_image_name)
    print("Image size = %d" % img_size)
    print("Max number of iterations = %d" % max_iter)
    print("Show result every %d iterations" % show_iter)
    print("Content layer(s):", content_layers)
    print("Content weight(s):", content_weights)
    print("Style layer(s):", style_layers)
    print("Style weight(s):", style_weights)
    print("\n\n")
    optimizer = optim.LBFGS([opt_img])
    n_iter = [0]  # list so the closure can mutate the iteration counter
    t0 = perf_counter()
    while n_iter[0] <= max_iter:

        def closure():
            optimizer.zero_grad()
            out = vgg(opt_img, loss_layers)
            layer_losses = [
                weights[a] * loss_fns[a](A, targets[a])
                for a, A in enumerate(out)
            ]
            loss = sum(layer_losses)
            loss.backward()
            n_iter[0] += 1
            #print loss
            if n_iter[0] % show_iter == (show_iter - 1):
                print('Iteration: %d, loss: %f' %
                      (n_iter[0] + 1, loss.data.item()))
            return loss

        optimizer.step(closure)
    t1 = perf_counter()
    print("Total execution time: %f" % (t1 - t0))
    print(
        "==========================================================================================="
    )
    print("\n\n")
    # post-process back to a PIL image and save
    out_img = postp(opt_img.data[0].cpu().squeeze())
    if add_index:
        out_img_path = "%s/nst_stylized_image%d.jpg" % (output_dir, cnt)
        cnt += 1
    else:
        out_img_path = "%s/nst_stylized_image.jpg" % output_dir
    out_img.save(out_img_path)
    return out_img_path
def hierarchical_end_to_end_optimization_sample_position(
        init_guess, target_exemplar, upscaling_rate, img_res1, img_res2,
        kernel_sigma1, kernel_sigma2, num_optim_step, texture_weight,
        structure_weight, histogram_weight, image_histogram_weight,
        texture_layers, structure_layers, optim_method, results_dir):
    """Coarse-to-fine optimisation of 2-D sample positions (``init_guess``)
    so that their rasterised density image matches the texture statistics of
    ``target_exemplar``; returns the optimised point set.

    NOTE(review): this block was reconstructed from a whitespace-mangled
    source; the nesting of the periodic-logging / convergence section inside
    the closure is a best-effort reconstruction -- verify against VCS history.
    """
    global break_loop
    # three-level schedule: kernel width, raster resolution, stopping criterion
    kernel_sigma_list = torch.linspace(kernel_sigma1, kernel_sigma2, 3)
    img_res_list = torch.linspace(img_res1, img_res2, 3)
    stopping_crit_list = torch.linspace(0.01, 0.01, 3)
    outerloop = 0
    lr_list = [0.02, 0.01, 0.01, 0.002, 0.002, 0.002, 0.002, 0.002]
    # run[] counts iterations ACROSS all levels, so later levels get fewer
    # inner iterations (the inner while compares run[0] to num_optim_step)
    run = [0]
    while outerloop < 3:
        stopping_crit = stopping_crit_list[outerloop].tolist()
        kernel_sigma = kernel_sigma_list[outerloop].tolist()
        img_res = img_res_list[outerloop].tolist()
        # the input raster is upscaled relative to the target raster
        img_res_input = round(img_res * upscaling_rate)
        kernel_sigma_input = kernel_sigma / upscaling_rate
        from point2image import Point2Image
        target_p2i = Point2Image(2, 0, kernel_sigma=kernel_sigma,
                                 feature_sigma=0, res=img_res)
        input_p2i = Point2Image(2, 0, kernel_sigma=kernel_sigma_input,
                                feature_sigma=0, res=img_res_input)
        if optim_method == 'LBFGS':
            optimizer = optim.LBFGS([init_guess.requires_grad_()], lr=0.5)
        elif optim_method == 'Adam':
            optimizer = optim.Adam([init_guess.requires_grad_()],
                                   lr=lr_list[outerloop])
            print('step:', lr_list[outerloop])
        # rasterise the target point set and replicate to 3 channels
        target_texture_img = target_p2i(target_exemplar).repeat(1, 3, 1,
                                                                1).to(device)
        fstyle_loss = StyleLoss(target_texture_img)
        save_image(target_texture_img.squeeze() / target_texture_img.max(),
                   results_dir + '/target' + str(outerloop) + '.jpg')
        np.savetxt(results_dir + '/target' + str(outerloop) + '.txt',
                   target_texture_img[0, 0, :, :].cpu().data.numpy())
        np.savetxt(results_dir + '/target_points' + str(outerloop) + '.txt',
                   target_exemplar.cpu().data.numpy())
        fig = plt.figure()
        plt.scatter(target_exemplar.data[:, 0], target_exemplar.data[:, 1])
        plt.savefig(results_dir + '/scatter_target' + str(outerloop) + '.jpg')
        plt.close(fig)
        img_hist_loss = HistogramLoss(target_texture_img)
        print('Building the texture model..')
        model, texture_losses, structure_losses, _, histogram_losses = get_style_model_and_losses(
            cnn, target_texture_img, texture_layers, structure_layers)
        log_losses = []
        print('Optimizing..')
        break_loop = False
        inner_run = [0]  # iterations within the current level only
        while run[0] <= num_optim_step and not break_loop:

            def closure():
                global break_loop
                init_guess.data.clamp_(0, 1)
                input_soft_points = input_p2i(init_guess)
                # NOTE(review): clamp (not clamp_) is out-of-place and the
                # result is discarded -- this line has no effect as written
                input_soft_points.clamp(min=target_texture_img.min(),
                                        max=target_texture_img.max())
                optimizer.zero_grad()
                input_density_img = input_soft_points.repeat(1, 3, 1, 1)
                img_hist_loss(input_density_img)
                # dump the initial state once, at the very first iteration
                if run[0] == 0:
                    save_image(
                        input_density_img.squeeze() / input_density_img.max(),
                        results_dir + '/init' + str(outerloop) + '.jpg')
                    np.savetxt(
                        results_dir + '/init' + str(outerloop) + '.txt',
                        input_density_img[0, 0, :, :].cpu().data.numpy())
                    np.savetxt(
                        results_dir + '/init_points' + str(outerloop) + '.txt',
                        init_guess.cpu().data.numpy())
                    fig = plt.figure()
                    plt.scatter(init_guess.data[:, 0], init_guess.data[:, 1])
                    plt.savefig(results_dir + '/init_points' +
                                str(outerloop) + '.jpg')
                    plt.close(fig)
                model(input_density_img)
                texture_score = torch.zeros(1).to(device)
                structure_score = torch.zeros(1).to(device)
                # NOTE(review): histogram/img-hist scores stay zero below --
                # these terms are effectively disabled in the total loss
                histogram_score = torch.zeros(1).to(device)
                img_hist_score = torch.zeros(1).to(device)
                for tl in texture_losses:
                    texture_score += texture_weight * tl.loss
                for sl in structure_losses:
                    structure_score += structure_weight * sl.loss
                fstyle_loss(input_density_img)
                ftexture_score = fstyle_loss.loss
                loss = texture_score + structure_score + histogram_score + img_hist_score + ftexture_score  #+ homo_score
                loss.backward()
                log_losses.append(loss)
                run[0] += 1
                inner_run[0] += 1
                # periodic progress dump
                if run[0] % 5 == 0:
                    for param_group in optimizer.param_groups:
                        print(param_group['lr'])
                    print("run {}:".format(run))
                    print(
                        'Texture Loss : {:4f}, FTexture_Loss: {:4f}, Structure Loss : {:4f}, Histogram Loss : {:4f}, Image Histogram Loss : {:4f}'
                        .format(texture_score.item(), ftexture_score.item(),
                                structure_score.item(),
                                histogram_score.item(),
                                img_hist_score.item()))
                    save_image((input_density_img / input_density_img.max()),
                               results_dir + '/out' + str(run[0]) + '_' +
                               str(outerloop) + '.jpg')
                    np.savetxt(
                        results_dir + '/out' + str(run[0]) + '_' +
                        str(outerloop) + '.txt',
                        input_density_img[0, 0, :, :].cpu().data.numpy())
                    np.savetxt(
                        results_dir + '/out_points' + str(run[0]) + '_' +
                        str(outerloop) + '.txt',
                        init_guess.cpu().data.numpy())
                    fig = plt.figure()
                    plt.figure()
                    plt.scatter(init_guess.data[:, 0],
                                init_guess.data[:, 1],
                                s=2)
                    plt.savefig(results_dir + '/out_points' + str(run[0]) +
                                '_' + str(outerloop) + '.jpg')
                    plt.close('all')
                print('inner_run', inner_run)
                print('stopping_crit', stopping_crit)
                # convergence test: after 100 inner iterations, compare the
                # loss decrease over the last 100 steps to the total decrease
                if inner_run[0] > 100:
                    loss_init = log_losses[0]
                    loss_pre = log_losses[inner_run[0] - 100]
                    loss_now = log_losses[inner_run[0] - 1]
                    decrease_perc = ((loss_pre - loss_now) /
                                     (loss_init - loss_now)).tolist()[0]
                    print(decrease_perc)
                    if decrease_perc < stopping_crit:
                        print('converged')
                        break_loop = True
                else:
                    # before the convergence window fills, keep overwriting
                    # the "final" snapshots with the current state
                    np.savetxt(
                        results_dir + '/final_output' + '_' +
                        str(outerloop) + '.txt',
                        init_guess.cpu().data.numpy())
                    np.savetxt(
                        results_dir + '/final_output_density' + '_' +
                        str(outerloop) + '.txt',
                        input_density_img[0, 0, :, :].cpu().data.numpy())
                    np.savetxt(results_dir + '/final_output' + '.txt',
                               init_guess.cpu().data.numpy())
                    np.savetxt(
                        results_dir + '/final_output_density.txt',
                        input_density_img[0, 0, :, :].cpu().data.numpy())
                    print()
                    with open(
                            results_dir + '/log_losses_' + str(outerloop) +
                            '.txt', "w") as f:
                        for s in log_losses:
                            f.write(str(s.tolist()[0]) + "\n")
                return loss

            optimizer.step(closure)
        outerloop += 1
    init_guess.data.clamp_(0, 1)
    return init_guess
def AdditiveNN_Train(segmentData=None, segFile=None, cat_idx=None,
                     linearFit=True):
    """Train the additive NN stress correction model.

    Fits ``Net_Map3`` to the residual between the data and a linear fit
    (``linearFit=True``) or directly to column ``iidx`` of the targets,
    reports train/test Frobenius errors, and exports the traced model via
    TorchScript to ``model<name>Additive.pt``.

    :param segmentData: forwarded to ReadData as ``segment``
    :param segFile: forwarded to ReadData as ``segmentFile``; also selects
        the number of categories (N_CATS when None, else 1)
    :param cat_idx: forwarded to ReadData
    :param linearFit: if True, first remove a linear fit and train the net
        on the residual; otherwise train on target column ``iidx``
    """

    def transform_inputs(xx):
        # Map raw inputs to the (currently identity) feature space.
        # Higher-order terms are intentionally disabled below.
        ntp, dim = xx.shape
        dim_transform = 3
        xx_transform = numpy.zeros((ntp, dim_transform))
        xx_transform[:, :3] = xx
        #xx_transform[:,3] = xx[:,2]*xx[:,2] # xy*xy
        """
        xx_transform[:,4] = xx[:,0]*xx[:,0] # xx*xx
        xx_transform[:,5] = xx[:,1]*xx[:,1] # yy*yy
        xx_transform[:,6] = xx[:,0]*xx[:,1] # xx*yy
        xx_transform[:,7] = xx[:,0]*xx[:,2] # xy*xx
        xx_transform[:,8] = xx[:,2]*xx[:,1] # xy*yy
        """
        return xx_transform, dim_transform

    #Read data
    print("train")
    xx, yy = ReadData(stress_scale, strain_scale, True, "Training_data/",
                      segment=segmentData, segmentFile=segFile,
                      cat_idx=cat_idx)
    print("test")
    xx_test, yy_test = ReadData(stress_scale, strain_scale, True,
                                "Test_data/", segment=segmentData,
                                segmentFile=segFile, cat_idx=cat_idx)
    ntp, dim = xx.shape
    ntp_test, dim_test = xx_test.shape
    xx_transform, dimT = transform_inputs(xx)
    # one category per segment file, otherwise the global category count
    if segFile is None:
        n_cats = N_CATS
    else:
        n_cats = 1
    name = "Net_Map3"
    model = Net_Map3(n_categories=n_cats, ninputs=dimT)
    #name = "Net_Map"
    #model = Net_Map()
    inputs = torch.from_numpy(xx_transform).view(ntp, dimT)
    if (linearFit):
        #Linear fit to get H, h
        H, h = LinearReg(xx, yy, "Orth", stress_scale, strain_scale)
        yy_linear_fit = numpy.dot(xx, H)
        yy_test_linear_fit = numpy.dot(xx_test, H)
        # the network learns the residual of the linear fit
        output_np = (yy - yy_linear_fit)
    else:
        output_np = yy[:, iidx]
    outputs = torch.from_numpy(output_np).view(ntp, 1)
    optimizer = optim.LBFGS(model.parameters(), lr=0.8, max_iter=1000,
                            line_search_fn='strong_wolfe')
    #optimizer = optim.SGD(model.parameters(), lr=0.0005)
    # L2 regularization
    factor = torch.tensor(reg_factor * ntp)
    Nite = 5
    for i in range(Nite):
        print("Iteration : ", i)

        def closure(printFlag=False):
            optimizer.zero_grad()
            sigma = model(inputs)
            # BUGFIX: the original parameter loop had its only body line
            # commented out, leaving an invalid/dead loop. L2 regularization
            # is intentionally disabled, so l2_loss stays 0 and loss2
            # contributes nothing; re-enable by restoring the lines below.
            l2_loss = torch.tensor(0.)
            # for param in model.parameters():
            #     l2_loss += param.norm()
            loss1 = (torch.sum(
                (sigma - outputs)**2)) * stress_scale * stress_scale
            #loss1 = (torch.sum((sigma - outputs) ** 2 * torch.Tensor([1.0,1.0,1.0e3]))) * stress_scale * stress_scale
            loss2 = (factor * l2_loss) * stress_scale * stress_scale
            loss = loss1 + loss2
            #if segFile is None:
            #loss -= 1e12 * model.classify_loss(inputs)
            loss.retain_grad()
            loss.backward(retain_graph=True)
            if printFlag:
                print("loss {0:e}, loss1 {1:e}, loss2 = {2:e} ".format(
                    loss.item(), loss1.item(), loss2.item()))
                gradnorm = 0
                for param in model.parameters():
                    gradnorm += param.grad.norm()
                print("gradnorm: {0:e} ".format(gradnorm))
            return loss

        #closure(printFlag=True)
        optimizer.step(closure)

    # ---- evaluate on the training data ----
    xx_transform, dimT = transform_inputs(xx)
    yy_pred_norm = model(torch.from_numpy(xx_transform).view(ntp, dimT))
    yy_pred = yy_pred_norm.data.numpy()
    if linearFit:
        res_train = yy - yy_linear_fit - yy_pred
    else:
        res_train = numpy.copy(yy)
        res_train[:, iidx] = res_train[:, iidx] - yy_pred[:, 0]
    print(
        "Train fro error =",
        numpy.linalg.norm(res_train, ord='fro') * stress_scale * stress_scale)
    print(
        "Train fro relative error = ",
        numpy.linalg.norm(res_train, ord='fro') /
        numpy.linalg.norm(yy, ord='fro'), " ",
        numpy.linalg.norm(res_train[:, 0:1], ord='fro') /
        numpy.linalg.norm(yy[:, 0:1], ord='fro'), " ",
        numpy.linalg.norm(res_train[:, 1:2], ord='fro') /
        numpy.linalg.norm(yy[:, 1:2], ord='fro'), " ",
        numpy.linalg.norm(res_train[:, 2:3], ord='fro') /
        numpy.linalg.norm(yy[:, 2:3], ord='fro'))
    print("Sum terms train fro error = ",
          numpy.linalg.norm(res_train[:, 0:1], ord='fro')**2, " ",
          numpy.linalg.norm(yy[:, 0:1], ord='fro')**2, " ",
          numpy.linalg.norm(res_train[:, 1:2], ord='fro')**2, " ",
          numpy.linalg.norm(yy[:, 1:2], ord='fro')**2, " ",
          numpy.linalg.norm(res_train[:, 2:3], ord='fro')**2, " ",
          numpy.linalg.norm(yy[:, 2:3], ord='fro')**2)
    if segFile is None and N_CATS > 1:
        model.print_linear_params()
    name = "NN-ReLU"
    if linearFit:
        yy_pred = yy_linear_fit + yy_pred
    #MyScatter(xx, yy, yy_pred, name, stress_scale, strain_scale)

    ############ Test
    xx_transform, dimT = transform_inputs(xx_test)
    t0 = time.perf_counter()
    yy_test_pred_norm = model(
        torch.from_numpy(xx_transform).view(ntp_test, dimT))
    t1 = time.perf_counter()
    """
    print("----------------------------------")
    print("Time spent predicting: ", t1 - t0)
    print("----------------------------------")
    """
    yy_test_pred = yy_test_pred_norm.data.numpy()
    if linearFit:
        res_test = yy_test - yy_test_linear_fit - yy_test_pred
    else:
        res_test = numpy.copy(yy_test)
        res_test[:, iidx] = res_test[:, iidx] - yy_test_pred[:, 0]
    print(
        "Test fro error =",
        numpy.linalg.norm(res_test, ord='fro') * stress_scale * stress_scale)
    print(
        "Test fro relative error = ",
        numpy.linalg.norm(res_test, ord='fro') /
        numpy.linalg.norm(yy_test, ord='fro'), " ",
        numpy.linalg.norm(res_test[:, 0:1], ord='fro') /
        numpy.linalg.norm(yy_test[:, 0:1], ord='fro'), " ",
        numpy.linalg.norm(res_test[:, 1:2], ord='fro') /
        numpy.linalg.norm(yy_test[:, 1:2], ord='fro'), " ",
        numpy.linalg.norm(res_test[:, 2:3], ord='fro') /
        numpy.linalg.norm(yy_test[:, 2:3], ord='fro'))
    print("Sum terms fro error = ",
          numpy.linalg.norm(res_test[:, 0:1], ord='fro')**2, " ",
          numpy.linalg.norm(yy_test[:, 0:1], ord='fro')**2, " ",
          numpy.linalg.norm(res_test[:, 1:2], ord='fro')**2, " ",
          numpy.linalg.norm(yy_test[:, 1:2], ord='fro')**2, " ",
          numpy.linalg.norm(res_test[:, 2:3], ord='fro')**2, " ",
          numpy.linalg.norm(yy_test[:, 2:3], ord='fro')**2)
    if linearFit:
        yy_test_pred = yy_test_linear_fit + yy_test_pred
    #MyScatter(xx_test, yy_test, yy_test_pred, name, stress_scale, strain_scale, "Test")

    ########### Save to cpp file
    example = torch.rand([1, dimT]).double()
    traced_script_module = torch.jit.trace(model, example)
    output = traced_script_module(torch.ones([1, dimT]).double())
    traced_script_module.save("model" + name + "Additive.pt")
def get_stylized_portret(style, portret, person_mask):
    """
    style, portret, person_mask - PIL Images
    returns: base64-encoded PNG of the portret with the style of `style`
    transfered onto it (content loss is masked by `person_mask`)
    """
    #get network ### how to do this once at the beginning??
    print("start loading vgg")
    vgg = VGG()
    vgg.load_state_dict(torch.load(model_dir + 'vgg_conv.pth'))
    # the VGG weights are frozen; only the image pixels are optimised
    for param in vgg.parameters():
        param.requires_grad = False
    if torch.cuda.is_available():
        vgg.cuda()
    print("end loading vgg")
    #load images, ordered as [style_image, content_image]
    # img_names = ['style.jpg', '1.jpg']
    # img_dirs = ['/content/drive/My Drive/Colab Notebooks/DL IAD/4/', image_dir]
    # imgs = [Image.open(img_dirs[i] + name) for i,name in enumerate(img_names)]
    print("start preprocessing imgs")
    imgs = [style, portret]
    imgs_torch = [prep(img) for img in imgs]
    if torch.cuda.is_available():
        imgs_torch = [Variable(img.unsqueeze(0).cuda()) for img in imgs_torch]
    else:
        imgs_torch = [Variable(img.unsqueeze(0)) for img in imgs_torch]
    style_image, content_image = imgs_torch
    # opt_img = Variable(torch.randn(content_image.size()).type_as(content_image.data), requires_grad=True) #random init
    # initialise the optimisation from a copy of the content (portrait) image
    opt_img = Variable(content_image.data.clone(), requires_grad=True)
    print("end preprocessing imgs")
    #define layers, loss functions, weights and compute optimization targets
    style_layers = ['r11', 'r21', 'r31', 'r41', 'r51']
    content_layers = ['r42']
    loss_layers = style_layers + content_layers
    # Gram-matrix MSE per style layer; mask-weighted loss for the content layer
    loss_fns = [GramMSELoss()] * len(
        style_layers) + [masked_loss(person_mask)] * len(content_layers)
    if torch.cuda.is_available():
        loss_fns = [loss_fn.cuda() for loss_fn in loss_fns]
    #these are good weights settings:
    style_weights = [1e3 / n**2 for n in [64, 128, 256, 512, 512]]
    content_weights = [1e0]
    weights = style_weights + content_weights
    print("end of layer preparation")
    print("start of target preparation (using vgg)")
    #compute optimization targets
    style_targets = [
        GramMatrix()(A).detach() for A in vgg(style_image, style_layers)
    ]
    content_targets = [A.detach() for A in vgg(content_image, content_layers)]
    targets = style_targets + content_targets
    print("end of target preparation")
    #run style transfer
    # max_iter = 200
    # show_iter = 50
    show_iter = 2
    max_iter = 10
    optimizer = optim.LBFGS([opt_img])
    n_iter = [0]  # list so the closure can mutate the iteration counter
    print("start of optimization (using vgg)")
    while n_iter[0] <= max_iter:

        def closure():
            optimizer.zero_grad()
            out = vgg(opt_img, loss_layers)
            layer_losses = [
                weights[a] * loss_fns[a](A, targets[a])
                for a, A in enumerate(out)
            ]
            loss = sum(layer_losses)
            loss.backward()
            n_iter[0] += 1
            #print loss
            if n_iter[0] % show_iter == (show_iter - 1):
                print('Iteration: %d, loss: %f' %
                      (n_iter[0] + 1, loss.item()))
                # print([loss_layers[li] + ': ' + str(l.data[0]) for li,l in enumerate(layer_losses)]) #loss of each layer
            return loss

        optimizer.step(closure)
    #display result
    out_img = postp(opt_img.data[0].cpu().squeeze())
    # encode the resulting PIL image as base64 PNG for transport
    # try:
    out_img_np = np.array(out_img)
    fmem = io.BytesIO()
    imsave(fmem, out_img_np, 'png')
    fmem.seek(0)
    out_img64 = base64.b64encode(fmem.read()).decode('utf-8')
    # except Exception as e:
    #     print(e)
    # print("\nout_img with style", type(out_img), "\n")
    return out_img64
def get_input_param_optimier(input_img):
    """Wrap the image tensor in a trainable Parameter and pair it with an
    L-BFGS optimizer (the paper's authors recommend L-BFGS for this task).

    Returns the ``(parameter, optimizer)`` tuple.
    """
    trainable_pixels = nn.Parameter(input_img.data)
    return trainable_pixels, optim.LBFGS([trainable_pixels])
def train(arguments, trainData, device, criterion, model):
    """Train ``model`` on ``trainData`` with the optimizer named by
    ``arguments.optim_Method``, then save the model (and criterion)
    state dicts to disk.

    NOTE(review): this block was reconstructed from a whitespace-mangled
    source; the nesting of the t-SNE plotting section is a best-effort
    reconstruction -- verify against VCS history.
    """
    # Set default logging format
    formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')

    def setup_logger(name, log_file, level=logging.INFO):
        # """Function setup as many loggers as you want"""
        # Builds a file-backed logger with the shared formatter above.
        handler = logging.FileHandler(log_file)
        handler.setFormatter(formatter)
        logger = logging.getLogger(name)
        logger.setLevel(level)
        logger.addHandler(handler)
        return logger

    print('Defining some tools')
    # This matrix records the current confusion across classes
    # In python we will initialize it later
    # confusion = confusion_matrix(labels=classes)
    # Convert to 2d data
    samplesShape = trainData.dataset.data.shape
    print(
        trainData.dataset.data.reshape(
            samplesShape[0],
            samplesShape[1] * samplesShape[2] * samplesShape[3]))
    # -- Retrieve parameters and gradients:
    # -- this extracts and flattens all the trainable parameters of the mode
    # -- into a 1-dim vector
    # Not needed, added to the optimizer at once
    # if model is not None:
    #     oldparameters,oldgradParameters = model.parameters()
    optimizer = arguments.optim_Method.upper()
    print('Configuring Optimizer')
    if optimizer == 'CG':
        # No CG model in torch
        # NOTE(review): optim.Optimizer is the abstract base class and
        # normally requires a defaults dict -- this branch looks untested
        # Insert Values
        maxIter = arguments.max_iter
        optimMethod = optim.Optimizer(model.parameters())
    elif optimizer == 'LBFGS':
        # !!!NEEDS CLOSURE FUNCTION
        # Insert Values
        maxIter = arguments.max_iter
        learningRate = arguments.lr
        optimMethod = optim.LBFGS(model.parameters(), lr=learningRate,
                                  max_iter=maxIter)
    elif optimizer == 'SGD':
        # Insert Values
        weightDecay = arguments.weight_decay
        learningRate = arguments.lr
        momentum = arguments.momentum
        optimMethod = optim.SGD(model.parameters(), lr=learningRate,
                                momentum=momentum, weight_decay=weightDecay)
    elif optimizer == 'ASGD':
        learningRate = arguments.lr
        eta0 = arguments.t0
        optimMethod = optim.ASGD(model.parameters(), lr=learningRate,
                                 t0=eta0 * trainData.dataset.data.size)
    elif optimizer == 'ADAM':
        learningRate = arguments.lr
        optimMethod = optim.Adam(model.parameters(), lr=learningRate)
    else:
        raise ValueError('Uknown optimization method')
    print(model.parameters())
    # !!!!!START TRAINING!!!!
    # Since train is called multiple times it is checked if it is loaded in the memory first
    # !!!!WORKS LIKE THIS IN LUA, IN PYTHON WE WILL NEED ANOTHER WAY
    # Set model to training mode
    model = model.train()
    print('************************************\n MODEL IS CUDA:' +
          str(next(model.parameters()).is_cuda) +
          '************************************\n')
    # do one epoch
    print('--->Doing epoch on training data')
    print("--->Online epoch # " + str(arguments.epochs) + "[batchSize = " +
          str(arguments.batch_Size) + "]")
    # Begin Fetching batches from Dataloader
    # Got this part from https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html
    time = datetime.now()
    for i in range(arguments.epochs):
        print('Epoch #' + str(i))
        k = 0
        # Reduce learning rate on each epoch (disabled)
        # for param_group in optimMethod.param_groups:
        #     param_group['lr'] = param_group['lr'] * 0.99
        # Training
        for index, (data, target) in enumerate(trainData):
            # Transfer to GPU
            data, target = data.cuda(), target.cuda()
            # Forward pass to the NN
            if optimizer == 'LBFGS':
                # If optimizer needs eval function
                # (original note, translated from Greek: "Am I running this
                # correctly??")
                def closure():
                    optimMethod.zero_grad()
                    outputs, tsne_results = model.forward(data)
                    print(outputs.size())
                    loss = criterion(outputs, target)
                    loss.backward()
                    return loss

                loss = optimMethod.step(closure)
                # NOTE(review): step(closure) returns the closure's loss
                # tensor; indexing it with loss[0] assumes a 0-dim-indexable
                # tensor (deprecated in modern torch) -- verify
                print('Loss for batch ' + str(index) + ': ' + str(loss[0]))
            else:
                # if optimizer does not need eval function
                outputs, tsne_results, kmeans_data = model.forward(data)
                loss = criterion(outputs, target)
                # BackProp and optimize
                optimMethod.zero_grad()
                loss.backward()
                optimMethod.step()  # Feval)
                #print('Loss for batch ' + str(index) + ': ' + str(loss.data))
            # Print tsne result at the last batch of each epoch
            # NOTE(review): on the LBFGS path neither tsne_results nor
            # kmeans_data is defined in this scope (the closure unpacks only
            # two values) -- this block can only work on the non-LBFGS path
            if (k < 3):
                plt.scatter(tsne_results[:, 0], tsne_results[:, 1])
                plt.scatter(kmeans_data.cluster_centers_[:, 0],
                            kmeans_data.cluster_centers_[:, 1], s=250,
                            marker='*', c='red', edgecolor='black',
                            label='centroids')
                plt.show()
                plt.clf()
                k = k + 1
                print("Features for epoch: " + str(i))
        # Clear axes
        #Time for each epoch
        print(datetime.now() - time)
    # Save current trained net
    torch.save(
        model.state_dict(), 'model_' + arguments.neural_network + '_' +
        arguments.loss + '_' + optimizer + '.pt')
    # load with model.load_state_dict and model.eval() to get the correct results
    print("Model saved with name:" + 'model_' + arguments.neural_network +
          '_' + arguments.loss + '_' + optimizer + '.pt')
    # NOTE(review): this saves the CRITERION's state dict under an
    # "optimizer_" file name -- likely a copy-paste slip; verify intent
    torch.save(
        criterion.state_dict(), 'optimizer_' + arguments.neural_network +
        '_' + arguments.loss + '_' + optimizer + '.pt')
    print("Optimizer saved with name:" + 'optimizer' +
          arguments.neural_network + '_' + arguments.loss + '_' + optimizer +
          '.pt')
def run_style_transfer(self, content_path, style_paths, spatial_mask=None,
                       imsize=128, num_steps=300, style_weight=1000000,
                       content_weight=1):
    """Executes style transfer on given image with given style images and masks.

    Parameters
    ----------
    content_path : str
        The path to the image on which the style transfer shall be executed
    style_paths : list of str
        The paths to the images which style shall be transfered
    spatial_mask : list torch.tensor, optional
        The masks which determine in which area of the image which masks
        shall be applied (default is None)
    imsize : int, optional
        The image size (default is 128)
    num_steps : int, optional
        The number of steps which shall be executed for the style transfer
        (default is 300)
    style_weight : int, optional
        The weight of how much the loss in style transfer contributes to the
        overall loss (default is 1000000)
    content_weight : int, optional
        The weight of how much the loss in content preservation contributes
        to the overall loss (default is 1)

    Returns
    -------
    torch.Tensor
        The optimised image tensor, clamped to [0, 1].
    """
    image_loader = get_image_loader(imsize, self.device)
    style_img = [image_loader(img_pth) for img_pth in style_paths]
    content_img = image_loader(content_path)
    # start the optimisation from a copy of the content image
    input_img = content_img.clone()
    # all style images must match the content image's size
    for k in range(len(style_img)):
        assert style_img[k].size() == content_img.size()
    """Run the style transfer."""
    print('Building the style transfer model..')
    model, style_losses, content_losses = self.get_style_model_and_losses_lists(
        style_img, content_img, spatial_mask=spatial_mask)
    # the image pixels themselves are the parameters being optimised
    optimizer = optim.LBFGS([input_img.requires_grad_()])
    print('Optimizing..')
    run = [0]  # list so the closure can mutate the iteration counter
    while run[0] <= num_steps:

        def closure():
            # correct the values of updated input image
            input_img.data.clamp_(0, 1)
            optimizer.zero_grad()
            # forward pass populates the .loss attribute of each loss module
            model(input_img)
            style_score = 0
            content_score = 0
            for sl in style_losses:
                style_score += sl.loss
            for cl in content_losses:
                content_score += cl.loss
            style_score *= style_weight
            content_score *= content_weight
            loss = style_score + content_score
            loss.backward()
            run[0] += 1
            if run[0] % 50 == 0:
                print("run {}:".format(run))
                print('Style Loss : {:4f} Content Loss: {:4f}'.format(
                    style_score.item(), content_score.item()))
                print()
            return style_score + content_score

        optimizer.step(closure)
    # a last correction...
    input_img.data.clamp_(0, 1)
    return input_img
def train(csv_data, train_to_test, data_col, time_col, seq_l, num_epochs, num_hidden,
          num_cells, lr, print_test_loss=1, device=None):
    """
    train the classifier and print the training loss of the each epoch. Uses MSEloss as criteria
    :param csv_data: CSVFileManager object containing test data
    :param train_to_test: Train to test data size ratio between 0-1 exclusive
    :param data_col: # column of the target data in csv_data.data dataframe
    :param time_col: # column of the target timestamp in csv_data.data dataframe
    :param seq_l: sequence length
    :param num_epochs: Number of training cycles
    :param num_hidden: Number of hidden units
    :param num_cells: Number of LSTM cells
    :param lr: learning rate of optimizer
    :param print_test_loss: Number of epochs after which test loss is evaluated
    :param device: device on which the model is trained, can be "cpu" or "gpu"
    :return: trained LSTM classifier
    """
    result_file_path = "C://Users//Mahesh.Bhosale//PycharmProjects//Idle_bot//Predictor//CPU_predictor//Results//"
    future = 500
    # Bug fix: the original referenced undefined names (number_cells,
    # number_hidden, seq_length); use the actual parameters instead.
    file_name = "c" + str(num_cells) + "h" + str(num_hidden) + "e" + str(num_epochs) + "f" + str(future) \
        + "seq" + str(seq_l) + ".png"
    result_file_path = result_file_path + file_name
    # Truncate the training split to a whole number of sequences.
    total_size = csv_data.data.shape[0]
    train_size = math.floor(total_size * train_to_test)
    train_size = math.floor(train_size / seq_l) * seq_l
    # One-step-ahead prediction: input is the series, target is the series
    # shifted forward by one sample.
    data = csv_data.data.iloc[:train_size + 1, data_col]
    iput = data.iloc[:-1]
    target = data.iloc[1:]
    iput = torch.from_numpy(iput.values.reshape(-1, seq_l))
    target = torch.from_numpy(target.values.reshape(-1, seq_l))
    seq = Seq2seq(num_hidden=num_hidden, num_cells=num_cells, device=device)
    seq.to(seq.device)
    seq.double()
    # Bug fix: Tensor.double() is NOT in-place — the original discarded its
    # result; keep the returned tensors so dtypes actually match the model.
    iput = iput.to(seq.device).double()
    target = target.to(seq.device).double()
    criteria = nn.MSELoss()
    optimizer = optim.LBFGS(seq.parameters(), lr=lr)
    for epoch in range(num_epochs):
        print('EPOCH: ', epoch)

        def closure():
            # L-BFGS re-evaluates the model several times per step.
            optimizer.zero_grad()
            out = seq(iput)
            l_train = criteria(out, target)
            print('loss:', l_train.item())
            l_train.backward()
            return l_train

        optimizer.step(closure)
        # Evaluate on the held-out split every print_test_loss epochs; the
        # first evaluation also saves/shows the result plot.
        if (epoch + 1) == print_test_loss:
            test(csv_data=csv_data, train_size=train_size, test_size=total_size - train_size,
                 data_col=data_col, time_col=time_col, seq=seq, future=future,
                 result_file=result_file_path, show=1)
        elif (epoch + 1) % print_test_loss == 0:
            test(csv_data=csv_data, train_size=train_size, test_size=total_size - train_size,
                 data_col=data_col, time_col=time_col, seq=seq, future=future,
                 result_file=None, show=0)
    return seq
def style_transfer(content_img, style_img, max_iters=300, style_weight=1e7, content_weight=1, layers='shallow', verbose=True): device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') # print(device) scaled_size = 128 preprocess = transforms.Compose([ transforms.ToPILImage(), transforms.Resize(scaled_size), transforms.ToTensor(), ]) content_img = preprocess(content_img)[None, ...] style_img = preprocess(style_img)[None, ...] if layers == 'shallow': vgg_wrapper = VGGWrapper(layer_dict) else: vgg_wrapper = VGGWrapper(deep_layer_dict) content_layer = 4 content_targets = vgg_wrapper(content_img)[content_layer].detach() style_targets = [ gram_matrix(layer).detach() for layer in vgg_wrapper(style_img) ] x = content_img.clone() optimizer = optim.LBFGS([x.requires_grad_()]) iters = [0] while iters[0] <= max_iters: def closure(): x.data.clamp_(0, 1) optimizer.zero_grad() # forward pass layers = vgg_wrapper(x) content_scores = layers[content_layer] style_scores = [gram_matrix(layer) for layer in layers] # calculate losses style_loss = style_weight * sum([ nn.MSELoss()(s, t) for s, t in list(zip(style_scores, style_targets)) ]) content_loss = content_weight * nn.MSELoss()(content_scores, content_targets) iters[0] += 1 if iters[0] % 50 == 0 and verbose: print('[%d]\tContent loss: %.04f\tStyle loss: %.04f' % (iters[0], content_loss.item(), style_loss.item())) loss = style_loss + content_loss # backpropagate loss.backward() return loss optimizer.step(closure) # note LBFGS calls the closure several times return x.data.clamp_(0, 1)
def run_style_transfer(self, cnn, normalization_mean, normalization_std, content_img, style_img, input_img, num_steps=500, style_weight=100000, content_weight=1): """Run the style transfer.""" print('Building the style transfer model..') model, style_losses, content_losses = self.get_style_model_and_losses( cnn, normalization_mean, normalization_std, style_img, content_img) optimizer = optim.LBFGS([input_img.requires_grad_()], max_iter=num_steps) print('Optimizing..') run = [0] # while run[0] <= num_steps: def closure(): if self.should_terminate_lambda: terminate = self.should_terminate_lambda() if terminate: raise Exception('Thread stopped') # correct the values # это для того, чтобы значения тензора картинки не выходили за пределы [0;1] input_img.data.clamp_(0, 1) optimizer.zero_grad() model(input_img) style_score = 0 content_score = 0 for sl in style_losses: style_score += sl.loss for cl in content_losses: content_score += cl.loss # взвешивание ощибки style_score *= style_weight content_score *= content_weight loss = style_score + content_score loss.backward() run[0] += 1 if run[0] % 50 == 0: print("run {}:".format(run)) print('Style Loss : {:4f} Content Loss: {:4f}'.format( style_score.item(), content_score.item())) print() if self.progress_lambda != None: self.progress_lambda(run[0] / num_steps) return style_score + content_score optimizer.step(closure) # a last correction... input_img.data.clamp_(0, 1) return input_img
def run(model, img, num_steps, weights, losses, sched): """ Run the Gatys et al. algorithm. Inputs ------ - model : the model to use - img : a dictionary with images (here, only input image) - num_steps : the number of steps (epochs) - weights : a dictionary with weights for content and style layers - losses : a dictionary with lists of content and style layers - sched : a dictionary with scheduler parameter """ # Adds the input image to the gradient descent optimizer = optim.LBFGS([img['input'].requires_grad_()]) # Set a decaying learning rate scheduler = StepLR(optimizer, step_size=sched['step_size'], gamma=sched['gamma']) # Save the scores style_scores = [] content_scores = [] run = [0] while run[0] < num_steps: def closure(): # Steps in the scheduler scheduler.step() # Limits the values of the updated image img['input'].data.clamp_(0, 1) # Reset the gradients to zero before the backpropagation optimizer.zero_grad() model(img['input']) # Calculate the scores style_score = 0 content_score = 0 for loss, weight in zip(losses['style'], weights['style_losses']): style_score += loss.loss * weight for loss, weight in zip(losses['content'], weights['content_losses']): content_score += loss.loss * weight style_score *= weights['style'] content_score *= weights['content'] style_scores.append(style_score.item()) content_scores.append(content_score.item()) # Calculate the total loss and backpropagate it loss = style_score + content_score loss.backward() run[0] += 1 step = int((run[0] / (num_steps + (num_steps % 20))) * 50) print('[Progress : {}/{}] [{}{}]'.format( str(run[0]).rjust(len(str((num_steps)))), (num_steps + (num_steps % 20)), '=' * step, ' ' * (50 - step)), end='\r') return style_score + content_score optimizer.step(closure) # Small correction to the image img['input'].data.clamp_(0, 1) return img['input'], style_scores, content_scores
def __get_input_optimizer(input_img): # utilizziamo una ottimizzazione numerica optimizer = optim.LBFGS([input_img.requires_grad_()]) return optimizer
sinkhorn_net = Sinkhorn_Net(args.sink_z_dim, args.data_variable_size, args.dropout_prob) # BirkhoffPolytope birkhoff = BirkhoffPoly(num_nodes) #=================================== # set up training parameters #=================================== if args.optimizer == 'Adam': optimizer = optim.Adam(list(encoder.parameters()) + list(decoder.parameters()), lr=args.lr) elif args.optimizer == 'LBFGS': optimizer = optim.LBFGS(list(encoder.parameters()) + list(decoder.parameters()), lr=args.lr) elif args.optimizer == 'SGD': optimizer = optim.SGD(list(encoder.parameters()) + list(decoder.parameters()), lr=args.lr) scheduler = lr_scheduler.StepLR(optimizer, step_size=args.lr_decay, gamma=args.gamma) # set up Riemannian Adam # rie_optimizer = RiemannianAdam(birkhoff.parameters(), lr=args.lr) rie_optimizer = optim.Adam(birkhoff.parameters(), lr=args.lr) if args.prior:
def style_transfer(model, content_img, style_img, input_img, default_mean_std=True, num_steps=300, style_weight=1000000, content_weight=1): """Run the style transfer.""" print('Building the style transfer model..') model, style_losses, content_losses = generate_model(cnn, style_img, content_img, default_mean_std=True) #optimizer = get_input_optimizer(input_img) optimizer = optim.LBFGS([input_img.requires_grad_()]) print('Optimizing..') s_losses, c_losses, t_losses = [], [], [] run = [0] def closure(): # correct the values of updated input image input_img.data.clamp_(0, 1) optimizer.zero_grad() model(input_img) style_score = 0 content_score = 0 # extract the losses for sl in style_losses: style_score += sl.loss / len(style_losses) for cl in content_losses: content_score += cl.loss style_score *= style_weight content_score *= content_weight loss = style_score + content_score loss.backward() run[0] += 1 end = time.time() times.append(round(end - start, 2)) if run[0] % 10 == 0: s_losses.append(style_score) c_losses.append(content_score) t_losses.append(loss) if run[0] % 50 == 0: print("run {}:".format(run)) print('Style Loss : {:4f} Content Loss: {:4f}'.format( style_score.item(), content_score.item())) print() return style_score + content_score times = [0] start = time.time() while run[0] <= num_steps: optimizer.step(closure) # a last correction... input_img.data.clamp_(0, 1) return input_img, model(input_img), times, s_losses, c_losses, t_losses
def __get_input_optimizer(input_img): # this line to show that input is a parameter that requires a gradient # добоваляет содержимое тензора катринки в список изменяемых оптимизатором параметров optimizer = optim.LBFGS([input_img.requires_grad_()]) return optimizer
def get_input_param_optimizer(input_img): # this line to show that input is a parameter that requires a gradient input_param = nn.Parameter(input_img.data) optimizer = optim.LBFGS([input_param]) return input_param, optimizer
def stylize(model, g, content_tensor, style_tensor, iteration=1000, TV_WEIGHT=1e-3, STYLE_WEIGHT=1e2, CONTENT_WEIGHT=15e0, OPTIMIZER="adam", ADAM_LR=10, PRESERVE_COLOR='False', SHOW_ITER=200): # Get features representations/Forward pass content_layers = ['relu4_2'] content_weights = {'relu4_2': 1.0} style_layers = ['relu1_2', 'relu2_2', 'relu3_3', 'relu4_3', 'relu5_3'] style_weights = { 'relu1_2': 0.2, 'relu2_2': 0.2, 'relu3_3': 0.2, 'relu4_3': 0.2, 'relu5_3': 0.2 } c_feat = get_features(model, content_tensor) s_feat = get_features(model, style_tensor) mse_loss = torch.nn.MSELoss() if (OPTIMIZER == 'lbfgs'): optimizer = optim.LBFGS([g]) else: optimizer = optim.Adam([g], lr=ADAM_LR) it = 0 while it < iteration: def closure(): # Zero-out gradients optimizer.zero_grad() # Forward pass g_feat = get_features(model, g) # Compute Losses c_loss = 0 s_loss = 0 for j in content_layers: c_loss += content_weights[j] * content_loss( g_feat[j], c_feat[j], mse_loss) for j in style_layers: s_loss += style_weights[j] * style_loss( g_feat[j], s_feat[j], mse_loss) c_loss = CONTENT_WEIGHT * c_loss s_loss = STYLE_WEIGHT * s_loss t_loss = TV_WEIGHT * tv_loss(g.clone().detach()) total_loss = c_loss + s_loss + t_loss # Backprop total_loss.backward(retain_graph=True) # Print Loss if it % 50 == 0: print( "Style Loss: {} Content Loss: {} TV Loss: {} Total Loss : {}" .format(s_loss.item(), c_loss.item(), t_loss, total_loss.item())) return (total_loss) # Weight/Pixel update optimizer.step(closure) it += 1 return g
def maximise(Fis, kstar, Vis, dist_indices, Fij_list, Fij_var_list, alpha, alg="BFGS"): N = Fis.shape[0] Fis = Fis + np.random.normal(0, 1, size=(N,)) * 0.01 Fis_t = t.from_numpy(Fis).float().to(device) Fis_t.requires_grad_() Vis_t = t.from_numpy(Vis).float().to(device) kstar_t = t.from_numpy(kstar).float().to(device) Fijs_t = t.ones(N, N).float().to(device) Fijs_var_t = t.ones(N, N).float().to(device) mask = t.zeros(N, N).int().to(device) for i, (Fijs, Fijs_var) in enumerate(zip(Fij_list, Fij_var_list)): k = kstar[i] for nneigh in range(k): j = dist_indices[i, nneigh + 1] Fijs_t[i, j] = Fijs[nneigh] Fijs_var_t[i, j] = 2 * (Fijs_var[nneigh] + 1.0e-4) mask[i, j] = 1 def loss_fn(): deltas = Fis_t[None, :] - Fis_t[:, None] la = t.sum(kstar_t * Fis_t - Vis_t * t.exp(Fis_t)) lb = alpha * t.sum(mask * ((deltas - Fijs_t) ** 2 / Fijs_var_t)) # l = la - lb # l = lb return -l if alg == "BFGS": lr = 0.1 n_epochs = 50 optimiser = optim.LBFGS( [Fis_t], lr=lr, max_iter=20, max_eval=None, tolerance_grad=1e-7, tolerance_change=1e-9, history_size=100, ) for e in range(n_epochs): def closure(): optimiser.zero_grad() loss = loss_fn() loss.backward() return loss optimiser.step(closure) elif alg == "GD": lr = 1e-5 n_epochs = 25000 optimiser = optim.SGD([Fis_t], lr=lr) for e in range(n_epochs): loss = loss_fn() loss.backward() # with t.no_grad(): # Fis_t -= lr * Fis_t.grad # Fis_t.grad.zero_() optimiser.step() optimiser.zero_grad() if e % 1000 == 0: print(e, loss.item()) final_loss = loss_fn() return final_loss.data, Fis_t.detach().numpy()
def get_input_optimizer(input_img): # L-BFGS算法运行梯度下降。创建优化器。 #将图像作为张量进行优化。 optimizer = optim.LBFGS([input_img.requires_grad_()]) return optimizer
def maximise_wPAk_flatF(
    Fis, Fis_err, kstar, vij_list, dist_indices, alpha, alg="BFGS", onlyNN=False
):
    """Jointly optimise the log-densities Fis and the per-point PAk linear
    correction coefficients ai, under a pairwise penalty weighted by alpha.

    onlyNN=False couples each point i to its first k*-1 nearest neighbours;
    onlyNN=True couples each point only to its single first nearest neighbour.

    Returns: (final value of the negative objective, optimised Fis as numpy).
    NOTE(review): the final `.numpy()` requires CPU tensors — this will fail
    if the module-level `device` is a GPU; confirm.
    """
    N = Fis.shape[0]
    # Random jitter on the starting point.
    Fis = Fis + np.random.normal(0, 1, size=(N,)) * 0.1
    Fis_t = t.from_numpy(Fis).float().to(device)
    Fis_t.requires_grad_()
    Fis_err_t = t.from_numpy(Fis_err).float().to(device)
    # Per-point PAk correction slopes, optimised jointly with Fis_t.
    PAk_ai_t = t.zeros(N, requires_grad=True, dtype=t.float, device=device)
    # Dense pairwise buffers; `mask` flags the neighbour pairs actually used.
    vijs_t = t.ones(N, N).float().to(device)
    nijs_t = t.ones(N, N).float().to(device)
    Fijs_t = t.ones(N, N).float().to(device)
    Fijs_var_t = t.ones(N, N).float().to(device)
    mask = t.zeros(N, N).int().to(device)
    if onlyNN is False:
        # keep all neighbours up to k*
        for i, vijs in enumerate(vij_list):
            k = kstar[i]
            for nneigh in range(k - 1):
                j = dist_indices[i, nneigh + 1]
                # NOTE(review): these in-place writes copy values derived from
                # the *initial* Fis_t into Fijs_t before optimisation starts —
                # confirm the penalty is meant to use the initial estimates.
                Fijs_t[i, j] = Fis_t[j] - Fis_t[i]
                Fijs_var_t[i, j] = 2 * (Fis_err[i] ** 2 + Fis_err[j] ** 2)
                vijs_t[i, j] = vijs[nneigh]
                nijs_t[i, j] = float(nneigh + 1)
                mask[i, j] = 1
    else:
        # only correlate to first NN
        for i, vijs in enumerate(vij_list):
            k = kstar[i]
            for nneigh in range(1):
                j = dist_indices[i, nneigh + 1]
                Fijs_t[i, j] = Fis_t[j] - Fis_t[i]
                Fijs_var_t[i, j] = 2 * (Fis_err[i] ** 2 + Fis_err[j] ** 2)
                vijs_t[i, j] = vijs[nneigh]
                nijs_t[i, j] = float(nneigh + 1)
                mask[i, j] = 1

    def loss_fn():
        # deltas = (Fis_t[None, :] - Fis_t[:, None])
        # PAk model: corrected log-density for pair (i, j) is Fi + ai * nij.
        PAk_corr = PAk_ai_t[:, None] * nijs_t
        Fis_corr = Fis_t[:, None] + PAk_corr
        la = t.sum(mask * Fis_corr) - t.sum(mask * (vijs_t * t.exp(Fis_corr)))
        # lb = alpha * t.sum(mask * ((deltas - Fijs_t) ** 2 / Fijs_var_t))
        lb = alpha * t.sum(mask * ((Fijs_t**2) / Fijs_var_t))
        l = la - lb
        return -l

    if alg == "BFGS":
        lr = 0.5
        n_epochs = 50
        optimiser = optim.LBFGS(
            [Fis_t, PAk_ai_t],
            lr=lr,
            max_iter=20,
            max_eval=None,
            tolerance_grad=1e-7,
            tolerance_change=1e-9,
            history_size=100,
        )
        for e in range(n_epochs):

            def closure():
                optimiser.zero_grad()
                loss = loss_fn()
                loss.backward()
                return loss

            optimiser.step(closure)
    elif alg == "GD":
        lr = 1e-7
        n_epochs = 25000
        optimiser = optim.SGD([Fis_t, PAk_ai_t], lr=lr)
        for e in range(n_epochs):
            loss = loss_fn()
            loss.backward()
            optimiser.step()
            optimiser.zero_grad()
            if e % 1000 == 0:
                print(e, loss.item())
    final_loss = loss_fn()
    return final_loss.data, Fis_t.detach().numpy()
def get_input_optimizer(input_img): optimizer = optim.LBFGS([input_img.requires_grad_()]) return optimizer
def optimise_metric_vectors( gammaij, d=2, lr=1e-3, n_epochs=10000, alg="GD", vi_init=None ): n_metrics = gammaij.shape[0] # convert the losses to tensor format gammaij_t = t.from_numpy(gammaij).float().to(device) # initialise the weights at random # t.manual_seed(1) # np.random.seed(1) if vi_init is not None: assert vi_init.shape[0] == n_metrics and vi_init.shape[1] == d vi_t = t.from_numpy(vi_init).float().to(device) vi_t.requires_grad_() else: vi_t = t.randn(n_metrics, d, device=device, dtype=t.float, requires_grad=True) # vi = np.random.normal(0, 1, size=(n_metrics, d)) # vi0 = np.zeros(d) # vi0[0] = 1. # vi[0, :] = vi0 # vi_t = t.from_numpy(vi).float().to(device) # vi_t.requires_grad_() # define a loss function def loss_fn(): norms_i = t.norm(vi_t, p=2, dim=1) vin_t = vi_t / norms_i[:, None] vivjn_t = t.mm(vi_t, vin_t.T) gammaij_approx_t = norms_i[None, :] - vivjn_t l = 0.5 * t.sum((gammaij_t - gammaij_approx_t) ** 2) return l losses = [] if alg == "GD": optimiser = optim.SGD([vi_t], lr=lr) for e in range(n_epochs): loss = loss_fn() loss.backward() # can set gradient for first vector to zero # vi_t.grad[0] = 0 optimiser.step() optimiser.zero_grad() if e % 1000 == 0: print(e, loss.item()) losses.append(loss.item()) elif alg == "BFGS": optimiser = optim.LBFGS( [vi_t], lr=lr, max_iter=25, max_eval=None, tolerance_grad=1e-7, tolerance_change=1e-9, history_size=100, ) for e in range(n_epochs): def closure(): optimiser.zero_grad() loss = loss_fn() loss.backward() return loss optimiser.step(closure) if e % 100 == 0: with t.no_grad(): loss = loss_fn() print(e, loss.item()) losses.append(loss.item()) # compute final gammaij_approx with t.no_grad(): final_loss = loss_fn().item() norms_i = t.norm(vi_t, p=2, dim=1) vin_t = vi_t / norms_i[:, None] vivjn_t = t.mm(vi_t, vin_t.T) gammaij_approx_t = norms_i[None, :] - vivjn_t return vi_t.detach().numpy(), gammaij_approx_t.detach().numpy(), losses
def test_lbfgs(self): self._test_rosenbrock(lambda params: optim.LBFGS(params), wrap_old_fn(old_optim.lbfgs))
# set random seed for reproducibility torch.manual_seed(0) np.random.seed(0) learning_rate = 1 # 1 for BFGS, 0.001 for Adam no_iters = 200 no_inputs = 1 # dimensionality of input BFGS = True # use Adam or BFGS # initialise fullGP fullGP = GP(no_inputs) # optimize hyperparameters if BFGS == True: optimizer = optim.LBFGS(fullGP.parameters(), lr=learning_rate) else: optimizer = optim.Adam(fullGP.parameters(), lr=learning_rate) with trange(no_iters) as t: for i in t: if BFGS == True: def closure(): optimizer.zero_grad() NLL = -fullGP.get_LL(train_inputs, train_outputs) NLL.backward() return NLL optimizer.step(closure) NLL = -fullGP.get_LL(train_inputs, train_outputs)
def get_input_optimizer(self, input_img): optimizer = optim.LBFGS([input_img.requires_grad_()], lr=0.2) return optimizer
# set random seed to 0 np.random.seed(0) torch.manual_seed(0) # load data and make training set data = torch.load('traindata.pt') input=data[3:, :-1] input = Variable(torch.from_numpy(input), requires_grad=False) target = Variable(torch.from_numpy(data[3:, 1:]), requires_grad=False) test_input = Variable(torch.from_numpy(data[:3, :-1]), requires_grad=False) test_target = Variable(torch.from_numpy(data[:3, 1:]), requires_grad=False) # build the model seq = Sequence() seq.double() criterion = nn.MSELoss() # use LBFGS as optimizer since we can load the whole data to train optimizer = optim.LBFGS(seq.parameters(), lr=0.8) print("Using CPU i7-7700k! \n") print("--- Training GRUs ---") #begin to train for i in range(15): print('STEP: ', i) def closure(): optimizer.zero_grad() out = seq(input) loss = criterion(out, target) # record train time training_time = time.time()-time_tr_start print('MSE: %.10f \t Total time: %.3f' % (loss.data.numpy()[0], training_time)) loss.backward() return loss