def style_transfer_image(args):
    """Run neural style transfer on a single content image and save the result.

    Reads the content image from args.content_target, blends the styles listed
    in args.style_targets (equal weights unless args.style_weights is given),
    and writes the stylized image to args.output.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    content_image = Image.open(args.content_target)
    size = content_image.size
    content_target = image_to_tensor(content_image, size).to(device)

    style_targets = [
        image_to_tensor(Image.open(path), size).to(device)
        for path in args.style_targets
    ]

    # Default to uniform weighting over the style images.
    num_styles = len(style_targets)
    if args.style_weights is None:
        style_weights = np.ones(num_styles) / num_styles
    else:
        style_weights = args.style_weights

    # Optimization starts from a copy of the content image.
    input_image = content_target.clone().to(device)

    neural_style = NeuralStyle(content_layers=CONTENT_LAYERS,
                               style_layers=STYLE_LAYERS)
    neural_style.content_target = content_target
    neural_style.set_style_targets(style_targets, style_weights)

    output_image, _ = neural_style.transfer(
        input_image=input_image,
        epochs=args.epochs,
        style_weight=args.style_weight,
        content_weight=args.content_weight,
        verbose=args.verbose,
    )
    to_image(output_image, size=size).save(args.output)
def generate_image(self, name):
    """Generate one batch of images from random inputs and save the first as `name`.jpg."""
    noise = self.get_noise(self.batch_size)
    latents = self.get_latent_inputs(self.batch_size)
    outputs = self.G([latents, noise], training=False)
    to_image(outputs[0]).save(name + '.jpg')
def generate_from_mapper(self, name, latent_points):
    """Generate an image from pre-computed mapper latents and save it as `name`.jpg."""
    # The generator expects a batch dimension on the latent points.
    latents = tf.expand_dims(latent_points, axis=1)
    noise = self.get_noise(1)
    outputs = self.G([latents, noise], training=False)
    to_image(outputs[0]).save(name + '.jpg')
def scan_image(path):
    """Sliding-window object scan over an image pyramid.

    Collects every window at every pyramid level, classifies all of them in one
    batch with predict_multi, maps positive windows back to original-image
    coordinates, applies NMS, and saves annotated images to stored_path.

    NOTE(review): relies on names not defined in this function — `scale`,
    `stepSize`, `winW`, `winH`, `model_path`, `stored_path`, and the
    accumulator list `lstRect` — presumably module-level globals; confirm
    they are initialized before each call (lstRect in particular is appended
    to but never reset here).

    Returns (result_path, number_of_detections_after_NMS).
    """
    # load the image and define the window width and height
    image = cv2.imread(path, cv2.IMREAD_COLOR)
    # loop over the image pyramid
    level = 0
    lst_img = list()          # normalized window crops, in scan order
    lst_imgDetail = list()    # (x, y, pyramid level, resized image) per crop
    ori_clone = image.copy()  # annotated copy: all positives, pre-NMS
    overlapImg = image.copy() # annotated copy: positives surviving NMS
    for resized_image in pyramid(image, scale):
        # loop over the sliding window for each layer of the pyramid
        for (x, y, window) in sliding_window(resized_image, stepSize=stepSize, windowSize=(winW, winH)):
            # if the window does not meet our desired window size, ignore it
            if window.shape[0] != winH or window.shape[1] != winW:
                continue
            # THIS IS WHERE YOU WOULD PROCESS YOUR WINDOW, SUCH AS APPLYING A
            # MACHINE LEARNING CLASSIFIER TO CLASSIFY THE CONTENTS OF THE
            # WINDOW
            curWindow = (x, y, x + winW, y + winH)
            subImage = utils.to_image(resized_image).crop(curWindow)
            normalized_img = pre_processing_data.process_single_file(subImage)
            lst_img.append(normalized_img)
            imgDetail = (x, y, level, resized_image)
            lst_imgDetail.append(imgDetail)
        level += 1
    # Predict all window
    lst_indexPositive, positive_scores = predict_multi(lst_img, model_path)
    time_now("fusing")
    for i in lst_indexPositive:
        subX, subY, subLevel, subImg = lst_imgDetail[i]
        # Map window coordinates at this pyramid level back to the original image.
        ori_x, ori_y, new_winW, new_winH = reverse_window(subX, subY,
                                                          subImg.shape[1], subImg.shape[0],
                                                          scale ** subLevel,
                                                          image.shape[1], image.shape[0],
                                                          winW, winH)
        # Get positive image and save it
        ori_window = (ori_x, ori_y, ori_x + new_winW, ori_y + new_winH)
        # Draw rectangle on output image
        cv2.rectangle(ori_clone, (ori_x, ori_y), (ori_x + new_winW, ori_y + new_winH), (0, 255, 0), 2)
        lstRect.append(ori_window)
    # Non-maximum suppression over all accumulated rectangles (IoU threshold 0.05).
    overlappedLst = run_NMS(lstRect, positive_scores, 0.05)
    time_now("draw rect")
    for i in overlappedLst:
        x1, y1, x2, y2 = lstRect[i]
        cv2.rectangle(overlapImg, (x1, y1), (x2, y2), (0, 255, 0), 2)
    result_path = os.path.join(stored_path, 'result.png')
    not_overlap = os.path.join(stored_path, 'be4.png')
    utils.to_image(ori_clone).save(not_overlap, 'png')
    utils.to_image(overlapImg).save(result_path, 'png')
    return result_path, len(overlappedLst)
def train(opt, vis, epoch, train_loader, net, optimizer, scheduler):
    """Run one training epoch: forward, cross-entropy loss, backward, logging,
    and periodic visualization of inputs/labels/predictions."""
    net = net.train()
    train_len = len(train_loader)
    start_time = time.time()
    scheduler.step()
    for iteration, batch in enumerate(train_loader):
        # Load Data
        im, label = batch
        im = im.cuda(non_blocking=True)
        label = label.cuda(non_blocking=True)

        # Forward Pass
        out = net(im)
        loss = F.cross_entropy(out, label)

        # Backward Pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.batch_step()

        # Logging — average time is measured over the warm-up iterations first,
        # then restarted once `opt.threads` iterations have passed.
        cur_time = time.time()
        loss_scalar = float(loss.cpu().detach().numpy())
        if iteration < opt.threads:
            elapsed_per_iter = (cur_time - start_time) / (iteration + 1)
        else:
            elapsed_per_iter = (cur_time - start_time) / (iteration + 1 - opt.threads)
        print('{} [{}]({}/{}) AvgTime:{:>4} Loss:{:>4}'.format(opt.env, epoch, iteration, train_len, \
            round(elapsed_per_iter, 2), \
            round(loss_scalar, 4)))
        if iteration == opt.threads - 1:
            start_time = cur_time

        # Visualization
        vis.iteration.append(epoch + iteration / train_len)
        vis.nlogloss.append(-np.log(np.maximum(1e-6, loss_scalar)))
        vis.plot_loss()
        if opt.vis_iter <= 0 or iteration % opt.vis_iter > 0:
            continue
        prob, pred = torch.max(out, dim=1)
        # Individual channel groups of the first sample in the batch.
        vis_rgb = to_image(im[0, 0:3, :, :] * 0.5)
        vis_nir = to_image(im[0, 3:4, :, :] * 0.5)
        vis_swir1 = to_image(im[0, 4:5, :, :] * 0.5)
        vis_swir2 = to_image(im[0, -2:-1, :, :] * 0.5)
        vis_label = colormap(label[0].cpu().numpy())
        vis_pred = colormap(pred[0].cpu().numpy())
        row_top = np.concatenate((vis_label, vis_pred), axis=1)
        row_mid = np.concatenate((vis_rgb, vis_nir), axis=1)
        row_bot = np.concatenate((vis_swir1, vis_swir2), axis=1)
        vis_im = np.concatenate((row_top, row_mid, row_bot), axis=2)
        vis.plot_image(vis_im, 0)
def test(opt, test_loader, net, split):
    """Evaluate `net` on `test_loader`, both raw and CRF-post-processed,
    print summary metrics, save visualization panels, and return the scores."""
    start_time = time.time()
    eva = Evaluator(opt.n_classes, opt.bg_err)
    eva_crf = Evaluator(opt.n_classes, opt.bg_err)
    ims = []
    labels = []
    net = net.eval()
    for iteration, batch in enumerate(test_loader):
        im, label = batch
        im = im.cuda()
        label = label.cuda()
        out = net(im)
        prob = F.softmax(out, dim=1)
        for i in range(opt.batch_size):
            prob_np = prob[i].detach().cpu().numpy()
            label_np = label[i].cpu().numpy()
            im_np = im[i].cpu().numpy()
            ims.append(to_image(im[i, :3, :, :]))
            labels.append(label_np)
            eva.register(label_np, prob_np)
            # Dense-CRF refinement of the raw probabilities.
            prob_crf = crf(prob_np, im_np, opt.sdims, opt.schan, opt.compat,
                           opt.iters)
            eva_crf.register(label_np, prob_crf)
            print(
                str(iteration * opt.batch_size + i).zfill(2),
                time.time() - start_time, 'seconds')
    msa, preds_msa, miou, miiou, preds_miou = eva.evaluate()
    msa_crf, preds_msa_crf, miou_crf, miiou_crf, preds_miou_crf = eva_crf.evaluate(
    )
    print('Pre-CRF: MSA: {} mIoU: {} miIoU: {}'.format(
        round(msa * 100, 1), round(miou * 100, 1), round(miiou * 100, 1)))
    print('Post-CRF: MSA: {} mIoU: {} miIoU: {}'.format(
        round(msa_crf * 100, 1), round(miou_crf * 100, 1),
        round(miiou_crf * 100, 1)))
    for i, label in enumerate(labels):
        # NOTE: the *_msa predictions are fetched but only the *_miou panels
        # are rendered below; kept as-is to preserve behavior.
        pred_msa = preds_msa[i]
        pred_msa_crf = preds_msa_crf[i]
        pred_miou = preds_miou[i]
        pred_miou_crf = preds_miou_crf[i]
        vis_im = ims[i]
        vis_label = colormap(label)
        vis_pred_msa = colormap(pred_msa)
        vis_pred_msa_crf = colormap(pred_msa_crf)
        vis_pred_miou = colormap(pred_miou)
        vis_pred_miou_crf = colormap(pred_miou_crf)
        top = np.concatenate((vis_im, vis_label), axis=2)
        bottom = np.concatenate((vis_pred_miou, vis_pred_miou_crf), axis=2)
        vis_all = np.concatenate((top, bottom), axis=1)
        vis_all = vis_all.transpose((1, 2, 0))
        io.imsave(
            Path(opt.out_path) / split / (str(i).zfill(2) + '.png'), vis_all)
    return msa, miou, miiou, msa_crf, miou_crf, miiou_crf
def write_image(self, target, condition, output_length):
    """Log input/output piano-roll images plus confidence and accuracy
    metrics for the first sample to TensorBoard at the current step.

    Parameters
    ----------
    target : Tensor
        Ground-truth batch; only the first element is visualized.
    condition : Tensor
        Conditioning batch matching `target`.
    output_length : int
        Number of trailing timesteps to generate and compare against.
    """
    output = self.net.module.get_output(target[:1], condition[:1],
                                        output_length)
    cleaned_input = clean(target[..., -output_length:])
    self.writer.add_image('Score/Input', to_image(cleaned_input), self.step)
    cleaned_output = clean(output.argmax(dim=1))
    self.writer.add_image('Score/Output', to_image(cleaned_output), self.step)
    score_confidence, confidence = get_confidence(
        torch.nn.functional.softmax(output, dim=1), cleaned_input)
    score_accuracy, accuracy = get_accuracy(output, cleaned_input)
    self.writer.add_histogram('Score/Confidence', score_confidence, self.step)
    # BUG FIX: tag was misspelled 'Score/Confindence_image', which created a
    # separate, orphaned TensorBoard tag.
    self.writer.add_image('Score/Confidence_image', score_confidence,
                          self.step)
    self.writer.add_image('Score/Accuracy_image', score_accuracy, self.step)
    self.writer.add_scalar('Train/Confidence', confidence, self.step)
    self.writer.add_scalar('Train/Accuracy', accuracy, self.step)
def generate_animation(self, name, frames, samples):
    """Render an interpolation animation through `samples` random latent
    points, saving individual frames to ./frames/ and assembling them
    into a video named `name`."""
    frames_per_sample = frames // samples
    noise = self.get_noise(self.batch_size)
    z_start = self.get_latent_inputs(self.batch_size)
    for sample_idx in range(samples):
        z_end = self.get_latent_inputs(self.batch_size)
        for step in range(frames_per_sample):
            # Linear interpolation between consecutive latent points.
            z = lerp(z_start, z_end, step, frames_per_sample)
            outputs = self.G([z, noise], training=False)
            # Global frame number used to order frames in the final video.
            frame_no = sample_idx * frames_per_sample + step
            # Save each frame separately; encoding a whole batch at once
            # can run out of memory.
            to_image(outputs[0]).save('./frames/frame' + str(frame_no) + '.jpg')
        z_start = z_end
    to_video(name)
def visualize(ds, model, args, epoch):
    """Render the model's prediction on one batch and save it to out/{epoch}.png."""
    ds.train()
    model.eval()
    dl = DataLoader(ds, args.batch_size, num_workers=args.num_workers,
                    worker_init_fn=set_random_seed)
    with torch.no_grad():
        batch = next(iter(dl))
        x_res, x_crp, x_ff, y_res, y_crp = (t.to(device) for t in batch)
        # Network prediction for the sampled batch.
        p_crp, p_res = model(x_res, x_crp, x_ff)
        image = to_image(p_res, y_res, x_ff)
        io.imsave(f'out/{epoch:03d}.png', image)
def generate():
    """Build an (N+1)x(N+1) style-mixing table from a trained style-based
    generator and save it as table.jpg.

    The first row and column show N source images each; every interior cell
    mixes the coarse styles of the row image with the fine styles of the
    column image.

    NOTE(review): relies on module-level constants `L` (tile size) and `N`
    (grid size) — confirm they are defined in this module.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', '-g', type=int, default=-1)
    parser.add_argument('--sgen', type=str, default=None)
    parser.add_argument('--depth', '-d', type=int, default=5)
    parser.add_argument('--out', '-o', type=str, default='img/')
    # parser.add_argument('--num', '-n', type=int, default=10)
    args = parser.parse_args()

    sgen = network.StyleBasedGenerator(depth=args.depth)
    print('loading generator model from ' + args.sgen)
    serializers.load_npz(args.sgen, sgen)
    # if args.gpu >= 0:
    #     cuda.get_device_from_id(0).use()
    #     sgen.to_gpu()
    #     xp = sgen.xp

    dst = Image.new(mode='RGB', size=(L * (N + 1), L * (N + 1)))
    array_z = sgen.make_latent(N * 2)
    array_w = sgen.E(array_z)
    array_x = sgen.G(array_w)
    # Header row (first N latents) and header column (last N latents).
    for i in range(N):
        dst.paste(utils.to_image(array_x[i].data), (L * (i + 1), 0))
        dst.paste(utils.to_image(array_x[i + N].data), (0, L * (i + 1)))
    for i in range(N):
        for j in range(N):
            print(i, j)
            # BUG FIX: previously used the undefined name `depth`; the layer
            # count comes from the --depth argument.
            half = args.depth // 2
            # Coarse layers take row style i, fine layers take column style j+N.
            ws = [array_w[np.newaxis, i]
                  ] * half + [array_w[np.newaxis, j + N]] * (args.depth + 1 - half)
            x = sgen.G.style_mixing(ws)
            dst.paste(utils.to_image(x[0].data), (L * (i + 1), L * (j + 1)))
    dst.save('table.jpg')
def style_transfer_video(args):
    """Apply neural style transfer frame-by-frame to a video, linearly
    blending between two style images over the video's duration, and write
    the result to output.mp4."""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    loader = transforms.ToPILImage()
    reader = imageio.get_reader(args.content_target)

    # All frames are processed at a fixed 512x512 working resolution.
    frames = [
        image_to_tensor(loader(reader.get_data(i)), (512, 512))
        for i in range(reader.count_frames())
    ]
    style_targets = [
        image_to_tensor(Image.open(path), (512, 512))
        for path in args.style_targets
    ]
    # Per-frame blend factor: 0 -> first style only, 1 -> second style only.
    style_weights = np.linspace(0, 1, num=len(frames))

    neural_style = NeuralStyle(content_layers=CONTENT_LAYERS,
                               style_layers=STYLE_LAYERS).to(device)
    input_image = frames[0].to(device)
    outputs = []
    for i in trange(len(frames)):
        neural_style.content_target = frames[i].to(device)
        neural_style.set_style_targets(
            style_targets, [1 - style_weights[i], style_weights[i]])
        output_image = neural_style.transfer(
            input_image=input_image,
            epochs=args.epochs,
            style_weight=args.style_weight,
            content_weight=args.content_weight,
            verbose=args.verbose,
        )
        # del frames[i]
        # Each frame's optimization starts from the previous stylized frame.
        input_image = output_image.clone().to(device)
        outputs.append(output_image.to("cpu"))
        del output_image

    writer = imageio.get_writer("output.mp4",
                                fps=reader.get_meta_data()["fps"])
    # Restore the source video's original resolution on write-out.
    shape = reader.get_data(0).shape[:2]
    outputs = [to_image(frame, (shape[1], shape[0])) for frame in outputs]
    for frame in outputs:
        writer.append_data(np.asarray(frame))
    writer.close()
def generate():
    """Walk through `--num` random latent points, interpolating 10 frames
    between each consecutive pair, and save the sequence as analogy.gif."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', '-g', type=int, default=-1)
    parser.add_argument('--sgen', type=str, default=None)
    parser.add_argument('--depth', '-d', type=int, default=5)
    parser.add_argument('--out', '-o', type=str, default='img/')
    parser.add_argument('--num', '-n', type=int, default=10)
    args = parser.parse_args()

    sgen = network.StyleBasedGenerator(depth=args.depth)
    print('loading generator model from ' + args.sgen)
    serializers.load_npz(args.sgen, sgen)
    # if args.gpu >= 0:
    #     cuda.get_device_from_id(0).use()
    #     sgen.to_gpu()
    #     xp = sgen.xp

    imgs = []
    z_prev = sgen.make_latent(1)
    for _ in range(args.num):
        z_next = sgen.make_latent(1)
        w_prev = sgen.E(z_prev)
        w_next = sgen.E(z_next)
        # Interpolate in W space between the two mapped latents.
        for t in np.linspace(0, 1, 10):
            blended = w_prev * (1 - t) + w_next * t
            x = sgen.G(blended)
            imgs.append(utils.to_image(x[0].data))
        z_prev = z_next
    imgs[0].save('analogy.gif', save_all=True, duration=100,
                 append_images=imgs[1:], loop=True)
def sample(self, name: str, init: torch.Tensor, condition: torch.Tensor,
           temperature=1.):
    """Generate a sample, write it to Samples/<name>.mid, and return its roll.

    Wraps `generate`, cleaning the output, saving the roll plot, and
    converting it to midi.

    Arguments
    --------------
    name : int or str
        The name of the resulting sampled midi file.
    init : Tensor or None
        Initializing tensor for Wavenet in order for fast generation.
        Currently None is not supported.
    condition : Tensor or None
        Condition tensor for Wavenet. Currently None is not supported.
    temperature : float
        Sampling temperature; >1 means more randomness, <1 means less
        randomness.

    Returns
    --------------
    to_image(roll) : np.array
        2d piano roll representation of generated sample.
    """
    if not os.path.isdir('Samples'):
        os.mkdir('Samples')
    roll = clean(self.generate(init, condition, temperature))
    save_roll(roll, name)
    midi_file = piano_rolls_to_midi(roll)
    midi_file.write('Samples/{}.mid'.format(name))
    tqdm.write('Saved to Samples/{}.mid'.format(name))
    return to_image(roll)
def main():
    """Train SRGAN: MSE-only generator pre-training, then joint adversarial
    training of generator and discriminator with label smoothing/flipping,
    periodic checkpointing, and per-epoch dev-set visualization."""
    n_epoch_pretrain = 2
    use_tensorboard = True

    parser = argparse.ArgumentParser(description='SRGAN Train')
    parser.add_argument('--crop_size', default=128, type=int, help='training images crop size')
    parser.add_argument('--num_epochs', default=1000, type=int, help='training epoch')
    parser.add_argument('--batch_size', default=64, type=int, help='training batch size')
    parser.add_argument('--train_set', default='data/train', type=str, help='train set path')
    parser.add_argument('--check_point', type=int, default=-1, help="continue with previous check_point")
    opt = parser.parse_args()

    input_size = opt.crop_size
    n_epoch = opt.num_epochs
    batch_size = opt.batch_size
    check_point = opt.check_point

    check_point_path = 'cp/'
    if not os.path.exists(check_point_path):
        os.makedirs(check_point_path)

    train_set = TrainDataset(opt.train_set, crop_size=input_size, upscale_factor=4)
    train_loader = DataLoader(dataset=train_set, num_workers=2, batch_size=batch_size, shuffle=True)
    dev_set = DevDataset('data/dev', upscale_factor=4)
    dev_loader = DataLoader(dataset=dev_set, num_workers=1, batch_size=1, shuffle=False)

    mse = nn.MSELoss()
    bce = nn.BCELoss()
    #tv = TVLoss()

    if not torch.cuda.is_available():
        print ('!!!!!!!!!!!!!!USING CPU!!!!!!!!!!!!!')

    netG = Generator()
    print('# generator parameters:', sum(param.numel() for param in netG.parameters()))
    netD = Discriminator()
    print('# discriminator parameters:', sum(param.numel() for param in netD.parameters()))

    if torch.cuda.is_available():
        netG.cuda()
        netD.cuda()
        #tv.cuda()
        mse.cuda()
        bce.cuda()

    if use_tensorboard:
        writer = SummaryWriter()

    # Pre-train generator using only MSE loss
    if check_point == -1:
        optimizerG = optim.Adam(netG.parameters())
        #schedulerG = MultiStepLR(optimizerG, milestones=[20], gamma=0.1)
        for epoch in range(1, n_epoch_pretrain + 1):
            #schedulerG.step()
            train_bar = tqdm(train_loader)
            netG.train()
            cache = {'g_loss': 0}
            for lowres, real_img_hr in train_bar:
                if torch.cuda.is_available():
                    real_img_hr = real_img_hr.cuda()
                    lowres = lowres.cuda()

                fake_img_hr = netG(lowres)

                # Train G
                netG.zero_grad()
                image_loss = mse(fake_img_hr, real_img_hr)
                # BUG FIX: accumulate a Python float, not the loss tensor —
                # accumulating the tensor keeps every iteration's autograd
                # graph alive and grows memory unboundedly.
                cache['g_loss'] += image_loss.item()
                image_loss.backward()
                optimizerG.step()

                # Print information by tqdm
                train_bar.set_description(desc='[%d/%d] Loss_G: %.4f' % (epoch, n_epoch_pretrain, image_loss))

            # Save model parameters
            #if torch.cuda.is_available():
            #    torch.save(netG.state_dict(), 'cp/netG_epoch_pre_gpu.pth')
            #else:
            #    torch.save(netG.state_dict(), 'cp/netG_epoch_pre_cpu.pth')

    optimizerG = optim.Adam(netG.parameters())
    optimizerD = optim.Adam(netD.parameters())

    if check_point != -1:
        # Resume nets and optimizers from the requested checkpoint; path
        # suffix depends on the device the checkpoint was saved from.
        suffix = '_gpu.pth' if torch.cuda.is_available() else '_cpu.pth'
        netG.load_state_dict(torch.load('cp/netG_epoch_' + str(check_point) + suffix))
        netD.load_state_dict(torch.load('cp/netD_epoch_' + str(check_point) + suffix))
        optimizerG.load_state_dict(torch.load('cp/optimizerG_epoch_' + str(check_point) + suffix))
        optimizerD.load_state_dict(torch.load('cp/optimizerD_epoch_' + str(check_point) + suffix))

    for epoch in range(1 + max(check_point, 0), n_epoch + 1 + max(check_point, 0)):
        train_bar = tqdm(train_loader)
        netG.train()
        netD.train()

        cache = {'mse_loss': 0, 'tv_loss': 0, 'adv_loss': 0, 'g_loss': 0, 'd_loss': 0,
                 'ssim': 0, 'psnr': 0,
                 'd_top_grad': 0, 'd_bot_grad': 0, 'g_top_grad': 0, 'g_bot_grad': 0}

        for lowres, real_img_hr in train_bar:
            if torch.cuda.is_available():
                real_img_hr = real_img_hr.cuda()
                lowres = lowres.cuda()

            # Train D
            #if not check_grads(netD, 'D'):
            #    return
            netD.zero_grad()

            logits_real = netD(real_img_hr)
            logits_fake = netD(netG(lowres).detach())

            # Label smoothing: real targets drawn from [0.85, 1.10), fake
            # from [0, 0.15).  (FIX: build the tensors directly instead of
            # the deprecated torch.tensor(tensor) copy-construct.)
            real = torch.rand(logits_real.size())*0.25 + 0.85
            fake = torch.rand(logits_fake.size())*0.15
            # Label flipping: swap real/fake targets with 5% probability.
            prob = (torch.rand(logits_real.size()) < 0.05)

            if torch.cuda.is_available():
                real = real.cuda()
                fake = fake.cuda()
                prob = prob.cuda()

            real_clone = real.clone()
            real[prob] = fake[prob]
            fake[prob] = real_clone[prob]

            d_loss = bce(logits_real, real) + bce(logits_fake, fake)
            cache['d_loss'] += d_loss.item()

            d_loss.backward()
            optimizerD.step()

            dtg, dbg = get_grads_D(netD)
            cache['d_top_grad'] += dtg
            cache['d_bot_grad'] += dbg

            # Train G
            #if not check_grads(netG, 'G'):
            #    return
            netG.zero_grad()

            fake_img_hr = netG(lowres)
            image_loss = mse(fake_img_hr, real_img_hr)
            logits_fake_new = netD(fake_img_hr)
            adversarial_loss = bce(logits_fake_new, torch.ones_like(logits_fake_new))
            #tv_loss = tv(fake_img_hr)
            g_loss = image_loss + 1e-2*adversarial_loss

            cache['mse_loss'] += image_loss.item()
            #cache['tv_loss'] += tv_loss.item()
            cache['adv_loss'] += adversarial_loss.item()
            cache['g_loss'] += g_loss.item()

            g_loss.backward()
            optimizerG.step()

            gtg, gbg = get_grads_G(netG)
            cache['g_top_grad'] += gtg
            cache['g_bot_grad'] += gbg

            # Print information by tqdm
            train_bar.set_description(desc='[%d/%d] D grads:(%f, %f) G grads:(%f, %f) Loss_D: %.4f Loss_G: %.4f = %.4f + %.4f' % (epoch, n_epoch, dtg, dbg, gtg, gbg, d_loss, g_loss, image_loss, adversarial_loss))

        if use_tensorboard:
            writer.add_scalar('d_loss', cache['d_loss']/len(train_loader), epoch)
            writer.add_scalar('mse_loss', cache['mse_loss']/len(train_loader), epoch)
            #writer.add_scalar('tv_loss', cache['tv_loss']/len(train_loader), epoch)
            writer.add_scalar('adv_loss', cache['adv_loss']/len(train_loader), epoch)
            writer.add_scalar('g_loss', cache['g_loss']/len(train_loader), epoch)
            writer.add_scalar('D top layer gradient', cache['d_top_grad']/len(train_loader), epoch)
            writer.add_scalar('D bot layer gradient', cache['d_bot_grad']/len(train_loader), epoch)
            writer.add_scalar('G top layer gradient', cache['g_top_grad']/len(train_loader), epoch)
            writer.add_scalar('G bot layer gradient', cache['g_bot_grad']/len(train_loader), epoch)

        # Save model parameters (paths identical to the original branches).
        tag = 'gpu' if torch.cuda.is_available() else 'cpu'
        torch.save(netG.state_dict(), 'cp/netG_epoch_%d_%s.pth' % (epoch, tag))
        if epoch % 5 == 0:
            torch.save(netD.state_dict(), 'cp/netD_epoch_%d_%s.pth' % (epoch, tag))
            torch.save(optimizerG.state_dict(), 'cp/optimizerG_epoch_%d_%s.pth' % (epoch, tag))
            torch.save(optimizerD.state_dict(), 'cp/optimizerD_epoch_%d_%s.pth' % (epoch, tag))

        # Visualize results
        with torch.no_grad():
            netG.eval()
            out_path = 'vis/'
            if not os.path.exists(out_path):
                os.makedirs(out_path)

            dev_bar = tqdm(dev_loader)
            valing_results = {'mse': 0, 'ssims': 0, 'psnr': 0, 'ssim': 0, 'batch_sizes': 0}
            dev_images = []
            for val_lr, val_hr_restore, val_hr in dev_bar:
                batch_size = val_lr.size(0)
                lr = val_lr
                hr = val_hr
                if torch.cuda.is_available():
                    lr = lr.cuda()
                    hr = hr.cuda()
                sr = netG(lr)

                psnr = 10 * log10(1 / ((sr - hr) ** 2).mean().item())
                ssim = pytorch_ssim.ssim(sr, hr).item()
                dev_bar.set_description(desc='[converting LR images to SR images] PSNR: %.4f dB SSIM: %.4f' % (psnr, ssim))
                cache['ssim'] += ssim
                cache['psnr'] += psnr

                # Avoid out of memory crash on 8G GPU
                if len(dev_images) < 60:
                    dev_images.extend([to_image()(val_hr_restore.squeeze(0)),
                                       to_image()(hr.data.cpu().squeeze(0)),
                                       to_image()(sr.data.cpu().squeeze(0))])

            dev_images = torch.stack(dev_images)
            dev_images = torch.chunk(dev_images, dev_images.size(0) // 3)
            dev_save_bar = tqdm(dev_images, desc='[saving training results]')
            index = 1
            for image in dev_save_bar:
                image = utils.make_grid(image, nrow=3, padding=5)
                utils.save_image(image, out_path + 'epoch_%d_index_%d.png' % (epoch, index), padding=5)
                index += 1

        if use_tensorboard:
            writer.add_scalar('ssim', cache['ssim']/len(dev_loader), epoch)
            writer.add_scalar('psnr', cache['psnr']/len(dev_loader), epoch)
def main():
    """Train SRGAN with a WGAN-GP discriminator: MSE-only generator
    pre-training, then alternating D (Wasserstein loss + gradient penalty)
    and G updates, with checkpointing and per-epoch dev visualization."""
    n_epoch_pretrain = 2
    use_tensorboard = True

    parser = argparse.ArgumentParser(description='SRGAN Train')
    parser.add_argument('--crop_size', default=96, type=int, help='training images crop size')
    parser.add_argument('--num_epochs', default=500, type=int, help='training epoch')
    parser.add_argument('--batch_size', default=32, type=int, help='training batch size')
    parser.add_argument('--train_set', default='data/train', type=str, help='train set path')
    parser.add_argument('--check_point', type=int, default=-1, help="continue with previous check_point")
    opt = parser.parse_args()

    input_size = opt.crop_size
    n_epoch = opt.num_epochs
    batch_size = opt.batch_size
    check_point = opt.check_point

    check_point_path = 'cp/'
    if not os.path.exists(check_point_path):
        os.makedirs(check_point_path)

    train_set = TrainDataset(opt.train_set, crop_size=input_size, upscale_factor=4)
    train_loader = DataLoader(dataset=train_set, num_workers=2, batch_size=batch_size, shuffle=True)
    dev_set = DevDataset('data/dev', upscale_factor=4)
    dev_loader = DataLoader(dataset=dev_set, num_workers=1, batch_size=1, shuffle=False)

    mse = nn.MSELoss()

    if not torch.cuda.is_available():
        print ('!!!!!!!!!!!!!!USING CPU!!!!!!!!!!!!!')

    netG = Generator()
    print('# generator parameters:', sum(param.numel() for param in netG.parameters()))
    netD = Discriminator_WGAN()
    print('# discriminator parameters:', sum(param.numel() for param in netD.parameters()))

    if torch.cuda.is_available():
        netG.cuda()
        netD.cuda()
        mse.cuda()

    if use_tensorboard:
        configure('log', flush_secs=5)

    # Pre-train generator using only MSE loss
    if check_point == -1:
        optimizerG = optim.Adam(netG.parameters())
        for epoch in range(1, n_epoch_pretrain + 1):
            train_bar = tqdm(train_loader)
            netG.train()
            cache = {'g_loss': 0}
            for lowres, real_img_hr in train_bar:
                if torch.cuda.is_available():
                    real_img_hr = real_img_hr.cuda()
                    lowres = lowres.cuda()

                fake_img_hr = netG(lowres)

                # Train G
                netG.zero_grad()
                image_loss = mse(fake_img_hr, real_img_hr)
                # BUG FIX: accumulate a Python float, not the loss tensor —
                # accumulating the tensor keeps every iteration's autograd
                # graph alive and grows memory unboundedly.
                cache['g_loss'] += image_loss.item()
                image_loss.backward()
                optimizerG.step()

                # Print information by tqdm
                train_bar.set_description(desc='[%d/%d] Loss_G: %.4f' % (epoch, n_epoch_pretrain, image_loss))

    optimizerG = optim.Adam(netG.parameters(), lr=1e-4)
    optimizerD = optim.Adam(netD.parameters(), lr=1e-4)

    if check_point != -1:
        # Resume nets and optimizers from the requested checkpoint; path
        # suffix depends on the device the checkpoint was saved from.
        suffix = '_gpu.pth' if torch.cuda.is_available() else '_cpu.pth'
        netG.load_state_dict(torch.load('cp/netG_epoch_' + str(check_point) + suffix))
        netD.load_state_dict(torch.load('cp/netD_epoch_' + str(check_point) + suffix))
        optimizerG.load_state_dict(torch.load('cp/optimizerG_epoch_' + str(check_point) + suffix))
        optimizerD.load_state_dict(torch.load('cp/optimizerD_epoch_' + str(check_point) + suffix))

    for epoch in range(1 + max(check_point, 0), n_epoch + 1 + max(check_point, 0)):
        train_bar = tqdm(train_loader)
        netG.train()
        netD.train()

        cache = {'mse_loss': 0, 'adv_loss': 0, 'g_loss': 0, 'd_loss': 0,
                 'ssim': 0, 'psnr': 0,
                 'd_top_grad': 0, 'd_bot_grad': 0, 'g_top_grad': 0, 'g_bot_grad': 0}

        for lowres, real_img_hr in train_bar:
            if torch.cuda.is_available():
                real_img_hr = real_img_hr.cuda()
                lowres = lowres.cuda()

            fake_img_hr = netG(lowres)

            # Train D: Wasserstein loss with gradient penalty (WGAN-GP).
            netD.zero_grad()

            logits_real = netD(real_img_hr).mean()
            logits_fake = netD(fake_img_hr).mean()
            gradient_penalty = compute_gradient_penalty(netD, real_img_hr, fake_img_hr)

            d_loss = logits_fake - logits_real + 10*gradient_penalty
            cache['d_loss'] += d_loss.item()

            # retain_graph: fake_img_hr's graph is reused by the G update below.
            d_loss.backward(retain_graph=True)
            optimizerD.step()

            dtg, dbg = get_grads_D_WAN(netD)
            cache['d_top_grad'] += dtg
            cache['d_bot_grad'] += dbg

            # Train G
            netG.zero_grad()

            image_loss = mse(fake_img_hr, real_img_hr)
            adversarial_loss = -1*netD(fake_img_hr).mean()
            g_loss = image_loss + 1e-3*adversarial_loss

            cache['mse_loss'] += image_loss.item()
            cache['adv_loss'] += adversarial_loss.item()
            cache['g_loss'] += g_loss.item()

            g_loss.backward()
            optimizerG.step()

            gtg, gbg = get_grads_G(netG)
            cache['g_top_grad'] += gtg
            cache['g_bot_grad'] += gbg

            # Print information by tqdm
            train_bar.set_description(desc='[%d/%d] D grads:(%f, %f) G grads:(%f, %f) Loss_D: %.4f Loss_G: %.4f = %.4f + %.4f' % (epoch, n_epoch, dtg, dbg, gtg, gbg, d_loss, g_loss, image_loss, adversarial_loss))

        if use_tensorboard:
            log_value('d_loss', cache['d_loss']/len(train_loader), epoch)
            log_value('mse_loss', cache['mse_loss']/len(train_loader), epoch)
            log_value('adv_loss', cache['adv_loss']/len(train_loader), epoch)
            log_value('g_loss', cache['g_loss']/len(train_loader), epoch)
            log_value('D top layer gradient', cache['d_top_grad']/len(train_loader), epoch)
            log_value('D bot layer gradient', cache['d_bot_grad']/len(train_loader), epoch)
            log_value('G top layer gradient', cache['g_top_grad']/len(train_loader), epoch)
            log_value('G bot layer gradient', cache['g_bot_grad']/len(train_loader), epoch)

        # Save model parameters (paths identical to the original branches).
        tag = 'gpu' if torch.cuda.is_available() else 'cpu'
        torch.save(netG.state_dict(), 'cp/netG_epoch_%d_%s.pth' % (epoch, tag))
        if epoch % 5 == 0:
            torch.save(netD.state_dict(), 'cp/netD_epoch_%d_%s.pth' % (epoch, tag))
            torch.save(optimizerG.state_dict(), 'cp/optimizerG_epoch_%d_%s.pth' % (epoch, tag))
            torch.save(optimizerD.state_dict(), 'cp/optimizerD_epoch_%d_%s.pth' % (epoch, tag))

        # Visualize results
        with torch.no_grad():
            netG.eval()
            out_path = 'vis/'
            if not os.path.exists(out_path):
                os.makedirs(out_path)

            dev_bar = tqdm(dev_loader)
            valing_results = {'mse': 0, 'ssims': 0, 'psnr': 0, 'ssim': 0, 'batch_sizes': 0}
            dev_images = []
            for val_lr, val_hr_restore, val_hr in dev_bar:
                batch_size = val_lr.size(0)
                lr = val_lr
                hr = val_hr
                if torch.cuda.is_available():
                    lr = lr.cuda()
                    hr = hr.cuda()
                sr = netG(lr)

                psnr = 10 * log10(1 / ((sr - hr) ** 2).mean().item())
                ssim = pytorch_ssim.ssim(sr, hr).item()
                dev_bar.set_description(desc='[converting LR images to SR images] PSNR: %.4f dB SSIM: %.4f' % (psnr, ssim))
                cache['ssim'] += ssim
                cache['psnr'] += psnr

                # Avoid out of memory crash on 8G GPU
                if len(dev_images) < 60:
                    dev_images.extend([to_image()(val_hr_restore.squeeze(0)),
                                       to_image()(hr.data.cpu().squeeze(0)),
                                       to_image()(sr.data.cpu().squeeze(0))])

            dev_images = torch.stack(dev_images)
            dev_images = torch.chunk(dev_images, dev_images.size(0) // 3)
            dev_save_bar = tqdm(dev_images, desc='[saving training results]')
            index = 1
            for image in dev_save_bar:
                image = utils.make_grid(image, nrow=3, padding=5)
                utils.save_image(image, out_path + 'epoch_%d_index_%d.png' % (epoch, index), padding=5)
                index += 1

        if use_tensorboard:
            log_value('ssim', cache['ssim']/len(dev_loader), epoch)
            log_value('psnr', cache['psnr']/len(dev_loader), epoch)
def generate_image(self, name):
    """Sample one image from random noise and save it as `name`.jpg."""
    latent = tf.random.normal([1, self.noise_size])
    generated = self.G(latent, training=False)
    to_image(generated).save(name + '.jpg')
# NOTE(review): this span looks like a fragment of a sliding-window scan
# (a near-duplicate of scan_image's pyramid loop); its enclosing definition
# is not visible here, so names like `image`, `scale`, `winW`, `winH`,
# `lst_img`, `lst_imgDetail`, and `level` are assumed to be defined earlier
# in the file — TODO confirm.
lstRect = list()  # accumulator for detected rectangles, consumed later by NMS
for resized_image in pyramid(image, scale):
    # loop over the sliding window for each layer of the pyramid
    for (x, y, window) in sliding_window(resized_image, stepSize=32, windowSize=(winW, winH)):
        # if the window does not meet our desired window size, ignore it
        if window.shape[0] != winH or window.shape[1] != winW:
            continue
        # THIS IS WHERE YOU WOULD PROCESS YOUR WINDOW, SUCH AS APPLYING A
        # MACHINE LEARNING CLASSIFIER TO CLASSIFY THE CONTENTS OF THE
        # WINDOW
        curWindow = (x, y, x + winW, y + winH)
        subImage = utils.to_image(resized_image).crop(curWindow)
        normalized_img = pre_processing_data.process_single_file(subImage)
        lst_img.append(normalized_img)
        imgDetail = (x, y, level, resized_image)
        lst_imgDetail.append(imgDetail)
        # since we do not have a classifier, we'll just draw the window
        # clone = resized_image.copy()
        # cv2.rectangle(clone, (x, y), (x + winW, y + winH), (0, 255, 0), 2)
        # cv2.imshow("Window", clone)
        # cv2.waitKey(1)
        # time.sleep(0.025)
    level += 1
# Predict all window
def main():
    """Validate a range of SRGAN generator checkpoints on the dev set.

    For every ``interval``-th epoch in ``[start, end]``, super-resolves each
    dev image with that epoch's generator weights and with the fixed baseline
    weights, then saves 4-wide comparison grids
    (restored LR / HR / epoch SR / baseline SR) under ``vis/``.
    """
    parser = argparse.ArgumentParser(description='Validate SRGAN')
    parser.add_argument('--val_set', default='data/val', type=str, help='dev set path')
    parser.add_argument('--start', default=1, type=int, help='first epoch checkpoint to validate')
    parser.add_argument('--end', default=100, type=int, help='last epoch checkpoint to validate')
    # Help text fixed: it previously said 'model end' (copy-paste error).
    parser.add_argument('--interval', default=1, type=int, help='validate every N-th epoch checkpoint')
    opt = parser.parse_args()

    val_set = DevDataset(opt.val_set, upscale_factor=4)
    val_loader = DataLoader(dataset=val_set, num_workers=1, batch_size=1, shuffle=False)

    netG = Generator()
    if torch.cuda.is_available():
        netG.cuda()

    out_path = 'vis/'
    if not os.path.exists(out_path):
        os.makedirs(out_path)

    # Read the fixed baseline checkpoint from disk once; the original
    # reloaded it (and the epoch checkpoint) for every single image.
    baseline_state = torch.load('cp/netG_baseline_gpu.pth')

    for epoch in range(opt.start, opt.end + 1):
        if epoch % opt.interval != 0:
            continue
        epoch_state = torch.load('cp/netG_epoch_' + str(epoch) + '_gpu.pth')
        with torch.no_grad():
            netG.eval()
            val_bar = tqdm(val_loader)
            dev_images = []
            for val_lr, val_hr_restore, val_hr in val_bar:
                lr = val_lr
                hr = val_hr
                if torch.cuda.is_available():
                    lr = lr.cuda()
                    hr = hr.cuda()
                # Two weight swaps per image are unavoidable: one model
                # object alternates between the epoch and baseline weights.
                netG.load_state_dict(epoch_state)
                sr = netG(lr)
                netG.load_state_dict(baseline_state)
                sr_baseline = netG(lr)
                # Avoid out of memory crash on 8G GPU
                if len(dev_images) < 80:
                    dev_images.extend([
                        to_image()(val_hr_restore.squeeze(0)),
                        to_image()(hr.data.cpu().squeeze(0)),
                        to_image()(sr.data.cpu().squeeze(0)),
                        to_image()(sr_baseline.data.cpu().squeeze(0)),
                    ])
            if not dev_images:
                continue  # empty dev set: torch.stack([]) would raise
            grids = torch.chunk(torch.stack(dev_images), len(dev_images) // 4)
            dev_save_bar = tqdm(grids, desc='[saving training results]')
            index = 1
            for image in dev_save_bar:
                image = utils.make_grid(image, nrow=4, padding=5)
                utils.save_image(image, out_path + 'epoch_%d_index_%d.png' % (epoch, index), padding=5)
                index += 1
def run_single(params):
    """Register one source/target image pair with the ANHIR method.

    ``params`` is a dict with keys: ``source_path``, ``target_path``,
    ``source_landmarks_path``, ``target_landmarks_path``, ``status``
    ("training" enables rTRE evaluation; anything else is treated as an
    evaluation case), ``y_size``, ``x_size``, ``output_path``, ``id``.

    Writes the normalized images, transformed source landmarks, an
    ``info.json`` and (for training cases) a TRE report into
    ``<output_path>/<id>/``, and returns a dict with the landmark file path
    plus the initial/resulting median rTRE (0/0 when unavailable).
    """
    source_path = params['source_path']
    target_path = params['target_path']
    source_landmarks_path = params['source_landmarks_path']
    target_landmarks_path = params['target_landmarks_path']
    status = params['status']
    y_size = params['y_size']
    x_size = params['x_size']
    results_path = params['output_path']
    current_id = params['id']

    case_dir = os.path.join(results_path, str(current_id))
    # exist_ok closes the race between the original isdir check and mkdir.
    os.makedirs(case_dir, exist_ok=True)

    source = utils.load_image(source_path)
    target = utils.load_image(target_path)
    source_landmarks = utils.load_landmarks(source_landmarks_path)
    if status == "training":
        print()
        print("Training case.")
        target_landmarks = utils.load_landmarks(target_landmarks_path)
    else:
        print()
        print("Evaluation case.")

    p_target, p_source, ia_target, ng_target, nr_target, i_u_x, i_u_y, u_x_nr, u_y_nr, warp_resampled_landmarks, warp_original_landmarks, return_dict = am.anhir_method(target, source)
    transformed_landmarks = warp_original_landmarks(source_landmarks)

    p_target = utils.normalize(p_target)
    p_source = utils.normalize(p_source)
    ia_target = utils.normalize(ia_target)
    ng_target = utils.normalize(ng_target)
    nr_target = utils.normalize(nr_target)

    p_target_i = utils.to_image(p_target)
    p_source_i = utils.to_image(p_source)
    ia_target_i = utils.to_image(ia_target)
    ng_target_i = utils.to_image(ng_target)
    nr_target_i = utils.to_image(nr_target)

    json_return_dict = json.dumps(return_dict)
    with open(os.path.join(case_dir, "info.json"), "w") as f:
        f.write(json_return_dict)

    if status == "training":
        try:
            o_median = np.median(utils.rtre(source_landmarks, target_landmarks, x_size, y_size))
            r_median = np.median(utils.rtre(transformed_landmarks, target_landmarks, x_size, y_size))
            print("Initial rTRE: ", o_median)
            print("Resulting rTRE: ", r_median)
            string_to_save = "Initial TRE: " + str(o_median) + "\n" + "Resulting TRE: " + str(r_median)
            txt_path = os.path.join(case_dir, "tre.txt")
            with open(txt_path, "w") as file:
                file.write(string_to_save)
        # Was a bare `except:` — narrowed so SystemExit/KeyboardInterrupt
        # still propagate while landmark-shape errors are reported to a file.
        except Exception:
            string_to_save = "Landmarks ERROR"
            txt_path = os.path.join(case_dir, "tre_error.txt")
            with open(txt_path, "w") as file:
                file.write(string_to_save)

    sitk.WriteImage(p_source_i, os.path.join(case_dir, "source.png"))
    sitk.WriteImage(p_target_i, os.path.join(case_dir, "target.png"))
    sitk.WriteImage(ng_target_i, os.path.join(case_dir, "target_ng.png"))
    sitk.WriteImage(nr_target_i, os.path.join(case_dir, "target_nr.png"))
    sitk.WriteImage(ia_target_i, os.path.join(case_dir, "target_ia.png"))

    transformed_source_landmarks_path = os.path.join(case_dir, "transformed_source_landmarks.csv")
    utils.save_landmarks(transformed_source_landmarks_path, transformed_landmarks)

    return_dict = dict()
    return_dict['transformed_source_landmarks_path'] = os.path.join(str(current_id), "transformed_source_landmarks.csv")
    if status == "training":
        try:
            return_dict['initial_tre'] = o_median
            return_dict['resulting_tre'] = r_median
        # o_median/r_median are unbound (NameError) when the rTRE
        # computation above failed; fall back to 0 as before.
        except Exception:
            return_dict['initial_tre'] = 0
            return_dict['resulting_tre'] = 0
    return return_dict
# image_path = str(sys.argv[1])
# image = Image.open(image_path)

# Input directory of .png images comes from the command line; HOG
# visualizations are written into FOLDER_NAME under the current directory.
img_dir_path = str(sys.argv[1])
stored_path = os.path.join(os.getcwd(), FOLDER_NAME)
print("des_path", stored_path)
if not os.path.exists(stored_path):
    os.mkdir(stored_path)

for filename in glob.glob(os.path.join(img_dir_path, '*.png')):
    file_path = os.path.join(os.getcwd(), filename)
    stem = os.path.splitext(os.path.basename(filename))[0]
    print(f'Generating hog feature from {file_path}')

    image = Image.open(os.path.join(os.getcwd(), img_dir_path, stem + '.png'))
    _, hog_vis = hog(
        image,
        orientations=9,
        pixels_per_cell=(4, 4),
        cells_per_block=(2, 2),
        visualize=True,
        multichannel=True,
    )
    # Stretch the (mostly dark) HOG image so it displays well.
    rescaled = exposure.rescale_intensity(hog_vis, in_range=(0, 10))
    utils.to_image(rescaled).save(os.path.join(stored_path, stem) + '.png', 'png')
# Sliding-window scan over an image pyramid with immediate per-window
# classification: positive crops are saved to disk and (on downscaled
# levels) shown with their bounding box for manual inspection.
for resized_image in pyramid(image, scale):
    # loop over the sliding window for each layer of the pyramid
    for (x, y, window) in sliding_window(resized_image, stepSize=32, windowSize=(winW, winH)):
        num_window += 1
        # if the window does not meet our desired window size, ignore it
        # (windows clipped at the right/bottom border)
        if window.shape[0] != winH or window.shape[1] != winW:
            continue
        # Crop the current window and normalize it for the classifier.
        curWindow = (x, y, x + winW, y + winH)
        subImage = utils.to_image(resized_image).crop(curWindow)
        normalized_img = pre_processing_data.process_single_file(subImage)
        # Positive detection: persist the crop, named by a running counter
        # and its pyramid level.
        if predict.detect_single_img(normalized_img) > 0:
            subImage.save(
                os.path.join(stored_path, str(count) + '_level' + str(level) + '.png'), 'png')
            count += 1
            # NOTE(review): nesting reconstructed — the display block is
            # assumed to belong to the positive-detection branch; confirm
            # against the original layout. waitKey(0) blocks per window.
            if level > 0:
                cv2.rectangle(resized_image, (x, y), (x + winW, y + winH), (0, 255, 0), 2)
                cv2.imshow("new win", resized_image)
                cv2.waitKey(0)
print("haha")
def main():
    """Compare two SRGAN generator checkpoints against the baseline.

    For each dev image, saves a 5-wide grid
    (bicubic / baseline SR / model0 SR / model1 SR / ground-truth HR)
    under ``vis/``.
    """
    parser = argparse.ArgumentParser(description='Validate SRGAN')
    parser.add_argument('--val_set', default='data/val', type=str, help='dev set path')
    parser.add_argument('--m0', default='cp/netG_SRGAN_gpu.pth', type=str, help='model0')
    parser.add_argument('--m1', default='cp/netG_SRWGANGP_gpu.pth', type=str, help='model1')
    opt = parser.parse_args()

    val_set = DevDataset(opt.val_set, upscale_factor=4)
    val_loader = DataLoader(dataset=val_set, num_workers=1, batch_size=1, shuffle=False)

    netG = Generator()
    if torch.cuda.is_available():
        netG.cuda()

    out_path = 'vis/'
    if not os.path.exists(out_path):
        os.makedirs(out_path)

    # Read each checkpoint from disk once instead of once per image.
    state_m0 = torch.load(opt.m0)
    state_m1 = torch.load(opt.m1)
    state_baseline = torch.load('cp/netG_baseline_gpu.pth')

    with torch.no_grad():
        netG.eval()
        val_bar = tqdm(val_loader)
        dev_images = []
        for val_lr, val_bic, val_hr in val_bar:
            # BUG FIX: lr/hr were previously assigned only inside the
            # `torch.cuda.is_available()` branch, raising NameError on
            # CPU-only machines.
            lr = val_lr
            hr = val_hr
            if torch.cuda.is_available():
                lr = lr.cuda()
                hr = hr.cuda()
            netG.load_state_dict(state_m0)
            sr0 = netG(lr)
            netG.load_state_dict(state_m1)
            sr1 = netG(lr)
            netG.load_state_dict(state_baseline)
            sr_baseline = netG(lr)
            # Avoid out of memory crash on 8G GPU
            if len(dev_images) < 80:
                dev_images.extend([
                    to_image()(val_bic.data.cpu().squeeze(0)),
                    to_image()(sr_baseline.data.cpu().squeeze(0)),
                    to_image()(sr0.data.cpu().squeeze(0)),
                    to_image()(sr1.data.cpu().squeeze(0)),
                    to_image()(hr.data.cpu().squeeze(0)),
                ])
        if not dev_images:
            return  # empty dev set: torch.stack([]) would raise
        grids = torch.chunk(torch.stack(dev_images), len(dev_images) // 5)
        dev_save_bar = tqdm(grids, desc='[saving images]')
        index = 1
        for image in dev_save_bar:
            image = utils.make_grid(image, nrow=5, padding=5)
            utils.save_image(image, out_path + '%d.png' % (index), padding=5)
            index += 1
# Sliding-window scan over an image pyramid (16-px stride variant): each
# valid crop is normalized and queued in lst_img, with its position and
# pyramid level in lst_imgDetail, for one batched prediction afterwards.
for resized_image in pyramid(image, scale):
    # loop over the sliding window for each layer of the pyramid
    # for (x, y, window) in sliding_window(resized_image, stepSize=8, windowSize=(winW, winH)):
    for (x, y, window) in sliding_window(resized_image, stepSize=16, windowSize=(winW, winH)):
        # if the window does not meet our desired window size, ignore it
        # (windows clipped at the right/bottom border)
        if window.shape[0] != winH or window.shape[1] != winW:
            continue
        # Crop the current window and normalize it for the classifier.
        curWindow = (x, y, x + winW, y + winH)
        subImage = utils.to_image(resized_image).crop(curWindow)
        normalized_img = pre_processing_data.process_single_file(subImage)
        lst_img.append(normalized_img)
        # Remember where the crop came from so a positive prediction can be
        # mapped back to original-image coordinates later.
        imgDetail = (x, y, level, resized_image)
        lst_imgDetail.append(imgDetail)
        # since we do not have a classifier, we'll just draw the window
        # clone = resized_image.copy()
        # cv2.rectangle(clone, (x, y), (x + winW, y + winH), (0, 255, 0), 2)
        # cv2.imshow("Window", clone)
        # cv2.waitKey(1)
        # time.sleep(0.025)
    # NOTE(review): `level` indexes the pyramid layer, matching the other
    # scan routines in this file — increment placed at the outer-loop level
    # to mirror scan_image().
    level += 1
visualize=False, feature_vector=False, multichannel=None)
# ^ continuation of a hog(...) call whose opening arguments are on an
#   earlier, unseen line; fd receives the raw (non-flattened) descriptor.
print(fd.shape)


def get_ori_coor(x, y, scaleX, scaleY):
    """Map a point from a resized image back to original-image coordinates
    using the per-axis scale factors."""
    return int(x * scaleX), int(y * scaleY)


# Manual sanity check for coordinate rescaling: draw the same window on a
# 1.25x-downscaled image and, with rescaled coordinates, on the original.
image = cv2.imread('/Users/hungdao/Pictures/rubick.JPG')
x = 150
y = 100
win_size = 128
ori_clone = image.copy()
# Downscale by 1.25x, then recover the effective per-axis scale factors.
resized_img = imutils.resize(image, width=int(image.shape[1] / 1.25))
scaleX = 1 / (resized_img.shape[1] / image.shape[1])
scaleY = 1 / (resized_img.shape[0] / image.shape[0])
ori_x, ori_y = get_ori_coor(x, y, scaleX, scaleY)
ori_win_size = int(win_size * 1.25)
cv2.rectangle(resized_img, (x, y), (x + win_size, y + win_size), (0, 255, 0), 2)
cv2.rectangle(ori_clone, (ori_x, ori_y), (ori_x + ori_win_size, ori_y + ori_win_size), (0, 255, 0), 2)
cv2.imshow("new win", resized_img)
cv2.imshow("ori win", ori_clone)
# OpenCV loads BGR; convert so the saved JPEG has correct colors.
resized_img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2RGB)
new_img = utils.to_image(resized_img).save(
    os.path.join('/Users/hungdao/Pictures/', 'test' + '.jpg'), 'jpeg')
cv2.waitKey()