def translate_simple(self, content_image, class_code):
    self.eval()
    xa = mbcuda(content_image)
    s_xb_current = mbcuda(class_code)
    c_xa_current = self.gen_test.enc_content(xa)
    xt_current = self.gen_test.decode(c_xa_current, s_xb_current)
    return xt_current
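# Hedged usage sketch: example_translate below is hypothetical glue (not part
# of the original code) showing how translate_simple pairs with
# compute_k_style. It assumes `model` is this FUNIT model and `transform` is
# the repo's Resize(128)/ToTensor/Normalize pipeline.
def example_translate(model, transform, content_img, style_img):
    content = mbcuda(transform(content_img).unsqueeze(0))
    style = mbcuda(transform(style_img).unsqueeze(0))
    with torch.no_grad():
        class_code = model.compute_k_style(style, 1)  # 1-shot class code
        return model.translate_simple(content, class_code)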
def load_face_detector(self, face_detector_type: str, face_finder_model: str):
    if face_detector_type == 'auto':
        if torch.cuda.is_available():
            face_detector_type = 'cnn'
        else:
            face_detector_type = 'haar'
    if face_detector_type == 'cnn':
        print("Loading CNN face detector...")
        self.face_cnn = True
        self.face_detect_model = S3fd_Model()
        self.face_detect_model.load_state_dict(torch.load(face_finder_model))
        mbcuda(self.face_detect_model)
        self.face_detect_model.eval()
    elif face_detector_type == 'haar':
        cv2_base_dir = os.path.dirname(os.path.abspath(cv2.__file__))
        haar_model = os.path.join(
            cv2_base_dir, 'data/haarcascade_frontalface_default.xml')
        print(f"Loading Haar face detector {haar_model}...")
        self.face_cnn = False
        self.haar_face_finder = cv2.CascadeClassifier(haar_model)
    else:
        raise RuntimeError(
            f"Unknown face_detector type {face_detector_type}. "
            f"Must be auto, cnn, or haar.")
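# Hypothetical method sketch (not in the original) of how the two detector
# back ends would be dispatched downstream; detect_faces is the repo's S3FD
# helper and detectMultiScale is the standard OpenCV Haar cascade API.
def example_find_faces(self, img):
    if self.face_cnn:
        return detect_faces(self.face_detect_model, img)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    return self.haar_face_finder.detectMultiScale(
        gray, minSize=(self.min_face_size, self.min_face_size))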
def test_ellen_selfie():
    model = S3fd_Model()
    try:
        state_dict = torch.load("pretrained-models/s3fd_convert.pth")
        model.load_state_dict(state_dict)
    except Exception:
        print("Failed to load pre-trained model for test")
        raise
    mbcuda(model)
    model.eval()
    with torch.no_grad():
        img = cv2.imread('samples/ellen-selfie.jpg')
        faces = detect_faces(model, img)
    assert len(faces) == 11
def forward(self, co_data, cl_data, hp, mode):
    xa = mbcuda(co_data[0])
    la = mbcuda(co_data[1])
    xb = mbcuda(cl_data[0])
    lb = mbcuda(cl_data[1])
    if mode == 'gen_update':
        c_xa = self.gen.enc_content(xa)
        s_xa = self.gen.enc_class_model(xa)
        s_xb = self.gen.enc_class_model(xb)
        xt = self.gen.decode(c_xa, s_xb)  # translation
        xr = self.gen.decode(c_xa, s_xa)  # reconstruction
        l_adv_t, gacc_t, xt_gan_feat = self.dis.calc_gen_loss(xt, lb)
        l_adv_r, gacc_r, xr_gan_feat = self.dis.calc_gen_loss(xr, la)
        _, xb_gan_feat = self.dis(xb, lb)
        _, xa_gan_feat = self.dis(xa, la)
        l_c_rec = recon_criterion(
            xr_gan_feat.mean(3).mean(2), xa_gan_feat.mean(3).mean(2))
        l_m_rec = recon_criterion(
            xt_gan_feat.mean(3).mean(2), xb_gan_feat.mean(3).mean(2))
        l_x_rec = recon_criterion(xr, xa)
        l_adv = 0.5 * (l_adv_t + l_adv_r)
        acc = 0.5 * (gacc_t + gacc_r)
        l_total = (hp['gan_w'] * l_adv + hp['r_w'] * l_x_rec +
                   hp['fm_w'] * (l_c_rec + l_m_rec))
        l_total.backward()
        return l_total, l_adv, l_x_rec, l_c_rec, l_m_rec, acc
    elif mode == 'dis_update':
        xb.requires_grad_()
        l_real_pre, acc_r, resp_r = self.dis.calc_dis_real_loss(xb, lb)
        l_real = hp['gan_w'] * l_real_pre
        l_real.backward(retain_graph=True)
        l_reg_pre = self.dis.calc_grad2(resp_r, xb)
        l_reg = 10 * l_reg_pre
        l_reg.backward()
        with torch.no_grad():
            c_xa = self.gen.enc_content(xa)
            s_xb = self.gen.enc_class_model(xb)
            xt = self.gen.decode(c_xa, s_xb)
        l_fake_p, acc_f, resp_f = self.dis.calc_dis_fake_loss(
            xt.detach(), lb)
        l_fake = hp['gan_w'] * l_fake_p
        l_fake.backward()
        l_total = l_fake + l_real + l_reg
        acc = 0.5 * (acc_f + acc_r)
        return l_total, l_fake_p, l_real_pre, l_reg_pre, acc
    else:
        assert 0, 'Unsupported operation'
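# Hedged sketch of the alternating update this forward() supports. The model
# handle and optimizer objects (gen_opt, dis_opt) are assumptions, not code
# from this repo; note that forward() already calls backward() internally, so
# only zero_grad()/step() are needed here.
def example_train_step(model, co_data, cl_data, hp, gen_opt, dis_opt):
    dis_opt.zero_grad()
    d_losses = model.forward(co_data, cl_data, hp, 'dis_update')
    dis_opt.step()
    gen_opt.zero_grad()
    g_losses = model.forward(co_data, cl_data, hp, 'gen_update')
    gen_opt.step()
    return d_losses, g_losses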
def forward(self, x, y):
    assert x.size(0) == y.size(0)
    feat = self.cnn_f(x)
    out = self.cnn_c(feat)
    # Select, for each sample, the response map of that sample's class label.
    index = mbcuda(torch.LongTensor(range(out.size(0))))
    out = out[index, y, :, :]
    return out, feat
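# Tiny runnable illustration (not original code) of the out[index, y, :, :]
# gather above: with out of shape (B, num_classes, H, W) and labels y of
# shape (B,), it picks out[i, y[i]] for every sample i.
def example_class_gather():
    out = torch.arange(2 * 3 * 2 * 2, dtype=torch.float).view(2, 3, 2, 2)
    y = torch.LongTensor([2, 0])
    index = torch.LongTensor(range(out.size(0)))
    picked = out[index, y, :, :]  # shape (2, 2, 2)
    assert torch.equal(picked[0], out[0, 2])
    assert torch.equal(picked[1], out[1, 0])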
def __init__(
    self,
    config_file: str = 'funit/configs/funit_animals.yaml',
    face_finder_model: str = 'pretrained-models/s3fd_convert.pth',
    funit_model: str = 'pretrained-models/animal149_gen.pt',
    target_image_folder: str = 'target-images/meerkat',
    grow_facebox: float = 0.2,
    cycle_delay: float = 5.0,
    extra_detail: int = 2,
    min_face_size: int = 20,
    max_faces: int = 5,
    color_map: str = '1,1,1',
    scale_embedding: float = 1.0,
    max_alpha: float = 0.7,
    face_detector_type: str = 'auto',
):
    self.face_transform_cnt = 0
    self.grow_facebox = grow_facebox
    self.extra_detail = extra_detail
    self.cycle_delay = cycle_delay
    self.min_face_size = min_face_size
    self.max_faces = max_faces
    self.set_color(*[float(n) for n in color_map.split(',')])
    self.scale_embedding = scale_embedding
    self.max_alpha = max_alpha
    self.load_face_detector(face_detector_type, face_finder_model)
    print("Loading trainer...")
    config = get_config(config_file)
    self.trainer = Trainer(config)
    mbcuda(self.trainer)
    self.trainer.load_ckpt(funit_model)
    self.trainer.eval()
    print("Loading transformer...")
    transform_list = [
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ]
    transform_list = [torchvision.transforms.Resize(
        (128, 128))] + transform_list
    self.transform = torchvision.transforms.Compose(transform_list)
    self.target_embedding = self.target_embedding_from_images(
        target_image_folder)
def calc_dis_real_loss(self, input_real, input_label):
    resp_real, gan_feat = self.forward(input_real, input_label)
    total_count = mbcuda(
        torch.tensor(np.prod(resp_real.size()), dtype=torch.float))
    # Hinge loss for real samples: penalize responses below +1.
    real_loss = torch.nn.ReLU()(1.0 - resp_real).mean()
    correct_count = (resp_real >= 0).sum()
    real_accuracy = correct_count.type_as(real_loss) / total_count
    return real_loss, real_accuracy, resp_real
def olist_from_img(net: nn.Module, img: np.ndarray) -> List[torch.Tensor]:
    # S3FD preprocessing: subtract the per-channel BGR mean, then HWC -> NCHW.
    img = img - np.array([104, 117, 123])
    img = img.transpose(2, 0, 1)
    img = img.reshape((1, ) + img.shape)
    img = mbcuda(Variable(torch.from_numpy(img).float()))
    olist = net(img)
    return olist
def calc_gen_loss(self, input_fake, input_fake_label):
    resp_fake, gan_feat = self.forward(input_fake, input_fake_label)
    total_count = mbcuda(
        torch.tensor(np.prod(resp_fake.size()), dtype=torch.float))
    # Generator hinge loss: push the discriminator's response upward.
    loss = -resp_fake.mean()
    correct_count = (resp_fake >= 0).sum()
    accuracy = correct_count.type_as(loss) / total_count
    return loss, accuracy, gan_feat
def calc_dis_fake_loss(self, input_fake, input_label):
    resp_fake, gan_feat = self.forward(input_fake, input_label)
    total_count = mbcuda(
        torch.tensor(np.prod(resp_fake.size()), dtype=torch.float))
    # Hinge loss for fake samples: penalize responses above -1.
    fake_loss = torch.nn.ReLU()(1.0 + resp_fake).mean()
    correct_count = (resp_fake < 0).sum()
    fake_accuracy = correct_count.type_as(fake_loss) / total_count
    return fake_loss, fake_accuracy, resp_fake
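# Quick numeric check (illustrative, not original code) of the hinge-GAN
# objectives above: reals are penalized below +1, fakes above -1, and the
# generator simply maximizes the discriminator's response.
def example_hinge_losses():
    resp = torch.tensor([[2.0], [-0.5]])
    real_loss = torch.nn.ReLU()(1.0 - resp).mean()  # only -0.5 is penalized
    fake_loss = torch.nn.ReLU()(1.0 + resp).mean()  # only 2.0 is penalized
    gen_loss = -resp.mean()
    assert torch.isclose(real_loss, torch.tensor(0.75))
    assert torch.isclose(fake_loss, torch.tensor(1.75))
    assert torch.isclose(gen_loss, torch.tensor(-0.75))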
def compute_k_style(self, style_batch, k):
    self.eval()
    style_batch = mbcuda(style_batch)
    s_xb_before = self.gen_test.enc_class_model(style_batch)
    # Average the k per-image class codes into a single code via 1-d pooling.
    s_xb_after = s_xb_before.squeeze(-1).permute(1, 2, 0)
    s_xb_pool = torch.nn.functional.avg_pool1d(s_xb_after, k)
    s_xb = s_xb_pool.permute(2, 0, 1).unsqueeze(-1)
    return s_xb
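# Hedged usage sketch (hypothetical helper, not original code): build a
# k-shot class code from several style images. Stacking the images along
# dim 0 is an assumption consistent with the avg_pool1d trick above.
def example_k_shot_code(model, transform, style_imgs):
    batch = torch.cat([transform(im).unsqueeze(0) for im in style_imgs])
    with torch.no_grad():
        return model.compute_k_style(batch, len(style_imgs))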
def translate_k_shot(self, co_data, cl_data, k):
    self.eval()
    xa = mbcuda(co_data[0])
    xb = mbcuda(cl_data[0])
    c_xa_current = self.gen_test.enc_content(xa)
    if k == 1:
        s_xb_current = self.gen_test.enc_class_model(xb)
    else:
        # Average the k class codes via 1-d pooling, as in compute_k_style.
        s_xb_current_before = self.gen_test.enc_class_model(xb)
        s_xb_current_after = s_xb_current_before.squeeze(-1).permute(1, 2, 0)
        s_xb_current_pool = torch.nn.functional.avg_pool1d(
            s_xb_current_after, k)
        s_xb_current = s_xb_current_pool.permute(2, 0, 1).unsqueeze(-1)
    xt_current = self.gen_test.decode(c_xa_current, s_xb_current)
    return xt_current
def test(self, co_data, cl_data):
    self.eval()
    self.gen.eval()
    self.gen_test.eval()
    xa = mbcuda(co_data[0])
    xb = mbcuda(cl_data[0])
    c_xa_current = self.gen.enc_content(xa)
    s_xa_current = self.gen.enc_class_model(xa)
    s_xb_current = self.gen.enc_class_model(xb)
    xt_current = self.gen.decode(c_xa_current, s_xb_current)
    xr_current = self.gen.decode(c_xa_current, s_xa_current)
    c_xa = self.gen_test.enc_content(xa)
    s_xa = self.gen_test.enc_class_model(xa)
    s_xb = self.gen_test.enc_class_model(xb)
    xt = self.gen_test.decode(c_xa, s_xb)
    xr = self.gen_test.decode(c_xa, s_xa)
    self.train()
    return xa, xr_current, xt_current, xb, xr, xt
def blend_merge(self, base: np.ndarray, face128: torch.Tensor, x: int,
                y: int, w: int, h: int):
    """Take the 128x128 transformed image, resize it, and blend it back
    into the original image in place."""
    xforms = torchvision.transforms.Compose([
        torchvision.transforms.ToPILImage(),
        torchvision.transforms.Resize((h, w)),
        torchvision.transforms.ToTensor(),
    ])
    face = xforms(face128.cpu())
    face = mbcuda(face)
    face = face.permute(1, 2, 0)  # CHW -> HWC
    face *= 255
    face = face[:, :, [2, 1, 0]]  # RGB -> BGR, to match the OpenCV base image
    face = self.mod_colors(face)
    alpha = self.prepare_alpha_mask_pt(h)
    old = mbcuda(torch.Tensor(base[y:y + h, x:x + w]))
    blended = old * (1 - alpha) + face * alpha
    base[y:y + h, x:x + w] = blended.cpu().numpy()
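# Hypothetical end-to-end method sketch tying the pieces together: crop a
# detected face box, translate it, and blend it back. self.transform and
# self.target_embedding come from __init__ above; box growing is elided, and
# the (-1, 1) -> (0, 1) rescaling assumes the generator's tanh output range.
def example_swap_one_face(self, frame, x, y, w, h):
    crop = frame[y:y + h, x:x + w, [2, 1, 0]]  # BGR -> RGB for the transform
    face = mbcuda(self.transform(Image.fromarray(crop)).unsqueeze(0))
    with torch.no_grad():
        out = self.trainer.model.translate_simple(face, self.target_embedding)
    face128 = (out[0] + 1) / 2  # ToPILImage in blend_merge expects (0, 1)
    self.blend_merge(frame, face128, x, y, w, h)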
def _process_bbox2(stride, anchor, score, loc, hindex, windex, variances):
    axc, ayc = stride / 2 + windex * stride, stride / 2 + hindex * stride
    priors = torch.cat(
        [axc / 1.0, ayc / 1.0, stride * 4 / 1.0,
         stride * 4 / 1.0]).unsqueeze(0)
    if not use_cpu_for_decoding_bbox:
        priors = mbcuda(priors)
        variances = mbcuda(variances)
    box = decode(loc, priors, variances)
    x1, y1, x2, y2 = box[0] * 1.0
    return (x1, y1, x2, y2, score)
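# For reference, a sketch of the standard SSD box decode that the imported
# `decode` is assumed to implement (an assumption, not this repo's code):
# priors are in (cx, cy, w, h) form, loc holds variance-scaled offsets, and
# the result is converted to corner form.
def example_decode(loc, priors, variances):
    boxes = torch.cat((
        priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:],
        priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1)
    boxes[:, :2] -= boxes[:, 2:] / 2  # center -> top-left corner
    boxes[:, 2:] += boxes[:, :2]      # width/height -> bottom-right corner
    return boxes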
def target_embedding_from_images(self,
                                 target_image_folder: str) -> torch.Tensor:
    images = os.listdir(target_image_folder)
    print(f"Found {len(images)} target images in {target_image_folder}")
    new_class_code = None
    used_count = 0
    for f in images:
        if f.startswith('.') or f == "LICENSE":
            continue  # skip .DS_Store, ._whatever, or the LICENSE file
        fn = os.path.join(target_image_folder, f)
        img = Image.open(fn).convert('RGB')
        img_tensor = mbcuda(self.transform(img).unsqueeze(0))
        with torch.no_grad():
            class_code = self.trainer.model.compute_k_style(img_tensor, 1)
        if new_class_code is None:
            new_class_code = class_code
        else:
            new_class_code += class_code
        used_count += 1
    # Average over the images actually used, not every directory entry;
    # skipped dotfiles and LICENSE would otherwise skew the mean.
    return new_class_code / used_count
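# Usage note (assumption about the intended layout): target_image_folder
# holds a handful of RGB images of one class, e.g. target-images/meerkat/.
# The returned tensor is the mean class code and can be passed directly as
# the class_code argument of translate_simple above.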
def prepare_alpha_mask_pt(self, h: int,
                          alpha_clamp: float = 0.5) -> torch.Tensor:
    """alpha_clamp: smaller numbers mean harsher borders, but use more of
    the generated image."""
    # Some heuristic math to come up with an alpha mask to apply to the
    # image before pasting it back in.
    line = mbcuda(
        torch.arange(-1, 1, 2 / h, dtype=torch.float32).unsqueeze(0))
    # see https://github.com/pytorch/pytorch/issues/28347
    assert len(line.shape) == 2
    line = line[:, 0:h]
    assert line.shape == (1, h)
    alpha = line.T + line
    assert len(alpha.shape) == 2
    alpha = torch.abs(alpha) + torch.abs(torch.rot90(alpha))
    # Pretty much all of these constants can be tweaked to change how
    # blending looks.
    alpha = torch.exp(-((alpha / 3)**2) * 5)
    alpha = (alpha - alpha.min())**0.8
    alpha = torch.clamp(alpha, 0, alpha_clamp) / alpha_clamp * self.max_alpha
    alpha = alpha.unsqueeze(2).repeat(1, 1, 3)
    return alpha
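# Small sanity-check sketch (illustrative, not original code) that the mask
# behaves as intended: opacity peaks at max_alpha in the center and fades
# toward the edges, over a 3-channel (h, h, 3) mask.
def example_inspect_alpha(self, h=64):
    alpha = self.prepare_alpha_mask_pt(h)
    assert alpha.shape == (h, h, 3)
    center = alpha[h // 2, h // 2, 0]
    corner = alpha[0, 0, 0]
    assert center > corner  # more of the generated face in the middle
    assert float(alpha.max()) <= self.max_alpha + 1e-6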
import argparse
import time

import cv2
import numpy as np
import torch

from nntools.maybe_cuda import mbcuda
import net_s3fd
from detect_faces import detect_faces

parser = argparse.ArgumentParser(description='PyTorch face detect')
parser.add_argument('--net', '-n', default='s3fd', type=str)
parser.add_argument('--model', required=True, type=str)
parser.add_argument('--path', default='CAMERA', type=str)
args = parser.parse_args()

use_cuda = torch.cuda.is_available()

net = getattr(net_s3fd, args.net)()
net.load_state_dict(torch.load(args.model))
mbcuda(net)
net.eval()

if args.path == 'CAMERA':
    cap = cv2.VideoCapture(0)

with torch.no_grad():
    while True:
        if args.path == 'CAMERA':
            ret, img = cap.read()
        else:
            img = cv2.imread(args.path)
        imgshow = np.copy(img)
        start_time = time.time()
        bboxlist = detect_faces(net, img, 3)
        print(
def set_color(self, R: float, G: float, B: float):
    self.colorshift = mbcuda(torch.Tensor([[[R, G, B]]]))
                    default='images/n02138411')
parser.add_argument('--input', type=str, default='images/input_content.jpg')
parser.add_argument('--output', type=str, default='images/output.jpg')
opts = parser.parse_args()

cudnn.benchmark = True
opts.vis = True
config = get_config(opts.config)
config['batch_size'] = 1
config['gpus'] = 1

trainer = Trainer(config)
mbcuda(trainer)
trainer.load_ckpt(opts.ckpt)
trainer.eval()

transform_list = [transforms.ToTensor(),
                  transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
transform_list = [transforms.Resize((128, 128))] + transform_list
transform = transforms.Compose(transform_list)

print('Compute average class codes for images in %s' %
      opts.class_image_folder)
images = os.listdir(opts.class_image_folder)
for i, f in enumerate(images):
    fn = os.path.join(opts.class_image_folder, f)
    img = Image.open(fn).convert('RGB')
    img_tensor = mbcuda(transform(img).unsqueeze(0))
    with torch.no_grad():