def main(opt):
    """Run 3DMM face reconstruction on every frame of a video.

    Pipeline per frame: detect face centers with a CenterNet-style model
    (1 'hm' channel), gather 257 regressed 3DMM coefficients per detection,
    reconstruct shape/texture with the BFM-based renderer, render the faces
    with PyTorch3D, and blend the rendering back into the frame, which is
    written to ``opt.output/<00000000>.jpg``.

    Requires a CUDA device (model and renderer are moved to GPU).
    ``opt.show`` (optional, default False) enables per-frame matplotlib
    preview windows.
    """
    print('Creating model...')
    # opt.input_res = 256
    render = PtRender(opt).cuda().eval()
    # One heatmap channel for face centers, 257 3DMM params per center.
    opt.heads = {'hm': 1, 'params': 257}
    model = create_model(opt.arch, opt.heads)
    if opt.load_model != '':
        model = load_model(model, opt.load_model)
    model.cuda().eval()
    if not os.path.exists(opt.output):
        os.makedirs(opt.output)
    # FIX: the original called plt.imshow()/plt.show() unconditionally (twice
    # per frame), blocking the batch loop on a GUI window.  Display is now
    # opt-in via an optional `show` attribute on opt; omitting it keeps the
    # loop fully non-interactive.
    show = getattr(opt, 'show', False)
    with torch.no_grad():
        for i, image in enumerate(get_frames(opt.video_path)):
            h, w, _ = image.shape
            if show:
                plt.imshow(image[..., ::-1])  # BGR -> RGB for matplotlib
                plt.show()
            outfile = os.path.join(opt.output, '{}.jpg'.format(str(i).zfill(8)))
            pre_img, meta = preprocess(image.copy(), opt.input_res)
            output, topk_scores, topk_inds, topk_ys, topk_xs = decode(
                pre_img, model)
            # Gather the 257-dim parameter vector at each top-k center.
            params = _tranpose_and_gather_feat(output['params'], topk_inds)
            B, C, _ = params.size()
            if C == 0:
                # No detection: write the untouched frame and move on.
                print('no face!')
                cv2.imwrite(outfile, image)
                continue
            # 3DMM formation
            # split coefficients (identity / expression / texture / pose+light)
            id_coeff, ex_coeff, tex_coeff, coeff = render.Split_coeff(
                params.view(-1, params.size(2)))
            render.set_RotTransLight(coeff, topk_inds.view(-1))
            # reconstruct shape
            canoShape_ = render.Shape_formation(id_coeff, ex_coeff)
            rotShape = render.RotTrans(canoShape_)
            Albedo = render.Texture_formation(tex_coeff)
            Texture, lighting = render.Illumination(Albedo, canoShape_)
            Texture = torch.clamp(Texture, 0, 1)
            rotShape = rotShape.view(B, C, -1, 3)
            Texture = Texture.view(B, C, -1, 3)
            # Pytorch3D render
            meshes = construct_meshes(rotShape, Texture,
                                      render.BFM.tri.view(1, -1))
            rendered, gpu_masks, depth = render(meshes)  # RGB
            rendered = rendered.squeeze(0).detach().cpu().numpy()
            gpu_masks = gpu_masks.squeeze(0).unsqueeze(-1).cpu().numpy()
            # resize to original image (render is square; crop back to h x w)
            image = image.astype(np.float32) / 255.
            rendered = cv2.resize(rendered, (max(h, w), max(h, w)))[:h, :w]
            gpu_masks = cv2.resize(gpu_masks, (max(h, w), max(h, w)),
                                   interpolation=cv2.INTER_NEAREST)[:h, :w, np.newaxis]
            # Inside the mask blend 90% rendering with 10% original frame.
            image_fuse = image * (1 - gpu_masks) + (0.9 * rendered[..., ::-1] + 0.1 * image) * gpu_masks
            # image_fuse = image * (1 - gpu_masks) + rendered[..., ::-1] * gpu_masks
            cv2.imwrite(outfile, (image_fuse * 255).astype(np.uint8))
            if show:
                plt.imshow(image_fuse[..., ::-1])
                plt.show()
def main():
    """Train a FashionAI keypoint network (MXNet/Gluon).

    Parses CLI hyper-parameters, builds the version-specific network
    (v2: stacked PoseNet, v3/v4: cascade variants), then runs the
    train / evaluate / checkpoint loop, logging scalars to TensorBoard
    (``SummaryWriter``) and a text logger.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=str, default='0')
    parser.add_argument('--epoches', type=int, default=100)
    parser.add_argument('--batch-size', type=int, default=16)
    parser.add_argument('--freq', type=int, default=50)
    parser.add_argument('--lr', type=float, default=1e-4)
    parser.add_argument('--wd', type=float, default=1e-4)
    parser.add_argument('--optim', type=str, default='adam', choices=['sgd', 'adam'])
    parser.add_argument('--seed', type=int, default=666)
    parser.add_argument('--steps', type=str, default='1000')
    parser.add_argument('--lr-decay', type=float, default=0.1)
    parser.add_argument('--backbone', type=str, default='resnet50', choices=['resnet50', 'resnet101'])
    parser.add_argument('--model-path', type=str, default='')
    parser.add_argument('--prefix', type=str, default='default', help='model description')
    parser.add_argument('--version', type=int, default=2, choices=[2, 3, 4], help='model version')
    parser.add_argument('--num-stage', type=int, default=3)
    parser.add_argument('--num-channel', type=int, default=256)
    parser.add_argument('--num-context', type=int, default=2)
    parser.add_argument('--scale', type=int, default=0)
    parser.add_argument('--ohkm', action='store_true')
    args = parser.parse_args()
    # seed — both mxnet and numpy RNGs, for reproducibility
    mx.random.seed(args.seed)
    np.random.seed(args.seed)
    # hyper parameters
    # '--gpu' is a comma-separated list, e.g. "0,1" -> multi-GPU contexts
    ctx = [mx.gpu(int(x)) for x in args.gpu.split(',')]
    num_ctx = len(ctx)
    data_dir = cfg.DATA_DIR
    lr = args.lr
    wd = args.wd
    optim = args.optim
    batch_size = args.batch_size
    epoches = args.epoches
    freq = args.freq
    # '--steps' is a comma-separated list of epoch indices for LR decay
    steps = [int(x) for x in args.steps.split(',')]
    lr_decay = args.lr_decay
    backbone = args.backbone
    prefix = args.prefix
    model_path = None if args.model_path == '' else args.model_path
    num_stage = args.num_stage
    num_channel = args.num_channel
    num_context = args.num_context
    ohkm = args.ohkm
    scale = args.scale
    version = args.version
    # base_name encodes the run configuration; used for log/checkpoint paths
    if version == 2:
        base_name = 'V2.%s-%s-S%d-C%d-C%d-BS%d-%s' % (
            prefix, backbone, num_stage, num_channel,
            num_context, batch_size, optim)
    elif version == 3:
        base_name = 'V3.%s-%s-C%d-BS%d-%s' % (prefix, backbone, num_channel, batch_size, optim)
    elif version == 4:
        base_name = 'V4.%s-%s-C%d-BS%d-%s' % (prefix, backbone, num_channel, batch_size, optim)
    else:
        raise RuntimeError('no such version %d' % version)
    filename = './tmp/%s.log' % base_name
    logger = get_logger(fn=filename)
    logger.info(args)
    # data
    df_train = pd.read_csv(os.path.join(data_dir, 'train.csv'))
    df_test = pd.read_csv(os.path.join(data_dir, 'val.csv'))
    traindata = FashionAIKPSDataSet(df_train, version=version, is_train=True)
    testdata = FashionAIKPSDataSet(df_test, version=version, is_train=False)
    # last_batch='discard' keeps batch size constant across devices
    trainloader = gl.data.DataLoader(traindata, batch_size=batch_size, shuffle=True, last_batch='discard', num_workers=num_ctx)
    testloader = gl.data.DataLoader(testdata, batch_size=batch_size, shuffle=False, last_batch='discard', num_workers=num_ctx)
    epoch_size = len(trainloader)
    # model — either built fresh with a pretrained backbone, or resumed
    if not model_path:
        num_kps = cfg.NUM_LANDMARK
        num_limb = len(cfg.PAF_LANDMARK_PAIR)
        if version == 2:
            net = PoseNet(num_kps=num_kps, num_limb=num_limb, num_stage=num_stage, num_channel=num_channel, num_context=num_context)
            creator, featnames, fixed = cfg.BACKBONE_v2[backbone]
        elif version == 3:
            net = CascadePoseNet(num_kps=num_kps, num_limb=num_limb, num_channel=num_channel, scale=scale)
            creator, featnames, fixed = cfg.BACKBONE_v3[backbone]
        elif version == 4:
            net = CascadeCPMNet(num_kps=num_kps, num_limb=num_limb, num_channel=num_channel, scale=scale)
            # NOTE(review): v4 reuses the v3 backbone config — looks intentional
            creator, featnames, fixed = cfg.BACKBONE_v3[backbone]
        else:
            raise RuntimeError('no such version %d' % version)
        net.initialize(mx.init.Normal(), ctx=ctx)
        net.init_backbone(creator, featnames, fixed, pretrained=True)
    else:
        logger.info('Load net from %s', model_path)
        net = load_model(model_path, version=version, scale=scale)
    net.collect_params().reset_ctx(ctx)
    net.hybridize()
    criterion = SumL2Loss()
    criterion_ohkm = SumL2Loss(ohkm=True)
    criterion.hybridize()
    criterion_ohkm.hybridize()
    # criterions is a (first, second) pair consumed by forward_backward;
    # without --ohkm the same plain L2 loss is used in both slots
    if ohkm:
        criterions = (criterion, criterion_ohkm)
    else:
        criterions = (criterion, criterion)
    # trainer — convert decay epochs to iteration counts for the scheduler
    steps = [epoch_size * x for x in steps]
    lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(step=steps, factor=lr_decay)
    if optim == 'sgd':
        trainer = gl.trainer.Trainer(
            net.collect_params(), 'sgd', {
                'learning_rate': lr,
                'wd': wd,
                'momentum': 0.9,
                'lr_scheduler': lr_scheduler
            })
    else:
        trainer = gl.trainer.Trainer(net.collect_params(), 'adam', {
            'learning_rate': lr,
            'wd': wd,
            'lr_scheduler': lr_scheduler
        })
    # logger — a fresh TensorBoard dir per run (old events are removed)
    log_dir = './log-v%d/%s' % (version, base_name)
    if os.path.exists(log_dir):
        shutil.rmtree(log_dir)
    sw = SummaryWriter(log_dir)
    # Recorders accumulate running loss averages; one pair (heatmap 'h',
    # paf 'p') per stage for v2, fixed global/refine x scale set for v3/v4
    if version == 2:
        rds = []
        for i in range(num_stage):
            rd1 = Recorder('h-%d' % i, freq)
            rd2 = Recorder('p-%d' % i, freq)
            rds.append(rd1)
            rds.append(rd2)
    elif version == 3 or version == 4:
        rds = [Recorder('G-h-04', freq), Recorder('R-h-04', freq), \
               Recorder('G-h-08', freq), Recorder('R-h-08', freq), \
               Recorder('G-h-16', freq), Recorder('R-h-16', freq), \
               Recorder('G-p-04', freq), Recorder('R-p-04', freq), \
               Recorder('G-p-08', freq), Recorder('R-p-08', freq), \
               Recorder('G-p-16', freq), Recorder('R-p-16', freq)]
    else:
        raise RuntimeError('no such version %d' % version)
    # meta info
    global_step = 0
    # forward and backward — pick the version-specific implementation once
    if version == 2:
        forward_backward = forward_backward_v2
    elif version == 3 or version == 4:
        forward_backward = forward_backward_v3
    else:
        raise RuntimeError('no such version %d' % version)
    for epoch_idx in range(epoches):
        # train part
        tic = time.time()
        for rd in rds:
            rd.reset()
        sw.add_scalar('lr', trainer.learning_rate, global_step)
        for batch_idx, packet in enumerate(trainloader):
            # [(l1, l2, ...), (l1, l2, ...)] — per-device loss lists
            losses = forward_backward(net, criterions, ctx, packet, is_train=True)
            trainer.step(batch_size)
            # reduce to [l1, l2, ...]
            ret = reduce_losses(losses)
            for rd, loss in zip(rds, ret):
                rd.update(loss)
            # log every `freq` batches; global_step advances at log points only
            if batch_idx % freq == freq - 1:
                for rd in rds:
                    name, value = rd.get()
                    sw.add_scalar('train/' + name, value, global_step)
                    logger.info('[Epoch %d][Batch %d] %s = %f', epoch_idx + 1, batch_idx + 1, name, value)
                global_step += 1
                toc = time.time()
                speed = (batch_idx + 1) * batch_size / (toc - tic)
                logger.info('[Epoch %d][Batch %d] Speed = %.2f sample/sec', epoch_idx + 1, batch_idx + 1, speed)
        toc = time.time()
        logger.info('[Epoch %d] Global step %d', epoch_idx + 1, global_step - 1)
        logger.info('[Epoch %d] Train Cost %.0f sec', epoch_idx + 1, toc - tic)
        # test part — same forward pass with is_train=False, no trainer.step
        tic = time.time()
        for rd in rds:
            rd.reset()
        for batch_idx, packet in enumerate(testloader):
            losses = forward_backward(net, criterions, ctx, packet, is_train=False)
            ret = reduce_losses(losses)
            for rd, loss in zip(rds, ret):
                rd.update(loss)
        for rd in rds:
            name, value = rd.get()
            sw.add_scalar('test/' + name, value, global_step)
            logger.info('[Epoch %d][Test] %s = %f', epoch_idx + 1, name, value)
        toc = time.time()
        logger.info('[Epoch %d] Test Cost %.0f sec', epoch_idx + 1, toc - tic)
        # save part — checkpoint every epoch, named by run config + epoch
        save_path = './output/%s-%04d.params' % (base_name, epoch_idx + 1)
        net.save_params(save_path)
        logger.info('[Epoch %d] Saved to %s', epoch_idx + 1, save_path)
def work_func(df, idx, args):
    """Worker: detect a garment box per image, predict keypoints inside it,
    and write this worker's results as one CSV shard.

    df   -- DataFrame partition with 'image_id' and 'image_category' columns
    idx  -- worker index, used in log lines and the output filename
    args -- parsed CLI namespace (gpu, data_dir, det_model, kps_model, ...)

    Writes to ``file_pattern % (args.type, idx)`` — `file_pattern` is a
    module-level format string defined outside this function.
    """
    # hyper parameters
    ctx = mx.cpu(0) if args.gpu == -1 else mx.gpu(args.gpu)
    data_dir = args.data_dir
    version = args.version
    show = args.show
    multi_scale = args.multi_scale
    logger = get_logger()
    # model — detection net (anchor-based, two feature levels) + keypoint net
    feat_stride = cfg.FEAT_STRIDE
    scales = cfg.DET_SCALES
    ratios = cfg.DET_RATIOS
    anchor_proposals = [
        AnchorProposal(scales[i], ratios, feat_stride[i]) for i in range(2)
    ]
    detnet = DetNet(anchor_proposals)
    creator, featname, fixed = cfg.BACKBONE_Det['resnet50']
    # pretrained=False: weights come entirely from the checkpoint below
    detnet.init_backbone(creator, featname, fixed, pretrained=False)
    detnet.load_params(args.det_model, ctx)
    detnet.hybridize()
    kpsnet = load_model(args.kps_model, version=version)
    kpsnet.collect_params().reset_ctx(ctx)
    kpsnet.hybridize()
    # data
    image_ids = df['image_id'].tolist()
    image_paths = [os.path.join(data_dir, img_id) for img_id in image_ids]
    image_categories = df['image_category'].tolist()
    # run
    result = []
    for i, (path, category) in enumerate(zip(image_paths, image_categories)):
        img = cv2.imread(path)
        # detection — take the top-scoring box; fall back to the full image
        h, w = img.shape[:2]
        dets = multi_scale_detection(detnet, ctx, img, category)
        if len(dets) != 0:
            bbox = dets[0, :4]
            score = dets[0, -1]
        else:
            bbox = [0, 0, w, h]
            score = 0
        # expand the box by a 20% border before cropping
        bbox = get_border(bbox, w, h, 0.2)
        roi = crop_patch(img, bbox)
        # predict kps on the crop, then shift back to full-image coordinates
        heatmap, paf = multi_scale_predict(kpsnet, ctx, roi, multi_scale)
        kps_pred = detect_kps(roi, heatmap, paf, category)
        x1, y1 = bbox[:2]
        kps_pred[:, 0] += x1
        kps_pred[:, 1] += y1
        result.append(kps_pred)
        # show — interactive debug windows; ESC (27) aborts the loop early
        if show:
            landmark_idx = cfg.LANDMARK_IDX[category]
            heatmap = heatmap[landmark_idx].max(axis=0)
            cv2.imshow('det', draw_box(img, bbox, '%s_%.2f' % (category, score)))
            cv2.imshow('heatmap', draw_heatmap(roi, heatmap))
            cv2.imshow('kps_pred', draw_kps(img, kps_pred))
            cv2.imshow('paf', draw_paf(roi, paf))
            key = cv2.waitKey(0)
            if key == 27:
                break
        if i % 100 == 0:
            logger.info('Worker %d process %d samples', idx, i + 1)
    # save — one CSV shard per worker, one 'x_y_visibility' cell per landmark
    fn = file_pattern % (args.type, idx)
    with open(fn, 'w') as fout:
        header = 'image_id,image_category,neckline_left,neckline_right,center_front,shoulder_left,shoulder_right,armpit_left,armpit_right,waistline_left,waistline_right,cuff_left_in,cuff_left_out,cuff_right_in,cuff_right_out,top_hem_left,top_hem_right,waistband_left,waistband_right,hemline_left,hemline_right,crotch,bottom_left_in,bottom_left_out,bottom_right_in,bottom_right_out\n'
        fout.write(header)
        for img_id, category, kps in zip(image_ids, image_categories, result):
            fout.write(img_id)
            fout.write(',%s' % category)
            for p in kps:
                s = ',%d_%d_%d' % (p[0], p[1], p[2])
                fout.write(s)
            fout.write('\n')
def work_func(df, idx, args):
    """Worker: predict keypoints on full images (no detection stage),
    optionally ensembling two models, and write one CSV result shard.

    df   -- DataFrame partition with 'image_id' and 'image_category' columns
    idx  -- worker index, used in log lines and the output filename
    args -- parsed CLI namespace (gpu, data_dir, model, emodel, ...)

    Writes to ``file_pattern % (args.prefix, args.type, idx)`` —
    `file_pattern` is a module-level format string defined outside here.
    """
    # hyper parameters
    ctx = mx.cpu(0) if args.gpu == -1 else mx.gpu(args.gpu)
    data_dir = args.data_dir
    model_path = args.model
    version = args.version
    scale = args.scale
    show = args.show
    multi_scale = args.multi_scale
    logger = get_logger()
    # model — primary net plus an optional second net for ensembling
    net = load_model(model_path, version=version, scale=scale)
    net.collect_params().reset_ctx(ctx)
    net.hybridize()
    if args.emodel != '':
        enet = load_model(args.emodel, version=args.eversion, scale=scale)
        enet.collect_params().reset_ctx(ctx)
        enet.hybridize()
    else:
        enet = None
    # data
    image_ids = df['image_id'].tolist()
    image_paths = [os.path.join(data_dir, img_id) for img_id in image_ids]
    image_categories = df['image_category'].tolist()
    # run
    result = []
    for i, (path, category) in enumerate(zip(image_paths, image_categories)):
        img = cv2.imread(path)
        # predict — ensemble by averaging heatmaps and PAFs when enet exists
        heatmap, paf = multi_scale_predict(net, ctx, img, multi_scale)
        if enet:
            eheatmap, epaf = multi_scale_predict(enet, ctx, img, multi_scale)
            heatmap = (heatmap + eheatmap) / 2
            paf = (paf + epaf) / 2
        kps_pred = detect_kps(img, heatmap, paf, category)
        result.append(kps_pred)
        # show — interactive debug windows; ESC (27) aborts the loop early
        if show:
            landmark_idx = cfg.LANDMARK_IDX[category]
            # side-by-side view of raw vs Gaussian-blurred heatmap maxima
            ht = cv2.GaussianBlur(heatmap, (7, 7), 0)
            ht = ht[landmark_idx].max(axis=0)
            heatmap = heatmap[landmark_idx].max(axis=0)
            cv2.imshow('heatmap', draw_heatmap(img, heatmap))
            cv2.imshow('heatmap_blur', draw_heatmap(img, ht))
            cv2.imshow('kps_pred', draw_kps(img, kps_pred))
            cv2.imshow('paf', draw_paf(img, paf))
            key = cv2.waitKey(0)
            if key == 27:
                break
        if i % 100 == 0:
            logger.info('Worker %d process %d samples', idx, i + 1)
    # save — one CSV shard per worker, one 'x_y_visibility' cell per landmark
    fn = file_pattern % (args.prefix, args.type, idx)
    with open(fn, 'w') as fout:
        header = 'image_id,image_category,neckline_left,neckline_right,center_front,shoulder_left,shoulder_right,armpit_left,armpit_right,waistline_left,waistline_right,cuff_left_in,cuff_left_out,cuff_right_in,cuff_right_out,top_hem_left,top_hem_right,waistband_left,waistband_right,hemline_left,hemline_right,crotch,bottom_left_in,bottom_left_out,bottom_right_in,bottom_right_out\n'
        fout.write(header)
        for img_id, category, kps in zip(image_ids, image_categories, result):
            fout.write(img_id)
            fout.write(',%s' % category)
            for p in kps:
                s = ',%d_%d_%d' % (p[0], p[1], p[2])
                fout.write(s)
            fout.write('\n')
def main():
    """Evaluate predicted keypoints against ground truth.

    Compares two CSVs row by row and prints the normalized error per
    landmark, per category, and overall.  If --model is given, images whose
    mean error exceeds --th are additionally re-predicted live with that
    model and visualized (ESC aborts the visualization loop).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gt', type=str, default='./data/val.csv')
    parser.add_argument('--pred', type=str, default='./result/tmp_val_result.csv')
    parser.add_argument('--th', type=float, default=0.04)
    parser.add_argument('--model', type=str)
    parser.add_argument('--version', type=int, default=2)
    parser.add_argument('--scale', type=int, default=0)
    args = parser.parse_args()
    print(args)
    img_lst, kps_gt, category = read_csv(args.gt)
    _, kps_pred, _ = read_csv(args.pred)
    assert len(kps_gt) == len(kps_pred)
    # model — only loaded when bad-case re-prediction is requested;
    # net/ctx stay undefined otherwise (all uses are guarded by args.model)
    if args.model:
        ctx = mx.gpu(0)
        net = load_model(args.model, version=args.version, scale=args.scale)
        net.collect_params().reset_ctx(ctx)
        net.hybridize()
    th = args.th
    num_category = len(cfg.CATEGORY)
    num_landmark = cfg.NUM_LANDMARK
    # per-category and per-landmark error accumulators
    result = [[] for _ in range(num_category)]
    kps_result = [[] for _ in range(num_landmark)]
    for img_id, gt, pred, cate in zip(img_lst, kps_gt, kps_pred, category):
        cate_idx = cfg.CATEGORY.index(cate)
        # state flags whether the sample is valid for evaluation;
        # err is per-landmark, idx gives the landmark indices it covers
        err, idx, state = calc_error(pred, gt, cate)
        if state:
            result[cate_idx].append(err)
            for i, e in zip(idx, err):
                kps_result[i].append(e)
            if args.model and err.mean() > th:
                # ori — errors of the CSV prediction being evaluated
                print('-------------------------')
                for i, e in zip(idx, err):
                    print(i, e, gt[i, :2], pred[i, :2])
                print('mean1', err.mean())
                # model — re-predict the bad case live for comparison
                # (pred/err/idx are deliberately rebound to the live result)
                img = cv2.imread('./data/' + img_id)
                heatmap, paf = multi_scale_predict(net, ctx, img, True)
                pred = detect_kps(img, heatmap, paf, cate)
                err, idx, state = calc_error(pred, gt, cate)
                for i, e in zip(idx, err):
                    print(i, e, gt[i, :2], pred[i, :2])
                print('mean2', err.mean())
                print('-------------------------')
                # show — ESC (27) aborts the whole evaluation loop
                landmark_idx = cfg.LANDMARK_IDX[cate]
                heatmap = heatmap[landmark_idx].max(axis=0)
                cv2.imshow('heatmap', draw_heatmap(img, heatmap))
                cv2.imshow('kps_pred', draw_kps(img, pred))
                cv2.imshow('kps_gt', draw_kps(img, gt))
                cv2.imshow('paf', draw_paf(img, paf))
                key = cv2.waitKey(0)
                if key == 27:
                    break
    # per landmark
    # NOTE(review): a landmark with no valid samples yields mean of an
    # empty array (nan + RuntimeWarning) — unchanged here
    for i in range(num_landmark):
        err = np.array(kps_result[i]).mean()
        print('Average Error for %d: %f' % (i, err))
    # per category
    result = [np.hstack(_) for _ in result]
    for i in range(num_category):
        category = cfg.CATEGORY[i]
        err = result[i].mean()
        print('Average Error for %s: %f' % (category, err))
    result = np.hstack(result)
    err = result.mean()
    print('Total Average Error %f' % err)
shuffle=False, num_workers=1)  # tail of a DataLoader(...) call opened before this chunk
print("Test set size: {}.".format(len(dataset)))

# Load model — ImageNet-pretrained ResNet-18 as the base
resnet = torchvision.models.resnet18(pretrained=True)
if use_metadata:
    # Drop the final fc layer; MetadataModel consumes the 512-dim pooled features.
    model_base = torch.nn.Sequential(*list(resnet.children())[:-1])
    model = MetadataModel(model_base, base_out_dim=512)
else:
    # Plain image model: replace the classifier head with a 2-class fc.
    resnet.fc = torch.nn.Linear(512, 2)
    model = resnet
# model.to(lib.model.device)
model = load_model(model, weight_path)
# NOTE(review): predict() presumably returns one score per sample per batch
# (results feed a single 'target' column below) — confirm against predict()
results = predict(model, data_loader, use_metadata=use_metadata)
results = np.concatenate(results, axis=0)
test_csv = pd.read_csv(csv_path)
# test_csv = test_csv.loc[int(len(test_csv)*data_sample_size) ,:]
# Build the submission: image names from the test CSV, scores from the model.
submission_csv = pd.DataFrame({
    'image_name': test_csv['image_name'],
    'target': results
})
print(submission_csv.head())
submission_csv.to_csv(Path(dirs.csv) / "submission.csv", index=False)