def train_epoch(self):
    """Run one training epoch over ``self.train_loader``.

    Performs forward/backward/step per batch, maintains an exponential
    moving average of the metrics, logs throughput every 4 iterations,
    and periodically triggers validation.
    """
    self.model.train()
    time = timer()  # local throughput timer (NOTE: shadows any `time` module import)
    for batch_idx, (image, meta, target) in enumerate(self.train_loader):
        self.optim.zero_grad()
        self.metrics[...] = 0  # reset the per-batch metric accumulator in place
        input_dict = {
            "image": recursive_to(image, self.device),
            "meta": recursive_to(meta, self.device),
            "target": recursive_to(target, self.device),
            "mode": "training",
        }
        result = self.model(input_dict)
        loss = self._loss(result)
        if np.isnan(loss.item()):
            # fail fast rather than silently corrupting the weights
            raise ValueError("loss is nan while training")
        loss.backward()
        self.optim.step()
        # Exponential moving average of the metrics (decay 0.9).
        # NOTE(review): the first assignment aliases self.metrics rather than
        # copying it — later branches replace the reference, so this only
        # matters if self.metrics is mutated elsewhere; verify.
        if self.avg_metrics is None:
            self.avg_metrics = self.metrics
        else:
            self.avg_metrics = self.avg_metrics * 0.9 + self.metrics * 0.1
        self.iteration += 1
        self._write_metrics(1, loss.item(), "training", do_print=False)
        if self.iteration % 4 == 0:
            # "epoch/kilo-images | smoothed metrics | images-per-second over last 4 iters"
            tprint(
                f"{self.epoch:03}/{self.iteration * self.batch_size // 1000:04}k| "
                + "| ".join(map("{:.5f}".format, self.avg_metrics[0]))
                + f"| {4 * self.batch_size / (timer() - time):04.1f} "
            )
            time = timer()
        num_images = self.batch_size * self.iteration
        # Validate every `validation_interval` images (assumes the interval is a
        # multiple of batch_size — TODO confirm) plus one early pass at 600 images.
        if num_images % self.validation_interval == 0 or num_images == 600:
            self.validate()
            time = timer()
def train_epoch(self):
    """Run one training epoch over ``self.train_loader``.

    Variant whose dataset yields ``(image, target, _)`` (no ``meta``).
    Performs forward/backward/step per batch, keeps an exponential moving
    average of the metrics, logs throughput, and periodically validates.
    """
    self.model.train()
    time = timer()  # local throughput timer (NOTE: shadows any `time` module import)
    for batch_idx, (image, target, _) in enumerate(self.train_loader):
        self.optim.zero_grad()
        self.metrics[...] = 0  # reset the per-batch metric accumulator in place
        input_dict = {
            "image": recursive_to(image, self.device),
            "target": recursive_to(target, self.device),
            "mode": "training",
        }
        result = self.model(input_dict)
        loss = self._loss(result)  # total loss for this batch
        if np.isnan(loss.item()):
            # fail fast rather than silently corrupting the weights
            raise ValueError("loss is nan while training")
        loss.backward()
        self.optim.step()
        # Exponential moving average of the metrics (decay 0.9); smooths the
        # noisy per-batch values for logging.
        if self.avg_metrics is None:
            self.avg_metrics = self.metrics
        else:
            self.avg_metrics = self.avg_metrics * 0.9 + self.metrics * 0.1
        self.iteration += 1  # count number of iterations
        self._write_metrics(
            1, loss.item(), "training", do_print=False
        )  # writes entries to the event files in log_dir for TensorBoard
        if self.iteration % 4 == 0:
            # "epoch/kilo-images | smoothed metrics | images-per-second over last 4 iters"
            tprint(
                f"{self.epoch:03}/{self.iteration * self.batch_size // 1000:04}k| "  # epoch / amount of data consumed
                + "| ".join(map("{:.5f}".format, self.avg_metrics[0]))
                + f"| {4 * self.batch_size / (timer() - time):04.1f} ")
            time = timer()
        num_images = self.batch_size * self.iteration
        # Validate every `validation_interval` images (assumes the interval is a
        # multiple of batch_size — TODO confirm) plus one early pass at 600 images.
        if num_images % self.validation_interval == 0 or num_images == 600:
            self.validate()
            time = timer()
def main():
    """Evaluate a trained LCNN model on the validation split.

    Loads config + checkpoint (docopt args: ``<yaml-config>``, ``<checkpoint>``,
    ``<image-dir>``, ``<output-dir>``, ``--devices``, ``--plot``), runs the model
    over the valid split, saves per-image heatmaps to ``<output-dir>/NNNNNN.npz``,
    and optionally plots the predicted lines.
    """
    args = docopt(__doc__)
    config_file = args["<yaml-config>"] or "config/wireframe.yaml"
    C.update(C.from_yaml(filename=config_file))
    M.update(C.model)
    pprint.pprint(C, indent=4)

    # Fixed seeds for reproducible evaluation.
    random.seed(0)
    np.random.seed(0)
    torch.manual_seed(0)

    device_name = "cpu"
    os.environ["CUDA_VISIBLE_DEVICES"] = args["--devices"]
    if torch.cuda.is_available():
        device_name = "cuda"
        torch.backends.cudnn.deterministic = True
        torch.cuda.manual_seed(0)
        print("Let's use", torch.cuda.device_count(), "GPU(s)!")
    else:
        print("CUDA is not available")
    device = torch.device(device_name)

    if M.backbone == "stacked_hourglass":
        model = lcnn.models.hg(
            depth=M.depth,
            head=lambda c_in, c_out: MultitaskHead(c_in, c_out),
            num_stacks=M.num_stacks,
            num_blocks=M.num_blocks,
            num_classes=sum(sum(M.head_size, [])),
        )
    else:
        raise NotImplementedError

    checkpoint = torch.load(args["<checkpoint>"])
    model = MultitaskLearner(model)
    model = LineVectorizer(model)
    model.load_state_dict(checkpoint["model_state_dict"])
    model = model.to(device)
    model.eval()

    loader = torch.utils.data.DataLoader(
        WireframeDataset(args["<image-dir>"], split="valid"),
        shuffle=False,
        batch_size=M.batch_size,
        collate_fn=collate,
        num_workers=C.io.num_workers,
        pin_memory=True,
    )
    os.makedirs(args["<output-dir>"], exist_ok=True)

    for batch_idx, (image, meta, target) in enumerate(loader):
        with torch.no_grad():
            input_dict = {
                "image": recursive_to(image, device),
                "meta": recursive_to(meta, device),
                "target": recursive_to(target, device),
                "do_evaluation": True,
            }
            H = model(input_dict)["heatmaps"]
        # FIX: iterate the actual batch length instead of M.batch_size so a
        # smaller final batch (DataLoader default drop_last=False) cannot
        # raise an IndexError.  The npz index still strides by M.batch_size.
        for i in range(len(image)):
            index = batch_idx * M.batch_size + i
            np.savez(
                osp.join(args["<output-dir>"], f"{index:06}.npz"),
                **{k: v[i].cpu().numpy() for k, v in H.items()},
            )
            if not args["--plot"]:
                continue
            im = image[i].cpu().numpy().transpose(1, 2, 0)  # CHW -> HWC
            im = im * M.image.stddev + M.image.mean  # undo normalization
            lines = H["lines"][i].cpu().numpy() * 4  # presumably heatmap->image scale — TODO confirm
            scores = H["score"][i].cpu().numpy()
            if len(lines) > 0 and not (lines[0] == 0).all():
                # FIX: use a distinct loop variable so the per-image index `i`
                # is not shadowed by the per-line loop.
                for k, ((a, b), s) in enumerate(zip(lines, scores)):
                    if k > 0 and (lines[k] == lines[0]).all():
                        break  # tail repeats the first line: padded output ends here
                    plt.plot([a[1], b[1]], [a[0], b[0]], c=c(s), linewidth=4)
            plt.show()
def validate(self):
    """Run a full validation pass.

    Computes the mean loss over ``self.val_loader``, dumps prediction heatmaps
    (``.npz``) for every sample and visualizations for the first 20, writes the
    validation metrics, saves ``checkpoint_latest.pth`` and, when the mean loss
    improves, ``checkpoint_best.pth``.  Restores the model's training mode on exit.
    """
    tprint("Running validation...", " " * 75)
    training = self.model.training  # remember mode so we can restore it below
    self.model.eval()

    viz = osp.join(self.out, "viz", f"{self.iteration * M.batch_size_eval:09d}")
    npz = osp.join(self.out, "npz", f"{self.iteration * M.batch_size_eval:09d}")
    # FIX: makedirs(..., exist_ok=True) is atomic and race-free, unlike the
    # previous `osp.exists(d) or os.makedirs(d)` check-then-act pattern.
    os.makedirs(viz, exist_ok=True)
    os.makedirs(npz, exist_ok=True)

    total_loss = 0
    self.metrics[...] = 0  # reset the metric accumulator in place
    with torch.no_grad():
        for batch_idx, (image, meta, target) in enumerate(self.val_loader):
            input_dict = {
                "image": recursive_to(image, self.device),
                "meta": recursive_to(meta, self.device),
                "target": recursive_to(target, self.device),
                "mode": "validation",
            }
            result = self.model(input_dict)
            total_loss += self._loss(result)

            H = result["preds"]
            for i in range(H["jmap"].shape[0]):
                index = batch_idx * M.batch_size_eval + i
                np.savez(
                    f"{npz}/{index:06}.npz",
                    **{k: v[i].cpu().numpy() for k, v in H.items()},
                )
                if index >= 20:
                    continue  # only visualize the first 20 samples
                self._plot_samples(i, index, H, meta, target, f"{viz}/{index:06}")

    self._write_metrics(len(self.val_loader), total_loss, "validation", True)
    self.mean_loss = total_loss / len(self.val_loader)

    # Always checkpoint the latest state; also copy it next to this run's npz dump.
    torch.save(
        {
            "iteration": self.iteration,
            "arch": self.model.__class__.__name__,
            "optim_state_dict": self.optim.state_dict(),
            "model_state_dict": self.model.state_dict(),
            "best_mean_loss": self.best_mean_loss,
        },
        osp.join(self.out, "checkpoint_latest.pth"),
    )
    shutil.copy(
        osp.join(self.out, "checkpoint_latest.pth"),
        osp.join(npz, "checkpoint.pth"),
    )
    if self.mean_loss < self.best_mean_loss:
        self.best_mean_loss = self.mean_loss
        shutil.copy(
            osp.join(self.out, "checkpoint_latest.pth"),
            osp.join(self.out, "checkpoint_best.pth"),
        )
    if training:
        self.model.train()  # restore training mode for the caller
def main():
    """Run inference on the test split with a Hough-transform-augmented backbone.

    Loads config + checkpoint (docopt args: ``<yaml-config>``, ``<checkpoint>``,
    ``--devices``, ``--plot``), builds/loads the Hough vote_index matrix, saves
    per-image predictions to ``C.io.outdir/NNNNNN.npz``, and optionally plots
    the predicted lines.
    """
    args = docopt(__doc__)
    config_file = args["<yaml-config>"] or "config/wireframe.yaml"
    C.update(C.from_yaml(filename=config_file))
    M.update(C.model)
    pprint.pprint(C, indent=4)

    # Fixed seeds for reproducible evaluation.
    random.seed(0)
    np.random.seed(0)
    torch.manual_seed(0)

    device_name = "cpu"
    os.environ["CUDA_VISIBLE_DEVICES"] = args["--devices"]
    if torch.cuda.is_available():
        device_name = "cuda"
        torch.backends.cudnn.deterministic = True
        torch.cuda.manual_seed(0)
        print("Let's use", torch.cuda.device_count(), "GPU(s)!")
    else:
        print("CUDA is not available")
    device = torch.device(device_name)

    ### load vote_index matrix for Hough transform
    ### default settings: (128, 128, 3, 1)
    # Cached in a .mat file because computing it from scratch is expensive
    # (presumably — TODO confirm against hough_transform's cost).
    if os.path.isfile(C.io.vote_index):
        vote_index = sio.loadmat(C.io.vote_index)['vote_index']
    else:
        vote_index = hough_transform(rows=128, cols=128, theta_res=3, rho_res=1)
        sio.savemat(C.io.vote_index, {'vote_index': vote_index})
    vote_index = torch.from_numpy(vote_index).float().contiguous().to(device)
    print('load vote_index', vote_index.shape)

    if M.backbone == "stacked_hourglass":
        model = lcnn.models.hg(
            depth=M.depth,
            head=MultitaskHead,
            num_stacks=M.num_stacks,
            num_blocks=M.num_blocks,
            num_classes=sum(sum(M.head_size, [])),
            vote_index=vote_index,
        )
    else:
        raise NotImplementedError

    checkpoint = torch.load(args["<checkpoint>"])
    model = MultitaskLearner(model)
    model = LineVectorizer(model)
    model.load_state_dict(checkpoint["model_state_dict"])
    model = model.to(device)
    model.eval()

    loader = torch.utils.data.DataLoader(
        # WireframeDataset(args["<image-dir>"], split="valid"),
        WireframeDataset(rootdir=C.io.datadir, split="test"),
        shuffle=False,
        batch_size=M.batch_size,
        collate_fn=collate,
        # multiprocessing workers are disabled on Windows
        num_workers=C.io.num_workers if os.name != "nt" else 0,
        pin_memory=True,
    )
    output_dir = C.io.outdir
    os.makedirs(output_dir, exist_ok=True)

    for batch_idx, (image, meta, target) in enumerate(loader):
        with torch.no_grad():
            input_dict = {
                "image": recursive_to(image, device),
                "meta": recursive_to(meta, device),
                "target": recursive_to(target, device),
                "mode": "validation",
            }
            H = model(input_dict)["preds"]
        # len(image) (not M.batch_size) correctly handles a smaller final batch
        for i in range(len(image)):
            index = batch_idx * M.batch_size + i
            print('index', index)
            np.savez(
                osp.join(output_dir, f"{index:06}.npz"),
                **{k: v[i].cpu().numpy() for k, v in H.items()},
            )
            if not args["--plot"]:
                continue
            im = image[i].cpu().numpy().transpose(1, 2, 0)  # CHW -> HWC
            im = im * M.image.stddev + M.image.mean  # undo normalization
            lines = H["lines"][i].cpu().numpy() * 4  # presumably heatmap->image scale — TODO confirm
            scores = H["score"][i].cpu().numpy()
            if len(lines) > 0 and not (lines[0] == 0).all():
                # NOTE(review): this inner loop shadows the per-image index `i`
                for i, ((a, b), s) in enumerate(zip(lines, scores)):
                    if i > 0 and (lines[i] == lines[0]).all():
                        break  # tail repeats the first line: padded output ends here
                    plt.plot([a[1], b[1]], [a[0], b[0]], c=c(s), linewidth=4)
            plt.show()
def main():
    """Run corner/center prediction on the valid split and post-process results.

    Loads config + checkpoint (docopt args: ``<yaml-config>``, ``<checkpoint>``,
    ``<image-dir>``, ``<output-dir>``, ``--devices``, ``--plot``), feeds each
    image through the model with zeroed targets, then post-processes and plots
    both ground truth and predictions into ``<output-dir>/test_result``.
    """
    args = docopt(__doc__)
    config_file = args["<yaml-config>"] or "config/wireframe.yaml"
    C.update(C.from_yaml(filename=config_file))
    M.update(C.model)
    pprint.pprint(C, indent=4)

    # Fixed seeds for reproducible evaluation.
    random.seed(0)
    np.random.seed(0)
    torch.manual_seed(0)

    if M.backbone == "stacked_hourglass":
        model = lcnn.models.hg(
            depth=M.depth,
            head=lambda c_in, c_out: MultitaskHead(c_in, c_out),
            num_stacks=M.num_stacks,
            num_blocks=M.num_blocks,
            num_classes=sum(sum(M.head_size, [])),
        )
    else:
        raise NotImplementedError
    model = MultitaskLearner(model)

    device_name = "cpu"
    os.environ["CUDA_VISIBLE_DEVICES"] = args["--devices"]
    if torch.cuda.is_available():
        device_name = "cuda"
        torch.backends.cudnn.deterministic = True
        torch.cuda.manual_seed(0)
        checkpoint = torch.load(args["<checkpoint>"])
        print("Let's use", torch.cuda.device_count(), "GPU(s)!")
    else:
        # map_location is required to load a GPU-trained checkpoint on CPU
        checkpoint = torch.load(args["<checkpoint>"], map_location=torch.device('cpu'))
        print("CUDA is not available")
    device = torch.device(device_name)
    model.load_state_dict(checkpoint["model_state_dict"])
    model = model.to(device)
    model.eval()
    print(f'evaluation batch size {M.batch_size_eval}')

    loader = torch.utils.data.DataLoader(
        WireframeDataset(args["<image-dir>"], split="valid"),
        shuffle=False,
        batch_size=M.batch_size_eval,
        collate_fn=collate,
        # multiprocessing workers are disabled on Windows
        num_workers=C.io.num_workers if os.name != "nt" else 0,
        pin_memory=True,
    )

    # Recreate <output-dir> from scratch; results go into its test_result subfolder.
    if os.path.exists(args["<output-dir>"]):
        shutil.rmtree(args["<output-dir>"])
    os.makedirs(args["<output-dir>"], exist_ok=False)
    outdir = os.path.join(args["<output-dir>"], 'test_result')
    os.mkdir(outdir)

    for batch_idx, (image, target, iname) in enumerate(loader):
        with torch.no_grad():
            # predict given image: targets are zeroed so only the image drives
            # the forward pass (the model still expects the target keys)
            input_target = {
                "center": torch.zeros_like(target['center']),
                "corner": torch.zeros_like(target['corner']),
                "corner_offset": torch.zeros_like(target['corner_offset']),
                "corner_bin_offset": torch.zeros_like(target['corner_bin_offset'])
            }
            input_dict = {
                "image": recursive_to(image, device),
                "target": recursive_to(input_target, device),
                "mode": "validation",
            }
            network_start_time = time()
            H = model(input_dict)["preds"]
            network_end_time = time()

            # plot gt & prediction
            for i in range(len(iname)):  #M.batch_size
                if not args["--plot"]:
                    continue
                im = image[i].cpu().numpy().transpose(1, 2, 0)  # [512,512,3]
                # im = im * M.image.stddev + M.image.mean

                # plot&process gt: filename gets a "_gt" suffix before the extension
                gt_im_info = [
                    im, iname[i].split('.')[0] + '_gt.' + iname[i].split('.')[1]
                ]
                gt_center = target["center"][i].cpu().numpy()
                gt_corner = target["corner"][i].cpu().numpy()
                gt_corner_offset = target["corner_offset"][i].cpu().numpy()
                gt_corner_bin_offset = target["corner_bin_offset"][i].cpu(
                ).numpy()
                feature_maps = [
                    gt_center, gt_corner, gt_corner_offset, gt_corner_bin_offset
                ]
                postprocess(gt_im_info, feature_maps, outdir, NMS=False, plot=True)

                # plot&process pd: filename gets a "_pd" suffix before the extension
                pd_im_info = [
                    im, iname[i].split('.')[0] + '_pd.' + iname[i].split('.')[1]
                ]
                pd_center = H["center"][i].cpu().numpy()
                pd_corner = H["corner"][i].cpu().numpy()
                pd_corner_offset = H["corner_offset"][i].cpu().numpy()
                pd_corner_bin_offset = H["corner_bin_offset"][i].cpu().numpy()
                feature_maps = [
                    pd_center, pd_corner, pd_corner_offset, pd_corner_bin_offset
                ]
                postprocess_start_time = time()
                grouped_corners = postprocess(pd_im_info,
                                              feature_maps,
                                              outdir,
                                              NMS=True,
                                              plot=True)
                postprocess_end_time = time()
                print(
                    f'inference time is {postprocess_end_time-postprocess_start_time+network_end_time-network_start_time}, network cost:{network_end_time-network_start_time}, postprocessing cost:{postprocess_end_time-postprocess_start_time}'
                )
    # Evaluation:
    # eval()  # TBD
    print('-----finished-----')
    return
def main():
    """Evaluate corner/center predictions on the valid split with mAP bookkeeping.

    Loads config + checkpoint (docopt args: ``<yaml-config>``, ``<checkpoint>``,
    ``<image-dir>``, ``<output-dir>``, ``--devices``, ``--plot``), runs inference
    with zeroed targets, post-processes predictions into
    ``<output-dir>/test_result``, copies ground truth / writes detections into
    the ``C.io.mAP`` folders, runs ``evalCOCO()``, and reports per-image timing.
    """
    args = docopt(__doc__)
    config_file = args["<yaml-config>"] or "config/wireframe.yaml"
    C.update(C.from_yaml(filename=config_file))
    M.update(C.model)
    pprint.pprint(C, indent=4)

    # Fixed seeds for reproducible evaluation.
    random.seed(0)
    np.random.seed(0)
    torch.manual_seed(0)

    if M.backbone == "stacked_hourglass":
        model = lcnn.models.hg(
            depth=M.depth,
            head=lambda c_in, c_out: MultitaskHead(c_in, c_out),
            num_stacks=M.num_stacks,
            num_blocks=M.num_blocks,
            num_classes=sum(sum(M.head_size, [])),
        )
    else:
        raise NotImplementedError
    model = MultitaskLearner(model)

    device_name = "cpu"
    os.environ["CUDA_VISIBLE_DEVICES"] = args["--devices"]
    if torch.cuda.is_available():
        device_name = "cuda"
        torch.backends.cudnn.deterministic = True
        torch.cuda.manual_seed(0)
        checkpoint = torch.load(args["<checkpoint>"])
        print("Let's use", torch.cuda.device_count(), "GPU(s)!")
    else:
        # map_location is required to load a GPU-trained checkpoint on CPU
        checkpoint = torch.load(args["<checkpoint>"], map_location=torch.device('cpu'))
        print("CUDA is not available")
    device = torch.device(device_name)
    model.load_state_dict(checkpoint["model_state_dict"])
    model = model.to(device)
    model.eval()

    loader = torch.utils.data.DataLoader(
        WireframeDataset(args["<image-dir>"], split="valid"),
        shuffle=False,
        batch_size=M.batch_size_eval,
        collate_fn=collate,
        # multiprocessing workers are disabled on Windows
        num_workers=C.io.num_workers if os.name != "nt" else 0,
        pin_memory=True,
    )

    # FIX: removed a dead `os.path.join(args["<output-dir>"], 'test_result')`
    # statement whose result was discarded.
    # Recreate <output-dir> from scratch; results go into its test_result subfolder.
    if os.path.exists(args["<output-dir>"]):
        shutil.rmtree(args["<output-dir>"])
    os.makedirs(args["<output-dir>"], exist_ok=False)
    outdir = os.path.join(args["<output-dir>"], 'test_result')
    os.mkdir(outdir)

    # clean previous files in mAP folders
    for mAP_folder in [
            os.path.join(C.io.mAP, 'detection-results'),
            os.path.join(C.io.mAP, 'ground-truth')
    ]:
        if os.path.exists(mAP_folder):
            shutil.rmtree(mAP_folder)
        os.makedirs(mAP_folder, exist_ok=False)

    total_inference_time = 0
    time_cost_by_network = 0
    time_cost_by_post = 0
    for batch_idx, (image, target, iname) in enumerate(loader):
        with torch.no_grad():
            # predict given image: targets are zeroed so only the image drives
            # the forward pass (the model still expects the target keys)
            input_target = {
                "center": torch.zeros_like(target['center']),
                "corner": torch.zeros_like(target['corner']),
                "corner_offset": torch.zeros_like(target['corner_offset']),
                "corner_bin_offset": torch.zeros_like(target['corner_bin_offset'])
            }
            input_dict = {
                "image": recursive_to(image, device),
                "target": recursive_to(input_target, device),
                "mode": "validation",
            }
            # time cost by network
            timer_begin = time.time()
            H = model(input_dict)["preds"]
            timer_end = time.time()
            time_cost_by_network += timer_end - timer_begin
            total_inference_time += timer_end - timer_begin

            # plot prediction
            for i in range(len(iname)):  #M.batch_size
                im = image[i].cpu().numpy().transpose(1, 2, 0)  # [512,512,3]
                # move gt files to mAP folder for evaluation
                move_to_mAP(os.path.join(args["<image-dir>"], 'valid'), iname[i],
                            os.path.join(C.io.mAP, 'ground-truth'))

                # plot&process pd: filename gets a "_pd" suffix before the extension
                pd_im_info = [
                    im, iname[i].split('.')[0] + '_pd.' + iname[i].split('.')[1]
                ]
                pd_center = H["center"][i].cpu().numpy()
                pd_corner = H["corner"][i].cpu().numpy()
                pd_corner_offset = H["corner_offset"][i].cpu().numpy()
                pd_corner_bin_offset = H["corner_bin_offset"][i].cpu().numpy()
                feature_maps = [
                    pd_center, pd_corner, pd_corner_offset, pd_corner_bin_offset
                ]
                ## post processing with center prediction
                # grouped_corners=postprocess(pd_im_info, feature_maps, outdir, NMS=True,plot=args['--plot'])
                ## post processing without center prediction
                timer_begin = time.time()
                grouped_corners = postprocess(pd_im_info,
                                              feature_maps,
                                              outdir,
                                              maxDet=10,
                                              NMS=True,
                                              plot=args['--plot'])
                timer_end = time.time()
                time_cost_by_post += timer_end - timer_begin
                total_inference_time += timer_end - timer_begin

                write_pd_to_mAP(grouped_corners, iname[i],
                                os.path.join(C.io.mAP, 'detection-results'))
                # print(f'prediction of {iname[i]} finished')

    # Evaluation:
    evalCOCO()  # TBD
    print("inference time is", total_inference_time / len(loader.dataset),
          "s / img")
    # FIX: re-joined the final f-string onto one line — a raw newline inside a
    # single-quoted f-string is a syntax error.
    print(
        f"time cost by network is {time_cost_by_network/len(loader.dataset)}, time cost by post-processing is {time_cost_by_post/len(loader.dataset)}"
    )
    print('-----finished-----')
    return