def load_model(model_file, config='yolact_plus_resnet50_config'):
    """Load a YOLACT network from a checkpoint and put it in eval mode.

    Args:
        model_file: Path to the ``.pth`` weights file.
        config: Name of the YOLACT config to activate before constructing the
            net. The default preserves the original hard-coded behavior.

    Returns:
        The loaded ``Yolact`` instance, ready for inference.
    """
    # NOTE(review): this changes the process-wide default tensor type to CUDA
    # floats — every tensor created afterwards lands on the GPU.
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
    set_cfg(config)
    net = Yolact()
    net.load_weights(model_file)
    net.eval()
    return net
class YOLACT_MODEL():
    """Thin inference wrapper around a YOLACT resnet50 network.

    On construction it activates the resnet50 config, loads the checkpoint
    named in ``opts['checkpoint']``, and moves the net to the GPU.
    ``detect`` runs one image through the net and returns the rendered
    result as a uint8 numpy array.
    """

    def __init__(self, opts):
        # concat the two files to one file
        # if not os.path.isfile('weights/yolact_resnet50_54_800000.pth'):
        #     script = "cat weights/a* > weights/yolact_resnet50_54_800000.pth"
        #     call(script, shell=True)
        set_cfg('yolact_resnet50_config')
        cudnn.benchmark = True
        cudnn.fastest = True
        # Process-wide default: all new tensors are CUDA floats.
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        self.net = Yolact()
        self.net.load_weights(opts['checkpoint'])
        print("done.")
        self.net.eval()
        self.net = self.net.cuda()
        self.net.detect.use_fast_nms = True
        cfg.mask_proto_debug = False
        # Per-device color cache (unused by this class's display, kept for
        # parity with the other wrapper variants).
        self.color_cache = defaultdict(lambda: {})
        self.threshold = opts['threshold']

    def detect(self, img):
        """Run inference on ``img`` and return the rendered output image.

        Args:
            img: Anything ``np.array`` accepts (PIL image / ndarray).

        Returns:
            uint8 HxWx3 numpy array with the best mask multiplied in.
        """
        numpy_image = np.array(img)
        print('starting inference...')
        frame = torch.from_numpy(numpy_image).cuda().float()
        batch = FastBaseTransform()(frame.unsqueeze(0))
        preds = self.net(batch)
        print("done.")
        output_image = self.display(preds, frame, None, None,
                                    undo_transform=False,
                                    score_threshold=self.threshold)
        return output_image

    def display(self, dets_out, img, h, w, undo_transform=True,
                class_color=False, mask_alpha=0.45, top_k=100,
                score_threshold=0.3):
        """Post-process raw detections and burn the top mask into the image.

        ``h``/``w`` parameters are ignored; the image's own shape is used.
        Returns a uint8 HxWx3 numpy array.
        """
        img_gpu = img / 255.0
        h, w, _ = img.shape
        with timer.env('Postprocess'):
            t = postprocess(dets_out, w, h,
                            visualize_lincomb=False,
                            crop_masks=True,
                            score_threshold=score_threshold)
            torch.cuda.synchronize()
        with timer.env('Copy'):
            if cfg.eval_mask_branch:
                # Masks are drawn on the GPU, so don't copy
                masks = t[3][:top_k]
                # BUGFIX: guard against zero detections — indexing masks[0]
                # on an empty result raised IndexError; now we return the
                # unmodified image instead.
                if len(masks) > 0:
                    img_gpu = img_gpu * masks[0]
        # Then draw the stuff that needs to be done on the cpu
        # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
        img_numpy = (img_gpu * 255).byte().cpu().numpy()
        return img_numpy
def __init__(
        self,
        weights='./crow_vision_yolact/data/yolact/weights/weights_yolact_kuka_17/crow_base_35_457142.pth',
        config=None,
        batchsize=1,
        top_k=25,
        score_threshold=0.1,
        display_text=True,
        display_bboxes=True,
        display_masks=True,
        display_scores=True):
    """Set up a YOLACT network for inference and store it as ``self.net``.

    Args:
        weights: Path to the ``.pth`` checkpoint to load.
        config: Either a config name understood by ``set_cfg`` or a path to
            a dill-pickled config object (detected by an ``.obj`` substring).
            ``None`` keeps whatever config is currently active.
        batchsize: Stored for callers; not used during setup itself.
        top_k / score_threshold: Detection limits, forwarded to ``parse_args``.
        display_*: Rendering toggles, forwarded to ``parse_args`` as strings.
    """
    self.score_threshold = score_threshold
    self.top_k = top_k
    self.batchsize = batchsize
    # initialize a yolact net for inference
    ## YOLACT setup
    # setup config — a '.obj' file is a dill-serialized config object,
    # anything else is passed through to set_cfg unchanged.
    if config is not None:
        if '.obj' in config:
            with open(config, 'rb') as f:
                config = dill.load(f)
        set_cfg(config)
    self.class_names_tuple = get_class_names_tuple()
    # Forward the display/threshold options through the global arg parser
    # (YOLACT reads them from module-level args).
    parse_args([
        '--top_k=' + str(top_k),
        '--score_threshold=' + str(score_threshold),
        '--display_text=' + str(display_text),
        '--display_bboxes=' + str(display_bboxes),
        '--display_masks=' + str(display_masks),
        '--display_scores=' + str(display_scores),
    ])
    # CUDA setup for yolact — NOTE: this makes CUDA floats the process-wide
    # default tensor type.
    torch.backends.cudnn.fastest = True
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
    # YOLACT net itself: build on the current CUDA device, load weights,
    # switch to eval mode with fast (single-class) NMS.
    with torch.no_grad():
        net = Yolact().cuda(torch.cuda.current_device())
        net.load_weights(weights)
        net.eval()
        net.detect.use_fast_nms = True
        net.detect.use_cross_class_nms = False
        self.net = net
    print("YOLACT network available as self.net")
    # for debug/benchmark: accumulated inference time in seconds.
    self.duration = 0.0
def init_model(transform):
    """Parse CLI args, resolve the weight file and config, and load a YOLACT net.

    Args:
        transform: kept for signature compatibility — NOTE(review): the
            DataParallel-wrapped ``FastBaseTransform`` built below shadows it
            locally and is never returned, so callers never receive it.
            TODO confirm whether it should be part of the return value.

    Returns:
        (net, args): the loaded (and, under CUDA, DataParallel-wrapped)
        network and the parsed argument namespace.
    """
    args = parse_args()
    if args.config is not None:
        print(args.config)
        set_cfg(args.config)
    cfg.mask_proto_debug = False
    # Resolve the special weight-file aliases.
    if args.trained_model == 'interrupt':
        args.trained_model = SavePath.get_interrupt('weights/')
    elif args.trained_model == 'latest':
        args.trained_model = SavePath.get_latest('weights/', cfg.name)
    if args.config is None:
        model_path = SavePath.from_str(args.trained_model)
        # TODO: Bad practice? Probably want to do a name lookup instead.
        args.config = model_path.model_name + '_config'
        print('Config not specified. Parsed %s from the file name.\n' % args.config)
        set_cfg(args.config)
    if args.detect:
        cfg.eval_mask_branch = False
    if args.dataset is not None:
        set_dataset(args.dataset)
    with torch.no_grad():
        if args.cuda:
            cudnn.fastest = True
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
        else:
            torch.set_default_tensor_type('torch.FloatTensor')
        print('Loading model...', end='')
        net = Yolact()
        net.load_weights(args.trained_model)
        net.eval()
        print(' Done.')
        # BUGFIX: only move the net to the GPU and wrap it in DataParallel
        # when CUDA was actually requested — the original called .cuda()
        # unconditionally and crashed on CPU-only hosts despite the
        # args.cuda branch above.
        if args.cuda:
            net = net.cuda()
            net = CustomDataParallel(net).cuda()
            transform = torch.nn.DataParallel(FastBaseTransform()).cuda()
        return net, args
def prepare_model(args):
    """Build a YOLACT net in training mode.

    Either resumes from ``args.resume`` (also fixing up ``args.start_iter``
    from the checkpoint name when it is -1), or initializes from the
    configured backbone checkpoint when one exists on disk.
    """
    model = Yolact()
    model.train()
    if args.resume is None:
        # Fresh run: seed from the backbone weights when the file exists.
        init_path = args.save_folder + cfg.backbone.path
        print('Initializing weights...', init_path)
        if os.path.isfile(init_path):
            model.init_weights(backbone_path=init_path)
        else:
            print("no init weight, use empty")
    else:
        print('Resuming training, loading {}...'.format(args.resume))
        model.load_weights(args.resume)
        if args.start_iter == -1:
            args.start_iter = SavePath.from_str(args.resume).iteration
    return model
def load_weights(filename, cuda):
    """Load YOLACT network weights into the module-level ``ynet``.

    Args:
        filename: Path to the ``.pth`` checkpoint; must be non-empty.
        cuda: When truthy, enable cudnn.fastest and default to CUDA tensors;
            otherwise default to CPU float tensors.

    Raises:
        ValueError: If ``filename`` is the empty string.
    """
    global ynet
    if filename == '':
        raise ValueError('Empty filename for network weights')
    print('#### CUDA ENABLED', cuda)
    # BUGFIX: the message previously printed a literal placeholder instead
    # of the actual checkpoint path.
    print(f'Loading weights from {filename}')
    tic = time.perf_counter_ns()
    with torch.no_grad():
        if cuda:
            cudnn.fastest = True
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
        else:
            torch.set_default_tensor_type('torch.FloatTensor')
        ynet = Yolact()
        ynet.load_weights(filename, False)
        ynet.eval()
    toc = time.perf_counter_ns()
    # Nanoseconds -> seconds for the log line.
    logging.debug(f'Time to load weights: {1e-9 * (toc - tic)}')
def convert_to_onnx_with_hydra(cfg: DictConfig):
    """Export a YOLACT checkpoint to ONNX, driven entirely by a Hydra config.

    Reads the checkpoint path, input dimensions, output path, and export
    options from ``cfg.onnx`` and writes the ONNX file to
    ``cfg.onnx.model_onnx_path``.
    """
    # Make sure the output directory exists.
    createFolderOnnx(cfg)
    # Activate the YOLACT configuration matching the checkpoint.
    set_cfg(cfg.onnx.yolact_cfg)
    model = Yolact()
    model.load_weights(cfg.onnx.model_ckpt_path)
    model.eval()
    model = model.cpu()
    # Trace with a random NCHW batch shaped per the config.
    input_shape = (cfg.onnx.model_batch_size,
                   cfg.onnx.model_channel_input,
                   cfg.onnx.model_height_input,
                   cfg.onnx.model_width_input)
    dummy_input = torch.rand(input_shape)
    torch.onnx.export(model,
                      dummy_input,
                      cfg.onnx.model_onnx_path,
                      verbose=cfg.onnx.verbose,
                      opset_version=cfg.onnx.opset_version)
def main(args):
    """ROS entry point: load the YOLACT model, start the image converter
    node, and spin until shutdown.

    Args:
        args: Unused command-line remainder (kept for rospy's entry-point
            convention).
    """
    rospy.init_node('yolact_ros')
    rospack = rospkg.RosPack()
    yolact_path = rospack.get_path('yolact_ros')
    model_path_str = yolact_path + "/scripts/yolact/weights/yolact_base_54_800000.pth"
    model_path = SavePath.from_str(model_path_str)
    # Derive the config name from YOLACT's weight-file naming convention.
    set_cfg(model_path.model_name + '_config')
    with torch.no_grad():
        # Ensure the results directory exists before inference starts.
        results_path_str = yolact_path + "/scripts/yolact/results"
        if not os.path.exists(results_path_str):
            os.makedirs(results_path_str)
        cudnn.benchmark = True
        cudnn.fastest = True
        # Process-wide default: all new tensors are CUDA floats.
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        print('Loading model...', end='')
        net = Yolact()
        net.load_weights(model_path_str)
        net.eval()
        print(' Done.')
        net = net.cuda()
        net.detect.use_fast_nms = True
        cfg.mask_proto_debug = False
        # Hand the loaded net to the ROS image subscriber/publisher.
        ic = image_converter(net)
        try:
            rospy.spin()
        except KeyboardInterrupt:
            print("Shutting down")
        cv2.destroyAllWindows()
# Editor : VIM # File name : convert_weight.py # Author : YunYang1994 # Created date: 2019-07-27 18:07:20 # Description : # #================================================================ import torch import numpy as np from yolact import Yolact with torch.no_grad(): model = Yolact() model.eval() model.load_weights("./yolact_darknet53_54_800000.pth") modules = model.children() def parse_layer(layer, weights): assert isinstance(layer, torch.nn.Conv2d) or isinstance( layer, torch.nn.BatchNorm2d) print("=> Parsing ", layer) if isinstance(layer, torch.nn.Conv2d): weight, bias = layer.weight.detach().numpy(), layer.bias weight = np.transpose( weight, [2, 3, 1, 0]) # k_h, h_w, in_channels, out_channels if bias is None: weights.append([weight]) else: bias = layer.bias.detach().numpy()
if __name__ == '__main__': # 数据集与标签 valid_dataset = COCODetection(image_path='./data/coco/images/val2017/', info_file='./data/coco/annotations/instances_val2017.json', transform=BaseTransform(), has_gt=True ) prep_coco_cats() # 模型 print('Loading model...', end='') model = Yolact() model.load_weights(args.trained_model) model.eval() model = model.cuda() if args.cuda else model.cpu() print(' Done.') # 核心入口 with torch.no_grad(): if not os.path.exists('results'): os.makedirs('results') if args.cuda: torch.backends.cudnn.fastest = True torch.set_default_tensor_type('torch.cuda.FloatTensor') else: torch.set_default_tensor_type('torch.FloatTensor')
def train(rank, args):
    """Distributed (NCCL) training loop for YOLACT / YOLACT-Edge.

    Runs as one process per GPU; ``rank`` selects the device and gates
    rank-0-only work (checkpointing, validation, visualization). Supports
    three dataset modes: YouTube VIS video data (optionally jointly with
    COCO), FlyingChairs optical-flow data, and plain COCO detection.
    """
    if args.num_gpus > 1:
        multi_gpu_rescale(args)
    if rank == 0:
        if not os.path.exists(args.save_folder):
            os.mkdir(args.save_folder)
    # set up logger
    setup_logger(output=os.path.join(args.log_folder, cfg.name),
                 distributed_rank=rank)
    logger = logging.getLogger("yolact.train")
    w = SummaryHelper(distributed_rank=rank,
                      log_dir=os.path.join(args.log_folder, cfg.name))
    w.add_text("argv", " ".join(sys.argv))
    logger.info("Args: {}".format(" ".join(sys.argv)))
    # Record the exact source revision alongside the run.
    import git
    with git.Repo(search_parent_directories=True) as repo:
        w.add_text("git_hash", repo.head.object.hexsha)
        logger.info("git hash: {}".format(repo.head.object.hexsha))
    try:
        logger.info("Initializing torch.distributed backend...")
        dist.init_process_group(backend='nccl',
                                init_method=args.dist_url,
                                world_size=args.num_gpus,
                                rank=rank)
    except Exception as e:
        logger.error("Process group URL: {}".format(args.dist_url))
        raise e
    dist.barrier()
    if torch.cuda.device_count() > 1:
        logger.info('Multiple GPUs detected! Turning off JIT.')
    collate_fn = detection_collate
    # --- Dataset selection ---------------------------------------------
    if cfg.dataset.name == 'YouTube VIS':
        dataset = YoutubeVIS(image_path=cfg.dataset.train_images,
                             info_file=cfg.dataset.train_info,
                             configs=cfg.dataset,
                             transform=SSDAugmentationVideo(MEANS))
        # Optional joint training with COCO still images.
        if cfg.dataset.joint == 'coco':
            joint_dataset = COCODetection(
                image_path=cfg.joint_dataset.train_images,
                info_file=cfg.joint_dataset.train_info,
                transform=SSDAugmentation(MEANS))
            joint_collate_fn = detection_collate
        if args.validation_epoch > 0:
            setup_eval()
            val_dataset = YoutubeVIS(image_path=cfg.dataset.valid_images,
                                     info_file=cfg.dataset.valid_info,
                                     configs=cfg.dataset,
                                     transform=BaseTransformVideo(MEANS))
        collate_fn = collate_fn_youtube_vis
    elif cfg.dataset.name == 'FlyingChairs':
        dataset = FlyingChairs(image_path=cfg.dataset.trainval_images,
                               info_file=cfg.dataset.trainval_info)
        collate_fn = collate_fn_flying_chairs
    else:
        dataset = COCODetection(image_path=cfg.dataset.train_images,
                                info_file=cfg.dataset.train_info,
                                transform=SSDAugmentation(MEANS))
        if args.validation_epoch > 0:
            setup_eval()
            val_dataset = COCODetection(image_path=cfg.dataset.valid_images,
                                        info_file=cfg.dataset.valid_info,
                                        transform=BaseTransform(MEANS))
    # Set cuda device early to avoid duplicate model in master GPU
    if args.cuda:
        torch.cuda.set_device(rank)
    # Parallel wraps the underlying module, but when saving and loading we don't want that
    yolact_net = Yolact()
    net = yolact_net
    net.train()
    # I don't use the timer during training (I use a different timing method).
    # Apparently there's a race condition with multiple GPUs.
    # use timer for experiments
    timer.disable_all()
    # Both of these can set args.resume to None, so do them before the check
    if args.resume == 'interrupt':
        args.resume = SavePath.get_interrupt(args.save_folder)
    elif args.resume == 'latest':
        args.resume = SavePath.get_latest(args.save_folder, cfg.name)
    if args.resume is not None:
        logger.info('Resuming training, loading {}...'.format(args.resume))
        yolact_net.load_weights(args.resume, args=args)
        if args.start_iter == -1:
            # Recover the iteration count from the checkpoint filename.
            args.start_iter = SavePath.from_str(args.resume).iteration
    else:
        logger.info('Initializing weights...')
        yolact_net.init_weights(backbone_path=args.save_folder + cfg.backbone.path)
    # Loss: optical-flow training uses its own criterion.
    if cfg.flow.train_flow:
        criterion = OpticalFlowLoss()
    else:
        criterion = MultiBoxLoss(num_classes=cfg.num_classes,
                                 pos_threshold=cfg.positive_iou_threshold,
                                 neg_threshold=cfg.negative_iou_threshold,
                                 negpos_ratio=3)
    if args.cuda:
        cudnn.benchmark = True
        net.cuda(rank)
        criterion.cuda(rank)
        net = nn.parallel.DistributedDataParallel(net,
                                                  device_ids=[rank],
                                                  output_device=rank,
                                                  broadcast_buffers=False,
                                                  find_unused_parameters=True)
    # net = nn.DataParallel(net).cuda()
    # criterion = nn.DataParallel(criterion).cuda()
    optimizer = optim.SGD(filter(lambda x: x.requires_grad, net.parameters()),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.decay)
    # loss counters
    loc_loss = 0
    conf_loss = 0
    iteration = max(args.start_iter, 0)
    w.set_step(iteration)
    last_time = time.time()
    # Per-process epoch size: the sampler shards the dataset across GPUs.
    epoch_size = len(dataset) // args.batch_size // args.num_gpus
    num_epochs = math.ceil(cfg.max_iter / epoch_size)
    # Which learning rate adjustment step are we on? lr' = lr * gamma ^ step_index
    step_index = 0
    from data.sampler_utils import InfiniteSampler, build_batch_data_sampler
    infinite_sampler = InfiniteSampler(dataset,
                                       seed=args.random_seed,
                                       num_replicas=args.num_gpus,
                                       rank=rank,
                                       shuffle=True)
    train_sampler = build_batch_data_sampler(infinite_sampler,
                                             images_per_batch=args.batch_size)
    data_loader = data.DataLoader(
        dataset,
        num_workers=args.num_workers,
        collate_fn=collate_fn,
        multiprocessing_context="fork" if args.num_workers > 1 else None,
        batch_sampler=train_sampler)
    data_loader_iter = iter(data_loader)
    # Second loader for the joint (COCO) dataset, mirroring the main one.
    if cfg.dataset.joint:
        joint_infinite_sampler = InfiniteSampler(joint_dataset,
                                                 seed=args.random_seed,
                                                 num_replicas=args.num_gpus,
                                                 rank=rank,
                                                 shuffle=True)
        joint_train_sampler = build_batch_data_sampler(
            joint_infinite_sampler, images_per_batch=args.batch_size)
        joint_data_loader = data.DataLoader(
            joint_dataset,
            num_workers=args.num_workers,
            collate_fn=joint_collate_fn,
            multiprocessing_context="fork" if args.num_workers > 1 else None,
            batch_sampler=joint_train_sampler)
        joint_data_loader_iter = iter(joint_data_loader)
    dist.barrier()
    save_path = lambda epoch, iteration: SavePath(
        cfg.name, epoch, iteration).get_path(root=args.save_folder)
    time_avg = MovingAverage()
    data_time_avg = MovingAverage(10)
    global loss_types  # Forms the print order
    loss_avgs = {k: MovingAverage(100) for k in loss_types}

    def backward_and_log(prefix, net_outs, targets, masks, num_crowds,
                         extra_loss=None):
        # One optimizer step: criterion -> sum -> backward -> (finite) step.
        # Closes over optimizer/criterion/w/loss_avgs from train().
        optimizer.zero_grad()
        out = net_outs["pred_outs"]
        wrapper = ScatterWrapper(targets, masks, num_crowds)
        losses = criterion(out, wrapper, wrapper.make_mask())
        losses = {k: v.mean() for k, v in losses.items()}  # Mean here because Dataparallel
        if extra_loss is not None:
            assert type(extra_loss) == dict
            losses.update(extra_loss)
        loss = sum([losses[k] for k in losses])
        # Backprop
        loss.backward()  # Do this to free up vram even if loss is not finite
        if torch.isfinite(loss).item():
            optimizer.step()
        # Add the loss to the moving average for bookkeeping
        for k in losses:
            loss_avgs[k].add(losses[k].item())
            w.add_scalar('{prefix}/{key}'.format(prefix=prefix, key=k),
                         losses[k].item())
        return losses

    logger.info('Begin training!')
    # try-except so you can use ctrl+c to save early and stop training
    try:
        for epoch in range(num_epochs):
            # Resume from start_iter
            if (epoch + 1) * epoch_size < iteration:
                continue
            while True:
                data_start_time = time.perf_counter()
                datum = next(data_loader_iter)
                dist.barrier()
                data_end_time = time.perf_counter()
                data_time = data_end_time - data_start_time
                if iteration != args.start_iter:
                    data_time_avg.add(data_time)
                # Stop if we've reached an epoch if we're resuming from start_iter
                if iteration == (epoch + 1) * epoch_size:
                    break
                # Stop at the configured number of iterations even if mid-epoch
                if iteration == cfg.max_iter:
                    break
                # Change a config setting if we've reached the specified iteration
                changed = False
                for change in cfg.delayed_settings:
                    if iteration >= change[0]:
                        changed = True
                        cfg.replace(change[1])
                        # Reset the loss averages because things might have changed
                        # NOTE(review): iterating loss_avgs yields its string
                        # keys, so avg.reset() would raise AttributeError if a
                        # delayed setting ever fires — looks like a latent bug;
                        # likely intended loss_avgs.values(). TODO confirm.
                        for avg in loss_avgs:
                            avg.reset()
                # If a config setting was changed, remove it from the list so we don't keep checking
                if changed:
                    cfg.delayed_settings = [
                        x for x in cfg.delayed_settings if x[0] > iteration
                    ]
                # Warm up by linearly interpolating the learning rate from some smaller value
                if cfg.lr_warmup_until > 0 and iteration <= cfg.lr_warmup_until and cfg.lr_warmup_init < args.lr:
                    set_lr(optimizer,
                           (args.lr - cfg.lr_warmup_init) *
                           (iteration / cfg.lr_warmup_until) + cfg.lr_warmup_init)
                elif cfg.lr_schedule == 'cosine':
                    set_lr(
                        optimizer,
                        args.lr *
                        ((math.cos(math.pi * iteration / cfg.max_iter) + 1.) * .5))
                # Adjust the learning rate at the given iterations, but also if we resume from past that iteration
                while cfg.lr_schedule == 'step' and step_index < len(
                        cfg.lr_steps
                ) and iteration >= cfg.lr_steps[step_index]:
                    step_index += 1
                    set_lr(optimizer, args.lr * (args.gamma**step_index))
                # `lr` is a module-level global kept up to date by set_lr.
                global lr
                w.add_scalar('meta/lr', lr)
                if cfg.dataset.name == "FlyingChairs":
                    # --- Optical-flow branch ---------------------------
                    imgs_1, imgs_2, flows = prepare_flow_data(datum)
                    net_outs = net(None, extras=(imgs_1, imgs_2))
                    # Compute Loss
                    optimizer.zero_grad()
                    losses = criterion(net_outs, flows)
                    losses = {k: v.mean() for k, v in losses.items()
                              }  # Mean here because Dataparallel
                    loss = sum([losses[k] for k in losses])
                    # Backprop
                    loss.backward(
                    )  # Do this to free up vram even if loss is not finite
                    if torch.isfinite(loss).item():
                        optimizer.step()
                    # Add the loss to the moving average for bookkeeping
                    for k in losses:
                        loss_avgs[k].add(losses[k].item())
                        w.add_scalar('loss/%s' % k, losses[k].item())
                elif cfg.dataset.joint or not cfg.dataset.is_video:
                    # --- Still-image branch (COCO, or the joint stream) -
                    if cfg.dataset.joint:
                        joint_datum = next(joint_data_loader_iter)
                        dist.barrier()
                        # Load training data
                        # Note, for training on multiple gpus this will use the custom replicate and gather I wrote up there
                        images, targets, masks, num_crowds = prepare_data(
                            joint_datum)
                    else:
                        images, targets, masks, num_crowds = prepare_data(
                            datum)
                    extras = {
                        "backbone": "full",
                        "interrupt": False,
                        "moving_statistics": {
                            "aligned_feats": []
                        }
                    }
                    net_outs = net(images, extras=extras)
                    out = net_outs["pred_outs"]
                    # Compute Loss
                    optimizer.zero_grad()
                    wrapper = ScatterWrapper(targets, masks, num_crowds)
                    losses = criterion(out, wrapper, wrapper.make_mask())
                    losses = {k: v.mean() for k, v in losses.items()
                              }  # Mean here because Dataparallel
                    loss = sum([losses[k] for k in losses])
                    # Backprop
                    loss.backward(
                    )  # Do this to free up vram even if loss is not finite
                    if torch.isfinite(loss).item():
                        optimizer.step()
                    # Add the loss to the moving average for bookkeeping
                    for k in losses:
                        loss_avgs[k].add(losses[k].item())
                        w.add_scalar('joint/%s' % k, losses[k].item())
                # Forward Pass
                if cfg.dataset.is_video:
                    # --- Video branch: run reference frames first to build
                    # moving statistics, then train on the key frame. ------
                    # reference frames
                    references = []
                    moving_statistics = {"aligned_feats": [], "conf_hist": []}
                    # datum[:0:-1] iterates the non-key frames in reverse.
                    for idx, frame in enumerate(datum[:0:-1]):
                        images, annots = frame
                        extras = {
                            "backbone": "full",
                            "interrupt": True,
                            "keep_statistics": True,
                            "moving_statistics": moving_statistics
                        }
                        with torch.no_grad():
                            net_outs = net(images, extras=extras)
                        moving_statistics["feats"] = net_outs["feats"]
                        moving_statistics["lateral"] = net_outs["lateral"]
                        # Keep only the phase outputs; free everything else.
                        keys_to_save = ("outs_phase_1", "outs_phase_2")
                        for key in set(net_outs.keys()) - set(keys_to_save):
                            del net_outs[key]
                        references.append(net_outs)
                    # key frame with annotation, but not compute full backbone
                    frame = datum[0]
                    images, annots = frame
                    frame = (
                        images,
                        annots,
                    )
                    images, targets, masks, num_crowds = prepare_data(frame)
                    extras = {
                        "backbone": "full",
                        "interrupt": not cfg.flow.base_backward,
                        "moving_statistics": moving_statistics
                    }
                    gt_net_outs = net(images, extras=extras)
                    if cfg.flow.base_backward:
                        losses = backward_and_log("compute", gt_net_outs,
                                                  targets, masks, num_crowds)
                    keys_to_save = ("outs_phase_1", "outs_phase_2")
                    for key in set(gt_net_outs.keys()) - set(keys_to_save):
                        del gt_net_outs[key]
                    # now do the warp
                    if len(references) > 0:
                        reference_frame = references[0]
                        extras = {
                            "backbone": "partial",
                            "moving_statistics": moving_statistics
                        }
                        net_outs = net(images, extras=extras)
                        extra_loss = yolact_net.extra_loss(
                            net_outs, gt_net_outs)
                        losses = backward_and_log("warp",
                                                  net_outs,
                                                  targets,
                                                  masks,
                                                  num_crowds,
                                                  extra_loss=extra_loss)
                cur_time = time.time()
                elapsed = cur_time - last_time
                last_time = cur_time
                w.add_scalar('meta/data_time', data_time)
                w.add_scalar('meta/iter_time', elapsed)
                # Exclude graph setup from the timing information
                if iteration != args.start_iter:
                    time_avg.add(elapsed)
                # Console progress every 10 iterations.
                if iteration % 10 == 0:
                    eta_str = str(
                        datetime.timedelta(seconds=(cfg.max_iter - iteration) *
                                           time_avg.get_avg())).split('.')[0]
                    if torch.cuda.is_available():
                        max_mem_mb = torch.cuda.max_memory_allocated(
                        ) / 1024.0 / 1024.0
                        # torch.cuda.reset_max_memory_allocated()
                    else:
                        max_mem_mb = None
                    logger.info("""\
eta: {eta} epoch: {epoch} iter: {iter} \
{losses} {loss_total} \
time: {time} data_time: {data_time} lr: {lr} {memory}\
""".format(eta=eta_str,
           epoch=epoch,
           iter=iteration,
           losses=" ".join([
               "{}: {:.3f}".format(k, loss_avgs[k].get_avg()) for k in losses
           ]),
           loss_total="T: {:.3f}".format(
               sum([loss_avgs[k].get_avg() for k in losses])),
           data_time="{:.3f}".format(data_time_avg.get_avg()),
           time="{:.3f}".format(elapsed),
           lr="{:.6f}".format(lr),
           memory="max_mem: {:.0f}M".format(max_mem_mb)))
                # Rank-0 flow visualizations every 100 iterations.
                if rank == 0 and iteration % 100 == 0:
                    if cfg.flow.train_flow:
                        import flowiz as fz
                        from layers.warp_utils import deform_op
                        tgt_size = (64, 64)
                        flow_size = flows.size()[2:]
                        vis_data = []
                        for pred_flow in net_outs:
                            vis_data.append(pred_flow)
                        deform_gt = deform_op(imgs_2, flows)
                        flows_pred = [
                            F.interpolate(x,
                                          size=flow_size,
                                          mode='bilinear',
                                          align_corners=False)
                            for x in net_outs
                        ]
                        deform_preds = [
                            deform_op(imgs_2, x) for x in flows_pred
                        ]
                        vis_data.append(
                            F.interpolate(flows, size=tgt_size, mode='area'))
                        vis_data = [
                            F.interpolate(flow[:1], size=tgt_size)
                            for flow in vis_data
                        ]
                        vis_data = [
                            fz.convert_from_flow(
                                flow[0].data.cpu().numpy().transpose(
                                    1, 2, 0)).transpose(
                                        2, 0, 1).astype('float32') / 255
                            for flow in vis_data
                        ]

                        def convert_image(image):
                            # Downsample, de-normalize and channel-flip one
                            # image tensor into a [0,1] CHW float array.
                            image = F.interpolate(image,
                                                  size=tgt_size,
                                                  mode='area')
                            image = image[0]
                            image = image.data.cpu().numpy()
                            image = image[::-1]
                            image = image.transpose(1, 2, 0)
                            image = image * np.array(STD) + np.array(MEANS)
                            image = image.transpose(2, 0, 1)
                            image = image / 255
                            image = np.clip(image, -1, 1)
                            image = image[::-1]
                            return image

                        vis_data.append(convert_image(imgs_1))
                        vis_data.append(convert_image(imgs_2))
                        vis_data.append(convert_image(deform_gt))
                        vis_data.extend(
                            [convert_image(x) for x in deform_preds])
                        vis_data_stack = np.stack(vis_data, axis=0)
                        w.add_images("preds_flow", vis_data_stack)
                    elif cfg.flow.warp_mode == "flow":
                        import flowiz as fz
                        tgt_size = (64, 64)
                        vis_data = []
                        for pred_flow, _, _ in net_outs["preds_flow"]:
                            vis_data.append(pred_flow)
                        vis_data = [
                            F.interpolate(flow[:1], size=tgt_size)
                            for flow in vis_data
                        ]
                        vis_data = [
                            fz.convert_from_flow(
                                flow[0].data.cpu().numpy().transpose(
                                    1, 2, 0)).transpose(
                                        2, 0, 1).astype('float32') / 255
                            for flow in vis_data
                        ]
                        input_image = F.interpolate(images,
                                                    size=tgt_size,
                                                    mode='area')
                        input_image = input_image[0]
                        input_image = input_image.data.cpu().numpy()
                        input_image = input_image.transpose(1, 2, 0)
                        input_image = input_image * np.array(
                            STD[::-1]) + np.array(MEANS[::-1])
                        input_image = input_image.transpose(2, 0, 1)
                        input_image = input_image / 255
                        input_image = np.clip(input_image, -1, 1)
                        vis_data.append(input_image)
                        vis_data_stack = np.stack(vis_data, axis=0)
                        w.add_images("preds_flow", vis_data_stack)
                iteration += 1
                w.set_step(iteration)
                # Rank-0 checkpointing on the save interval.
                if rank == 0 and iteration % args.save_interval == 0 and iteration != args.start_iter:
                    if args.keep_latest:
                        latest = SavePath.get_latest(args.save_folder,
                                                     cfg.name)
                    logger.info('Saving state, iter: {}'.format(iteration))
                    yolact_net.save_weights(save_path(epoch, iteration))
                    if args.keep_latest and latest is not None:
                        if args.keep_latest_interval <= 0 or iteration % args.keep_latest_interval != args.save_interval:
                            logger.info('Deleting old save...')
                            os.remove(latest)
            # This is done per epoch
            if args.validation_epoch > 0:
                if epoch % args.validation_epoch == 0 and epoch > 0:
                    if rank == 0:
                        compute_validation_map(yolact_net, val_dataset)
                    dist.barrier()
    except KeyboardInterrupt:
        if args.interrupt_no_save:
            logger.info('No save on interrupt, just exiting...')
        elif rank == 0:
            print('Stopping early. Saving network...')
            # Delete previous copy of the interrupted network so we don't spam the weights folder
            SavePath.remove_interrupt(args.save_folder)
            yolact_net.save_weights(
                save_path(epoch,
                          repr(iteration) + '_interrupt'))
        return
    if rank == 0:
        yolact_net.save_weights(save_path(epoch, iteration))
class YOLACT_MODEL():
    """YOLACT inference wrapper with mask/box rendering.

    ``opts`` must provide 'checkpoint', 'threshold', and 'mode'
    ('mask_only', 'box_only', or anything else for both).
    """

    def __init__(self, opts):
        #concat the two files to one file
        # if not os.path.isfile('weights/yolact_resnet50_54_800000.pth'):
        #     script = "cat weights/a* > weights/yolact_resnet50_54_800000.pth"
        #     call(script, shell=True)
        set_cfg('yolact_resnet50_config')
        cudnn.benchmark = True
        cudnn.fastest = True
        # Process-wide default: all new tensors are CUDA floats.
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        self.net = Yolact()
        self.net.load_weights(opts['checkpoint'])
        print("done.")
        self.net.eval()
        self.net = self.net.cuda()
        self.net.detect.use_fast_nms = True
        cfg.mask_proto_debug = False
        # Per-GPU-device cache of detection colors, filled by get_color().
        self.color_cache = defaultdict(lambda: {})
        self.threshold = opts['threshold']
        # Rendering mode: 'mask_only' / 'box_only' / both (any other value).
        self.mode = opts['mode']

    def detect(self, img):
        """Run inference on ``img`` and return the rendered
        (image, boxes, scores) tuple from ``display``."""
        numpy_image = np.array(img)
        print('starting inference...')
        frame = torch.from_numpy(numpy_image).cuda().float()
        batch = FastBaseTransform()(frame.unsqueeze(0))
        preds = self.net(batch)
        print("done.")
        return self.display(preds,
                            frame,
                            None,
                            None,
                            undo_transform=False,
                            score_threshold=self.threshold)

    def display(self,
                dets_out,
                img,
                h,
                w,
                undo_transform=True,
                class_color=False,
                mask_alpha=0.45,
                top_k=100,
                score_threshold=0.3):
        """Render detections onto ``img``.

        Returns ``(img_numpy, boxes, scores)`` — a uint8 HxWx3 image with
        masks/boxes drawn per ``self.mode``, plus the kept boxes and scores.
        If nothing passes the threshold, only the plain image array is
        returned (no tuple). ``h``/``w`` args are ignored; the image's own
        shape is used.
        """
        img_gpu = img / 255.0
        h, w, _ = img.shape
        with timer.env('Postprocess'):
            t = postprocess(dets_out,
                            w,
                            h,
                            visualize_lincomb=False,
                            crop_masks=True,
                            score_threshold=score_threshold)
            torch.cuda.synchronize()
        with timer.env('Copy'):
            if cfg.eval_mask_branch:
                # Masks are drawn on the GPU, so don't copy
                masks = t[3][:top_k]
            classes, scores, boxes = [
                x[:top_k].detach().cpu().numpy() for x in t[:3]
            ]
        # postprocess returns detections sorted by score, so truncate at the
        # first entry below zero.
        num_dets_to_consider = min(top_k, classes.shape[0])
        for j in range(num_dets_to_consider):
            if scores[j] < 0:
                num_dets_to_consider = j
                break
        if num_dets_to_consider == 0:
            # No detections found so just output the original image
            return (img_gpu * 255).byte().detach().cpu().numpy()

        # Quick and dirty lambda for selecting the color for a particular index
        # Also keeps track of a per-gpu color cache for maximum speed
        def get_color(j, on_gpu=None):
            color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)
            if on_gpu is not None and color_idx in self.color_cache[on_gpu]:
                return self.color_cache[on_gpu][color_idx]
            else:
                color = COLORS[color_idx]
                if not undo_transform:
                    # The image might come in as RGB or BRG, depending
                    color = (color[2], color[1], color[0])
                if on_gpu is not None:
                    color = torch.Tensor(color).to(on_gpu).float() / 255.
                    self.color_cache[on_gpu][color_idx] = color
                return color

        # Translate self.mode into the two rendering toggles.
        show_mask = True
        show_box = True
        if self.mode == "mask_only":
            show_box = False
        if self.mode == "box_only":
            show_mask = False
        print("mode :", self.mode)
        print("show_mask :", show_mask)
        print("show_box :", show_box)
        # First, draw the masks on the GPU where we can do it really fast
        # Beware: very fast but possibly unintelligible mask-drawing code ahead
        # I wish I had access to OpenGL or Vulkan but alas, I guess Pytorch tensor operations will have to suffice
        if show_mask and cfg.eval_mask_branch:
            # After this, mask is of size [num_dets, h, w, 1]
            masks = masks[:num_dets_to_consider, :, :, None]
            # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1])
            colors = torch.cat([
                get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
                for j in range(num_dets_to_consider)
            ],
                               dim=0)
            masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha
            # This is 1 everywhere except for 1-mask_alpha where the mask is
            inv_alph_masks = masks * (-mask_alpha) + 1
            # I did the math for this on pen and paper. This whole block should be equivalent to:
            #    for j in range(num_dets_to_consider):
            #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
            masks_color_summand = masks_color[0]
            if num_dets_to_consider > 1:
                inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider -
                                                  1)].cumprod(dim=0)
                masks_color_cumul = masks_color[1:] * inv_alph_cumul
                masks_color_summand += masks_color_cumul.sum(dim=0)
            img_gpu = img_gpu * inv_alph_masks.prod(
                dim=0) + masks_color_summand
        # Then draw the stuff that needs to be done on the cpu
        # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
        img_numpy = (img_gpu * 255).byte().cpu().numpy()
        if show_box:
            # Draw highest-score boxes last so they end up on top.
            for j in reversed(range(num_dets_to_consider)):
                x1, y1, x2, y2 = boxes[j, :]
                color = get_color(j)
                score = scores[j]
                # NOTE(review): the `if True:` guards below look like
                # remnants of display_bboxes/display_text flags that were
                # hard-wired on — confirm before simplifying.
                if True:
                    cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)
                if True:
                    _class = cfg.dataset.class_names[classes[j]]
                    text_str = '%s: %.2f' % (_class, score) if True else _class
                    font_face = cv2.FONT_HERSHEY_DUPLEX
                    font_scale = 0.6
                    font_thickness = 1
                    text_w, text_h = cv2.getTextSize(text_str, font_face,
                                                     font_scale,
                                                     font_thickness)[0]
                    text_pt = (x1, y1 - 3)
                    text_color = [255, 255, 255]
                    # Filled background rectangle behind the label text.
                    cv2.rectangle(img_numpy, (x1, y1),
                                  (x1 + text_w, y1 - text_h - 4), color, -1)
                    cv2.putText(img_numpy, text_str, text_pt, font_face,
                                font_scale, text_color, font_thickness,
                                cv2.LINE_AA)
        return (img_numpy, boxes, scores)
def interpret():
    """Diagnostic pass: build the full training setup, then iterate the data
    loader once (single epoch), counting how many batches load successfully.

    The optimizer/criterion are constructed for parity with train() but no
    forward or backward pass is performed — batches are discarded immediately.
    """
    if not os.path.exists(args.save_folder):
        os.mkdir(args.save_folder)
    dataset = COCODetection(image_path=cfg.dataset.train_images,
                            info_file=cfg.dataset.train_info,
                            transform=SSDAugmentation(MEANS))
    if args.validation_epoch > 0:
        setup_eval()
        val_dataset = COCODetection(image_path=cfg.dataset.valid_images,
                                    info_file=cfg.dataset.valid_info,
                                    transform=BaseTransform(MEANS))
    # Parallel wraps the underlying module, but when saving and loading we don't want that
    yolact_net = Yolact()
    net = yolact_net
    net.train()
    # I don't use the timer during training (I use a different timing method).
    # Apparently there's a race condition with multiple GPUs.
    timer.disable_all()
    # Both of these can set args.resume to None, so do them before the check
    if args.resume == 'interrupt':
        args.resume = SavePath.get_interrupt(args.save_folder)
    elif args.resume == 'latest':
        args.resume = SavePath.get_latest(args.save_folder, cfg.name)
    if args.resume is not None:
        print('Resuming training, loading {}...'.format(args.resume))
        yolact_net.load_weights(args.resume)
        if args.start_iter == -1:
            # Recover the iteration count from the checkpoint filename.
            args.start_iter = SavePath.from_str(args.resume).iteration
    else:
        print('Initializing weights...')
        yolact_net.init_weights(backbone_path=args.save_folder +
                                cfg.backbone.path)
    optimizer = optim.SGD(net.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.decay)
    criterion = MultiBoxLoss(num_classes=cfg.num_classes,
                             pos_threshold=cfg.positive_iou_threshold,
                             neg_threshold=cfg.negative_iou_threshold,
                             negpos_ratio=3)
    if args.cuda:
        cudnn.benchmark = True
        net = nn.DataParallel(net).cuda()
        criterion = nn.DataParallel(criterion).cuda()
        # net = net.cuda()
        # criterion = criterion.cuda()
        # criterion = criterion.cuda()
    # loss counters
    loc_loss = 0
    conf_loss = 0
    iteration = max(args.start_iter, 0)
    last_time = time.time()
    epoch_size = len(dataset) // args.batch_size
    print("Dataset Size:")
    print(len(dataset))
    num_epochs = math.ceil(cfg.max_iter / epoch_size)
    # Deliberately overridden: one pass over the data is enough to count it.
    num_epochs = 1
    # Which learning rate adjustment step are we on? lr' = lr * gamma ^ step_index
    step_index = 0
    data_loader = data.DataLoader(dataset,
                                  args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True,
                                  collate_fn=detection_collate,
                                  pin_memory=True)
    save_path = lambda epoch, iteration: SavePath(
        cfg.name, epoch, iteration).get_path(root=args.save_folder)
    time_avg = MovingAverage()
    global loss_types  # Forms the print order
    loss_avgs = {k: MovingAverage(100) for k in loss_types}
    print('Begin interpret!')
    print()
    # try-except so you can use ctrl+c to save early and stop training
    try:
        for epoch in range(num_epochs):
            # Resume from start_iter
            if (epoch + 1) * epoch_size < iteration:
                continue
            count = 0
            # Drain the loader, freeing each batch immediately; progress
            # line every 10000 batches.
            for datum in data_loader:
                del datum
                count += 1
                if count % 10000 == 0:
                    print(count)
                continue
    except KeyboardInterrupt:
        print('Stopping early. Saving network...')
    # NOTE(review): if interrupted before the loop starts, `count` is
    # unbound here and this raises NameError — confirm intended.
    print("Loaded Dataset Numbers")
    print(count)
def train():
    """Train YOLACT on the dataset configured in `cfg`.

    All hyper-parameters come from the module-level `args` (CLI) and `cfg`
    (model config).  Checkpoints are written to `args.save_folder`; Ctrl+C
    saves an '*_interrupt' checkpoint and exits.  This variant feeds targets
    through prepare_data()/ScatterWrapper for multi-GPU DataParallel.
    """
    if not os.path.exists(args.save_folder):
        os.mkdir(args.save_folder)

    dataset = COCODetection(image_path=cfg.dataset.train_images,
                            info_file=cfg.dataset.train_info,
                            transform=SSDAugmentation(MEANS))

    if args.validation_epoch > 0:
        setup_eval()
        val_dataset = COCODetection(image_path=cfg.dataset.valid_images,
                                    info_file=cfg.dataset.valid_info,
                                    transform=BaseTransform(MEANS))

    # Parallel wraps the underlying module, but when saving and loading we don't want that
    yolact_net = Yolact()
    net = yolact_net
    net.train()

    # Both of these can set args.resume to None, so do them before the check
    if args.resume == 'interrupt':
        args.resume = SavePath.get_interrupt(args.save_folder)
    elif args.resume == 'latest':
        args.resume = SavePath.get_latest(args.save_folder, cfg.name)

    if args.resume is not None:
        print('Resuming training, loading {}...'.format(args.resume))
        yolact_net.load_weights(args.resume)
        if args.start_iter == -1:
            # Recover the iteration count encoded in the checkpoint filename.
            args.start_iter = SavePath.from_str(args.resume).iteration
    else:
        print('Initializing weights...')
        yolact_net.init_weights(backbone_path=args.save_folder + cfg.backbone.path)

    optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum,
                          weight_decay=args.decay)
    criterion = MultiBoxLoss(num_classes=cfg.num_classes,
                             pos_threshold=cfg.positive_iou_threshold,
                             neg_threshold=cfg.negative_iou_threshold,
                             negpos_ratio=3)

    if args.cuda:
        cudnn.benchmark = True
        net = nn.DataParallel(net).cuda()
        criterion = nn.DataParallel(criterion).cuda()
        # net = net.cuda()
        # criterion = criterion.cuda()
        # criterion = criterion.cuda()

    # loss counters
    # NOTE(review): loc_loss/conf_loss are never updated below — vestigial.
    loc_loss = 0
    conf_loss = 0
    iteration = max(args.start_iter, 0)
    last_time = time.time()

    epoch_size = len(dataset) // args.batch_size
    num_epochs = math.ceil(cfg.max_iter / epoch_size)

    # Which learning rate adjustment step are we on? lr' = lr * gamma ^ step_index
    step_index = 0

    data_loader = data.DataLoader(dataset, args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True, collate_fn=detection_collate,
                                  pin_memory=True)

    save_path = lambda epoch, iteration: SavePath(
        cfg.name, epoch, iteration).get_path(root=args.save_folder)
    time_avg = MovingAverage()

    loss_types = ['B', 'C', 'M', 'P', 'D', 'E', 'S']  # Forms the print order
    loss_avgs = {k: MovingAverage(100) for k in loss_types}

    print('Begin training!')
    print()
    # try-except so you can use ctrl+c to save early and stop training
    try:
        for epoch in range(num_epochs):
            # Resume from start_iter
            if (epoch + 1) * epoch_size < iteration:
                continue

            for datum in data_loader:
                # Stop if we've reached an epoch if we're resuming from start_iter
                if iteration == (epoch + 1) * epoch_size:
                    break

                # Stop at the configured number of iterations even if mid-epoch
                if iteration == cfg.max_iter:
                    break

                # Change a config setting if we've reached the specified iteration
                changed = False
                for change in cfg.delayed_settings:
                    if iteration >= change[0]:
                        changed = True
                        cfg.replace(change[1])

                        # Reset the loss averages because things might have changed
                        # NOTE(review): iterating the dict yields *keys* (strings),
                        # so avg.reset() would raise AttributeError if a delayed
                        # setting ever fires; presumably loss_avgs[avg].reset()
                        # was intended.
                        for avg in loss_avgs:
                            avg.reset()

                # If a config setting was changed, remove it from the list so we don't keep checking
                if changed:
                    cfg.delayed_settings = [
                        x for x in cfg.delayed_settings if x[0] > iteration
                    ]

                # Warm up by linearly interpolating the learning rate from some smaller value
                if cfg.lr_warmup_until > 0 and iteration <= cfg.lr_warmup_until:
                    set_lr(optimizer,
                           (args.lr - cfg.lr_warmup_init) *
                           (iteration / cfg.lr_warmup_until) + cfg.lr_warmup_init)

                # Adjust the learning rate at the given iterations, but also if we resume from past that iteration
                while step_index < len(
                        cfg.lr_steps
                ) and iteration >= cfg.lr_steps[step_index]:
                    step_index += 1
                    set_lr(optimizer, args.lr * (args.gamma**step_index))

                # Load training data
                # Note, for training on multiple gpus this will use the custom replicate and gather I wrote up there
                images, targets, masks, num_crowds = prepare_data(datum)

                # Forward Pass
                out = net(images)

                # Compute Loss
                optimizer.zero_grad()

                wrapper = ScatterWrapper(targets, masks, num_crowds)
                losses = criterion(out, wrapper, wrapper.make_mask())

                losses = {k: v.mean() for k, v in losses.items()
                          }  # Mean here because Dataparallel
                loss = sum([losses[k] for k in losses])

                # Backprop
                loss.backward(
                )  # Do this to free up vram even if loss is not finite
                if torch.isfinite(loss).item():
                    optimizer.step()

                # Add the loss to the moving average for bookkeeping
                for k in losses:
                    loss_avgs[k].add(losses[k].item())

                cur_time = time.time()
                elapsed = cur_time - last_time
                last_time = cur_time

                # Exclude graph setup from the timing information
                if iteration != args.start_iter:
                    time_avg.add(elapsed)

                if iteration % 10 == 0:
                    eta_str = str(
                        datetime.timedelta(seconds=(cfg.max_iter - iteration) *
                                           time_avg.get_avg())).split('.')[0]

                    total = sum([loss_avgs[k].get_avg() for k in losses])
                    loss_labels = sum([[k, loss_avgs[k].get_avg()]
                                       for k in loss_types if k in losses], [])

                    print(('[%3d] %7d ||' + (' %s: %.3f |' * len(losses)) +
                           ' T: %.3f || ETA: %s || timer: %.3f') %
                          tuple([epoch, iteration] + loss_labels +
                                [total, eta_str, elapsed]),
                          flush=True)

                iteration += 1

                if iteration % args.save_interval == 0 and iteration != args.start_iter:
                    if args.keep_latest:
                        latest = SavePath.get_latest(args.save_folder, cfg.name)

                    print('Saving state, iter:', iteration)
                    yolact_net.save_weights(save_path(epoch, iteration))

                    # Short-circuit on args.keep_latest guards `latest` from
                    # being referenced when it was never assigned above.
                    if args.keep_latest and latest is not None:
                        if args.keep_latest_interval <= 0 or iteration % args.keep_latest_interval != args.save_interval:
                            print('Deleting old save...')
                            os.remove(latest)

            # This is done per epoch
            if args.validation_epoch > 0:
                if epoch % args.validation_epoch == 0 and epoch > 0:
                    compute_validation_map(yolact_net, val_dataset)
    except KeyboardInterrupt:
        print('Stopping early. Saving network...')
        # Delete previous copy of the interrupted network so we don't spam the weights folder
        SavePath.remove_interrupt(args.save_folder)
        yolact_net.save_weights(
            save_path(epoch, repr(iteration) + '_interrupt'))
        exit()

    yolact_net.save_weights(save_path(epoch, iteration))
def train():
    """Train YOLACT using CustomDataParallel + NetLoss (loss computed on-GPU).

    Differences from the ScatterWrapper variant: optional structured logging
    (`args.log`), batch allocation across GPUs (`args.batch_alloc`), a
    batch-norm warm-up forward pass, per-iteration log records, and a final
    validation-mAP pass after training completes.
    """
    if not os.path.exists(args.save_folder):
        os.mkdir(args.save_folder)

    dataset = COCODetection(image_path=cfg.dataset.train_images,
                            info_file=cfg.dataset.train_info,
                            transform=SSDAugmentation(MEANS))

    if args.validation_epoch > 0:
        setup_eval()
        val_dataset = COCODetection(image_path=cfg.dataset.valid_images,
                                    info_file=cfg.dataset.valid_info,
                                    transform=BaseTransform(MEANS))

    # Parallel wraps the underlying module, but when saving and loading we don't want that
    yolact_net = Yolact()
    net = yolact_net
    net.train()

    if args.log:
        log = Log(cfg.name, args.log_folder, dict(args._get_kwargs()),
                  overwrite=(args.resume is None),
                  log_gpu_stats=args.log_gpu)

    # I don't use the timer during training (I use a different timing method).
    # Apparently there's a race condition with multiple GPUs, so disable it just to be safe.
    timer.disable_all()

    # Both of these can set args.resume to None, so do them before the check
    if args.resume == 'interrupt':
        args.resume = SavePath.get_interrupt(args.save_folder)
    elif args.resume == 'latest':
        args.resume = SavePath.get_latest(args.save_folder, cfg.name)

    if args.resume is not None:
        print('Resuming training, loading {}...'.format(args.resume))
        yolact_net.load_weights(args.resume)
        if args.start_iter == -1:
            # Recover the iteration count encoded in the checkpoint filename.
            args.start_iter = SavePath.from_str(args.resume).iteration
    else:
        print('Initializing weights...')
        yolact_net.init_weights(backbone_path=args.save_folder + cfg.backbone.path)

    optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum,
                          weight_decay=args.decay)
    criterion = MultiBoxLoss(num_classes=cfg.num_classes,
                             pos_threshold=cfg.positive_iou_threshold,
                             neg_threshold=cfg.negative_iou_threshold,
                             negpos_ratio=cfg.ohem_negpos_ratio)

    if args.batch_alloc is not None:
        args.batch_alloc = [int(x) for x in args.batch_alloc.split(',')]
        if sum(args.batch_alloc) != args.batch_size:
            print(
                'Error: Batch allocation (%s) does not sum to batch size (%s).'
                % (args.batch_alloc, args.batch_size))
            exit(-1)

    net = CustomDataParallel(NetLoss(net, criterion))
    if args.cuda:
        net = net.cuda()

    # Initialize everything
    if not cfg.freeze_bn:
        yolact_net.freeze_bn()  # Freeze bn so we don't kill our means
    # Dummy forward pass to build lazily-created layers before training.
    yolact_net(torch.zeros(1, 3, cfg.max_size, cfg.max_size).cuda())
    if not cfg.freeze_bn:
        yolact_net.freeze_bn(True)

    # loss counters
    # NOTE(review): loc_loss/conf_loss are never updated below — vestigial.
    loc_loss = 0
    conf_loss = 0
    iteration = max(args.start_iter, 0)
    last_time = time.time()

    epoch_size = len(dataset) // args.batch_size
    num_epochs = math.ceil(cfg.max_iter / epoch_size)

    # Which learning rate adjustment step are we on? lr' = lr * gamma ^ step_index
    step_index = 0

    data_loader = data.DataLoader(dataset, args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True, collate_fn=detection_collate,
                                  pin_memory=True)

    save_path = lambda epoch, iteration: SavePath(
        cfg.name, epoch, iteration).get_path(root=args.save_folder)
    time_avg = MovingAverage()

    global loss_types  # Forms the print order
    loss_avgs = {k: MovingAverage(100) for k in loss_types}

    print('Begin training!')
    print()
    # try-except so you can use ctrl+c to save early and stop training
    try:
        for epoch in range(num_epochs):
            # Resume from start_iter
            if (epoch + 1) * epoch_size < iteration:
                continue

            for datum in data_loader:
                # Stop if we've reached an epoch if we're resuming from start_iter
                if iteration == (epoch + 1) * epoch_size:
                    break

                # Stop at the configured number of iterations even if mid-epoch
                if iteration == cfg.max_iter:
                    break

                # Change a config setting if we've reached the specified iteration
                changed = False
                for change in cfg.delayed_settings:
                    if iteration >= change[0]:
                        changed = True
                        cfg.replace(change[1])

                        # Reset the loss averages because things might have changed
                        # NOTE(review): iterating the dict yields *keys* (strings),
                        # so avg.reset() would raise if a delayed setting fires;
                        # presumably loss_avgs[avg].reset() was intended.
                        for avg in loss_avgs:
                            avg.reset()

                # If a config setting was changed, remove it from the list so we don't keep checking
                if changed:
                    cfg.delayed_settings = [
                        x for x in cfg.delayed_settings if x[0] > iteration
                    ]

                # Warm up by linearly interpolating the learning rate from some smaller value
                if cfg.lr_warmup_until > 0 and iteration <= cfg.lr_warmup_until:
                    set_lr(optimizer,
                           (args.lr - cfg.lr_warmup_init) *
                           (iteration / cfg.lr_warmup_until) + cfg.lr_warmup_init)

                # Adjust the learning rate at the given iterations, but also if we resume from past that iteration
                while step_index < len(
                        cfg.lr_steps
                ) and iteration >= cfg.lr_steps[step_index]:
                    step_index += 1
                    set_lr(optimizer, args.lr * (args.gamma**step_index))

                # Zero the grad to get ready to compute gradients
                optimizer.zero_grad()

                # Forward Pass + Compute loss at the same time (see CustomDataParallel and NetLoss)
                losses = net(datum)

                losses = {k: (v).mean() for k, v in losses.items()
                          }  # Mean here because Dataparallel
                loss = sum([losses[k] for k in losses])
                # no_inf_mean removes some components from the loss, so make sure to backward through all of it
                # all_loss = sum([v.mean() for v in losses.values()])

                # Backprop
                loss.backward(
                )  # Do this to free up vram even if loss is not finite
                if torch.isfinite(loss).item():
                    optimizer.step()

                # Add the loss to the moving average for bookkeeping
                for k in losses:
                    loss_avgs[k].add(losses[k].item())

                cur_time = time.time()
                elapsed = cur_time - last_time
                last_time = cur_time

                # Exclude graph setup from the timing information
                if iteration != args.start_iter:
                    time_avg.add(elapsed)

                if iteration % 10 == 0:
                    eta_str = str(
                        datetime.timedelta(seconds=(cfg.max_iter - iteration) *
                                           time_avg.get_avg())).split('.')[0]

                    total = sum([loss_avgs[k].get_avg() for k in losses])
                    loss_labels = sum([[k, loss_avgs[k].get_avg()]
                                       for k in loss_types if k in losses], [])

                    print(('[%3d] %7d ||' + (' %s: %.3f |' * len(losses)) +
                           ' T: %.3f || ETA: %s || timer: %.3f') %
                          tuple([epoch, iteration] + loss_labels +
                                [total, eta_str, elapsed]),
                          flush=True)

                if args.log:
                    precision = 5
                    loss_info = {
                        k: round(losses[k].item(), precision)
                        for k in losses
                    }
                    loss_info['T'] = round(loss.item(), precision)

                    if args.log_gpu:
                        log.log_gpu_stats = (iteration % 10 == 0
                                             )  # nvidia-smi is sloooow

                    # NOTE(review): `cur_lr` is not assigned in this function —
                    # assumes set_lr() maintains a module-level cur_lr; confirm.
                    log.log('train',
                            loss=loss_info,
                            epoch=epoch,
                            iter=iteration,
                            lr=round(cur_lr, 10),
                            elapsed=elapsed)

                    log.log_gpu_stats = args.log_gpu

                iteration += 1

                if iteration % args.save_interval == 0 and iteration != args.start_iter:
                    if args.keep_latest:
                        latest = SavePath.get_latest(args.save_folder, cfg.name)

                    print('Saving state, iter:', iteration)
                    yolact_net.save_weights(save_path(epoch, iteration))

                    if args.keep_latest and latest is not None:
                        if args.keep_latest_interval <= 0 or iteration % args.keep_latest_interval != args.save_interval:
                            print('Deleting old save...')
                            os.remove(latest)

            # This is done per epoch
            if args.validation_epoch > 0:
                if epoch % args.validation_epoch == 0 and epoch > 0:
                    compute_validation_map(epoch, iteration, yolact_net,
                                           val_dataset,
                                           log if args.log else None)

        # Compute validation mAP after training is finished
        compute_validation_map(epoch, iteration, yolact_net, val_dataset,
                               log if args.log else None)
    except KeyboardInterrupt:
        if args.interrupt:
            print('Stopping early. Saving network...')
            # Delete previous copy of the interrupted network so we don't spam the weights folder
            SavePath.remove_interrupt(args.save_folder)
            yolact_net.save_weights(
                save_path(epoch, repr(iteration) + '_interrupt'))
        exit()

    yolact_net.save_weights(save_path(epoch, iteration))
def train():
    """Train YOLACT, optionally in an adversarial (WGAN-style) setup.

    When cfg.pred_seg is set, a Discriminator_Wgan critic is trained alongside
    the YOLACT generator: each batch runs cfg.dis_iter critic updates (with
    WGAN weight clipping) interleaved with generator updates whose loss adds a
    GeneratorLoss_Maskrcnn term.  Without cfg.pred_seg this degenerates to the
    standard YOLACT training loop.  AMP (autocast + GradScaler) is used when
    cfg.amp is enabled.  All settings come from module-level `args` and `cfg`.
    """
    if not os.path.exists(args.save_folder):
        os.mkdir(args.save_folder)

    dataset = COCODetection(image_path=cfg.dataset.train_images,
                            info_file=cfg.dataset.train_info,
                            transform=SSDAugmentation(MEANS))

    if args.validation_epoch > 0:
        setup_eval()
        val_dataset = COCODetection(image_path=cfg.dataset.valid_images,
                                    info_file=cfg.dataset.valid_info,
                                    transform=BaseTransform(MEANS))

    # Parallel wraps the underlying module, but when saving and loading we don't want that
    yolact_net = Yolact()
    net = yolact_net
    net.train()
    print('\n--- Generator created! ---')

    # NOTE
    # I maunally set the original image size and seg size as 138
    # might change in the future, for example 550
    if cfg.pred_seg:
        dis_size = 138
        dis_net = Discriminator_Wgan(i_size=dis_size, s_size=dis_size)
        # Change the initialization inside the dis_net class inside
        # set the dis net's initial parameter values
        # dis_net.apply(gan_init)
        dis_net.train()
        print('--- Discriminator created! ---\n')

    if args.log:
        log = Log(cfg.name, args.log_folder, dict(args._get_kwargs()),
                  overwrite=(args.resume is None),
                  log_gpu_stats=args.log_gpu)

    # I don't use the timer during training (I use a different timing method).
    # Apparently there's a race condition with multiple GPUs, so disable it just to be safe.
    timer.disable_all()

    # Both of these can set args.resume to None, so do them before the check
    if args.resume == 'interrupt':
        args.resume = SavePath.get_interrupt(args.save_folder)
    elif args.resume == 'latest':
        args.resume = SavePath.get_latest(args.save_folder, cfg.name)

    if args.resume is not None:
        print('Resuming training, loading {}...'.format(args.resume))
        yolact_net.load_weights(args.resume)
        if args.start_iter == -1:
            args.start_iter = SavePath.from_str(args.resume).iteration
    else:
        print('Initializing weights...')
        yolact_net.init_weights(backbone_path=args.save_folder + cfg.backbone.path)

    # (Earlier SGD-based generator/discriminator optimizers and a per-parameter
    # discriminator LR scheme were tried and left commented out; see VCS history.)
    # NOTE: Using the Ranger Optimizer for the generator
    optimizer_gen = Ranger(net.parameters(), lr=args.lr, weight_decay=args.decay)
    # FIXME: Might need to modify the lr in the optimizer carefually
    if cfg.pred_seg:
        optimizer_dis = optim.SGD(dis_net.parameters(), lr=cfg.dis_lr)
        # NOTE(review): schedule_dis is only stepped inside the commented-out
        # validation pass further down, so it currently has no effect.
        schedule_dis = ReduceLROnPlateau(optimizer_dis, mode='min',
                                         patience=6, min_lr=1E-6)

    criterion = MultiBoxLoss(num_classes=cfg.num_classes,
                             pos_threshold=cfg.positive_iou_threshold,
                             neg_threshold=cfg.negative_iou_threshold,
                             negpos_ratio=cfg.ohem_negpos_ratio,
                             pred_seg=cfg.pred_seg)

    # criterion_dis = nn.BCELoss()
    # Take the advice from WGAN: critic-style losses instead of BCE.
    criterion_dis = DiscriminatorLoss_Maskrcnn()
    criterion_gen = GeneratorLoss_Maskrcnn()

    if args.batch_alloc is not None:
        # e.g. args.batch_alloc: 24,24
        args.batch_alloc = [int(x) for x in args.batch_alloc.split(',')]
        if sum(args.batch_alloc) != args.batch_size:
            print('Error: Batch allocation (%s) does not sum to batch size (%s).'
                  % (args.batch_alloc, args.batch_size))
            exit(-1)

    net = CustomDataParallel(NetLoss(net, criterion, pred_seg=cfg.pred_seg))
    if args.cuda:
        net = net.cuda()
        # NOTE
        if cfg.pred_seg:
            dis_net = nn.DataParallel(dis_net)
            dis_net = dis_net.cuda()

    # Initialize everything
    if not cfg.freeze_bn:
        yolact_net.freeze_bn()  # Freeze bn so we don't kill our means
    # Dummy forward pass to build lazily-created layers before training.
    yolact_net(torch.zeros(1, 3, cfg.max_size, cfg.max_size).cuda())
    if not cfg.freeze_bn:
        yolact_net.freeze_bn(True)

    # loss counters
    # NOTE(review): loc_loss/conf_loss are never updated below — vestigial.
    loc_loss = 0
    conf_loss = 0
    iteration = max(args.start_iter, 0)
    last_time = time.time()

    epoch_size = len(dataset) // args.batch_size
    num_epochs = math.ceil(cfg.max_iter / epoch_size)

    # Which learning rate adjustment step are we on? lr' = lr * gamma ^ step_index
    step_index = 0

    data_loader = data.DataLoader(dataset, args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True, collate_fn=detection_collate,
                                  pin_memory=True)
    # NOTE
    val_loader = data.DataLoader(val_dataset, args.batch_size,
                                 num_workers=args.num_workers * 2,
                                 shuffle=True, collate_fn=detection_collate,
                                 pin_memory=True)

    save_path = lambda epoch, iteration: SavePath(
        cfg.name, epoch, iteration).get_path(root=args.save_folder)
    time_avg = MovingAverage()

    global loss_types  # Forms the print order
    # TODO: global command can modify global variable inside of the function.
    loss_avgs = {k: MovingAverage(100) for k in loss_types}

    # NOTE
    # Enable AMP
    amp_enable = cfg.amp
    scaler = torch.cuda.amp.GradScaler(enabled=amp_enable)

    print('Begin training!')
    print()
    # try-except so you can use ctrl+c to save early and stop training
    try:
        for epoch in range(num_epochs):
            # Resume from start_iter
            if (epoch + 1) * epoch_size < iteration:
                continue

            for datum in data_loader:
                # Stop if we've reached an epoch if we're resuming from start_iter
                if iteration == (epoch + 1) * epoch_size:
                    break

                # Stop at the configured number of iterations even if mid-epoch
                if iteration == cfg.max_iter:
                    break

                # Change a config setting if we've reached the specified iteration
                changed = False
                for change in cfg.delayed_settings:
                    if iteration >= change[0]:
                        changed = True
                        cfg.replace(change[1])

                        # Reset the loss averages because things might have changed
                        # NOTE(review): iterates dict *keys* (strings); avg.reset()
                        # would raise if a delayed setting ever fires.
                        for avg in loss_avgs:
                            avg.reset()

                # If a config setting was changed, remove it from the list so we don't keep checking
                if changed:
                    cfg.delayed_settings = [x for x in cfg.delayed_settings
                                            if x[0] > iteration]

                # Warm up by linearly interpolating the learning rate from some smaller value
                if cfg.lr_warmup_until > 0 and iteration <= cfg.lr_warmup_until:
                    set_lr(optimizer_gen,
                           (args.lr - cfg.lr_warmup_init) *
                           (iteration / cfg.lr_warmup_until) + cfg.lr_warmup_init)

                # Adjust the learning rate at the given iterations, but also if we resume from past that iteration
                while step_index < len(cfg.lr_steps) and iteration >= cfg.lr_steps[step_index]:
                    step_index += 1
                    set_lr(optimizer_gen, args.lr * (args.gamma ** step_index))

                # NOTE
                if cfg.pred_seg:
                    # ====== GAN Train ======
                    # train the gen and dis in different iteration
                    # it_alter_period = iteration % (cfg.gen_iter + cfg.dis_iter)
                    # NOTE(review): the generator step sits *inside* this loop,
                    # so gen and dis are each updated cfg.dis_iter times per
                    # datum — confirm this matches the intended schedule.
                    for _ in range(cfg.dis_iter):
                        if cfg.amp:
                            with torch.cuda.amp.autocast():
                                # ----- Discriminator part -----
                                # seg_list is the prediction mask
                                # can be regarded as generated images from YOLACT
                                # pred_list is the prediction label
                                # seg_list dim: list of (138,138,instances)
                                # pred_list dim: list of (instances)
                                losses, seg_list, pred_list = net(datum)
                                seg_clas, mask_clas, b, seg_size = seg_mask_clas(
                                    seg_list, pred_list, datum)

                                # input image size is [b, 3, 550, 550]
                                # downsample to [b, 3, seg_h, seg_w]
                                # NOTE(review): `cuda0` is not defined in this
                                # function — presumably a module-level
                                # torch.device; confirm.
                                image_list = [img.to(cuda0) for img in datum[0]]
                                image = interpolate(torch.stack(image_list),
                                                    size=seg_size,
                                                    mode='bilinear',
                                                    align_corners=False)

                                # Because in the discriminator training, we do not
                                # want the gradient flow back to the generator part
                                # we detach seg_clas (mask_clas come the data, does not have grad)
                                output_pred = dis_net(img=image.detach(), seg=seg_clas.detach())
                                output_grou = dis_net(img=image.detach(), seg=mask_clas.detach())
                                # (debug visualisation and probability printouts
                                # were commented out here; see VCS history)

                                # 0 for Fake/Generated, 1 for True/Ground Truth.
                                # Practical advice from https://arxiv.org/abs/1611.08408
                                # Wasserstein Distance (Earth-Mover)
                                loss_dis = criterion_dis(input=output_grou,
                                                         target=output_pred)

                            # Backprop the discriminator
                            # Scales loss. Calls backward() on scaled loss to create scaled gradients.
                            scaler.scale(loss_dis).backward()
                            scaler.step(optimizer_dis)
                            scaler.update()
                            optimizer_dis.zero_grad()

                            # clip the updated parameters (WGAN weight clipping)
                            _ = [par.data.clamp_(-cfg.clip_value, cfg.clip_value)
                                 for par in dis_net.parameters()]

                            # ----- Generator part -----
                            with torch.cuda.amp.autocast():
                                losses, seg_list, pred_list = net(datum)
                                seg_clas, mask_clas, b, seg_size = seg_mask_clas(
                                    seg_list, pred_list, datum)

                                image_list = [img.to(cuda0) for img in datum[0]]
                                image = interpolate(torch.stack(image_list),
                                                    size=seg_size,
                                                    mode='bilinear',
                                                    align_corners=False)

                                # NOTE this seg_clas CANNOT detach, in order to flow the
                                # gradient back to the generator.  This step is crucial
                                # for the information of discriminator to go into the
                                # generator.
                                # GAN MaskRCNN
                                output_pred = dis_net(img=image, seg=seg_clas)
                                output_grou = dis_net(img=image, seg=mask_clas)

                                # Advice from WGAN
                                # loss_gen = -torch.mean(output)
                                loss_gen = criterion_gen(input=output_grou,
                                                         target=output_pred)

                                # since the dis is already freeze, the gradients will only
                                # record the YOLACT
                                losses = {k: (v).mean() for k, v in losses.items()
                                          }  # Mean here because Dataparallel
                                loss = sum([losses[k] for k in losses])
                                loss += loss_gen

                            # Generator backprop
                            scaler.scale(loss).backward()
                            scaler.step(optimizer_gen)
                            scaler.update()
                            optimizer_gen.zero_grad()
                        else:
                            # ----- Discriminator part (no AMP) -----
                            losses, seg_list, pred_list = net(datum)
                            seg_clas, mask_clas, b, seg_size = seg_mask_clas(
                                seg_list, pred_list, datum)

                            image_list = [img.to(cuda0) for img in datum[0]]
                            image = interpolate(torch.stack(image_list),
                                                size=seg_size,
                                                mode='bilinear',
                                                align_corners=False)

                            output_pred = dis_net(img=image.detach(), seg=seg_clas.detach())
                            output_grou = dis_net(img=image.detach(), seg=mask_clas.detach())

                            loss_dis = criterion_dis(input=output_grou,
                                                     target=output_pred)
                            loss_dis.backward()
                            optimizer_dis.step()
                            optimizer_dis.zero_grad()

                            # WGAN weight clipping
                            _ = [par.data.clamp_(-cfg.clip_value, cfg.clip_value)
                                 for par in dis_net.parameters()]

                            # ----- Generator part -----
                            losses, seg_list, pred_list = net(datum)
                            seg_clas, mask_clas, b, seg_size = seg_mask_clas(
                                seg_list, pred_list, datum)

                            image_list = [img.to(cuda0) for img in datum[0]]
                            image = interpolate(torch.stack(image_list),
                                                size=seg_size,
                                                mode='bilinear',
                                                align_corners=False)

                            # GAN MaskRCNN
                            output_pred = dis_net(img=image, seg=seg_clas)
                            output_grou = dis_net(img=image, seg=mask_clas)

                            loss_gen = criterion_gen(input=output_grou,
                                                     target=output_pred)

                            # since the dis is already freeze, the gradients will only
                            # record the YOLACT
                            losses = {k: (v).mean() for k, v in losses.items()
                                      }  # Mean here because Dataparallel
                            loss = sum([losses[k] for k in losses])
                            loss += loss_gen

                            loss.backward()  # Do this to free up vram even if loss is not finite
                            # NOTE(review): zero_grad() here, *between* backward()
                            # and step(), discards the just-computed gradients, so
                            # the step below applies nothing.  The AMP branch
                            # zeroes *after* stepping — this looks like a bug.
                            optimizer_gen.zero_grad()
                            if torch.isfinite(loss).item():
                                # since the optimizer_gen is for YOLACT only
                                # only the gen will be updated
                                optimizer_gen.step()
                else:
                    # ====== Normal YOLACT Train ======
                    # Zero the grad to get ready to compute gradients
                    optimizer_gen.zero_grad()

                    # Forward Pass + Compute loss at the same time (see CustomDataParallel and NetLoss)
                    losses = net(datum)

                    losses = {k: (v).mean() for k, v in losses.items()
                              }  # Mean here because Dataparallel
                    loss = sum([losses[k] for k in losses])
                    # no_inf_mean removes some components from the loss, so make sure to backward through all of it
                    # all_loss = sum([v.mean() for v in losses.values()])

                    # Backprop
                    loss.backward()  # Do this to free up vram even if loss is not finite
                    if torch.isfinite(loss).item():
                        optimizer_gen.step()

                # Add the loss to the moving average for bookkeeping
                _ = [loss_avgs[k].add(losses[k].item()) for k in losses]
                # for k in losses:
                #     loss_avgs[k].add(losses[k].item())

                cur_time = time.time()
                elapsed = cur_time - last_time
                last_time = cur_time

                # Exclude graph setup from the timing information
                if iteration != args.start_iter:
                    time_avg.add(elapsed)

                if iteration % 10 == 0:
                    eta_str = str(
                        datetime.timedelta(seconds=(cfg.max_iter - iteration) *
                                           time_avg.get_avg())).split('.')[0]

                    total = sum([loss_avgs[k].get_avg() for k in losses])
                    loss_labels = sum([[k, loss_avgs[k].get_avg()]
                                       for k in loss_types if k in losses], [])

                    # NOTE(review): progress is only printed when cfg.pred_seg is
                    # set — the plain-YOLACT path prints nothing; confirm intent.
                    if cfg.pred_seg:
                        print(('[%3d] %7d ||' + (' %s: %.3f |' * len(losses)) +
                               ' T: %.3f || ETA: %s || timer: %.3f') %
                              tuple([epoch, iteration] + loss_labels +
                                    [total, eta_str, elapsed]),
                              flush=True)
                        # print(f'Generator loss: {loss_gen:.2f} | Discriminator loss: {loss_dis:.2f}')

                # Loss Key:
                #  - B: Box Localization Loss
                #  - C: Class Confidence Loss
                #  - M: Mask Loss
                #  - P: Prototype Loss
                #  - D: Coefficient Diversity Loss
                #  - E: Class Existence Loss
                #  - S: Semantic Segmentation Loss
                #  - T: Total loss
                if args.log:
                    precision = 5
                    loss_info = {k: round(losses[k].item(), precision)
                                 for k in losses}
                    loss_info['T'] = round(loss.item(), precision)

                    if args.log_gpu:
                        log.log_gpu_stats = (iteration % 10 == 0)  # nvidia-smi is sloooow

                    # NOTE(review): `cur_lr` is not assigned in this function —
                    # assumes set_lr() maintains a module-level cur_lr; confirm.
                    log.log('train',
                            loss=loss_info,
                            epoch=epoch,
                            iter=iteration,
                            lr=round(cur_lr, 10),
                            elapsed=elapsed)

                    log.log_gpu_stats = args.log_gpu

                iteration += 1

                if iteration % args.save_interval == 0 and iteration != args.start_iter:
                    if args.keep_latest:
                        latest = SavePath.get_latest(args.save_folder, cfg.name)

                    print('Saving state, iter:', iteration)
                    yolact_net.save_weights(save_path(epoch, iteration))

                    if args.keep_latest and latest is not None:
                        if args.keep_latest_interval <= 0 or iteration % args.keep_latest_interval != args.save_interval:
                            print('Deleting old save...')
                            os.remove(latest)

            # This is done per epoch
            if args.validation_epoch > 0:
                # NOTE: a large commented-out GAN validation-loss pass lived here
                # (evaluated dis_net over val_loader and stepped schedule_dis on
                # loss_dis); condensed for readability — see VCS history.
                if epoch % args.validation_epoch == 0 and epoch > 0:
                    cfg.gan_eval = False
                    dis_net.eval()
                    compute_validation_map(epoch, iteration, yolact_net,
                                           val_dataset,
                                           log if args.log else None)

        # Compute validation mAP after training is finished
        compute_validation_map(epoch, iteration, yolact_net, val_dataset,
                               log if args.log else None)
    except KeyboardInterrupt:
        if args.interrupt:
            print('Stopping early. Saving network...')
            # Delete previous copy of the interrupted network so we don't spam the weights folder
            SavePath.remove_interrupt(args.save_folder)
            yolact_net.save_weights(
                save_path(epoch, repr(iteration) + '_interrupt'))
        exit()

    yolact_net.save_weights(save_path(epoch, iteration))
# NOTE(review): the first five lines below are the tail of a camera-display
# loop whose enclosing function lies outside this chunk; indentation here is
# a best-effort reconstruction — confirm against the original file.
        if cv2.waitKey(33) == 27:  # 27 == ESC key
            break
    cv2.destroyAllWindows()
    camera.release()
    return


if __name__ == '__main__':
    # ROS entry point: subscribe to the camera (Get_image) and run YOLACT on
    # every frame until the node is shut down, showing raw and annotated views.
    rospy.init_node('test')
    sub_img = Get_image()
    print('Loading model...', end='')
    with torch.no_grad():
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        net = Yolact()
        net.load_weights(
            '/home/chien/ros_yolact/src/yolact/src/weights/yolact_base_1333_8000.pth'
        )
        net.eval()
        net = net.cuda()
        print(' Done.')
        while not rospy.is_shutdown():
            # Raw camera frame as delivered by the subscriber.
            cv2.imshow("YOLACT1", sub_img.cv_image)
            image = torch.from_numpy(sub_img.cv_image).cuda().float()
            batch = FastBaseTransform()(image.unsqueeze(0))
            preds = net(batch)
            # undo_transform=False: frame is already in network space.
            img_numpy = prep_display(preds, image, None, None,
                                     undo_transform=False)
            # Annotated detections.  NOTE(review): the loop appears truncated
            # here (no waitKey call visible in this chunk) — confirm upstream.
            cv2.imshow("YOLACT", img_numpy)
    # NOTE(review): this chunk is the tail of an eval-script entry block; the
    # leading `else:` pairs with an `if args.cuda:` branch outside this chunk,
    # and `s_t` (start time) is recorded before it.  Indentation reconstructed.
    else:
        # CPU fallback: keep default tensors on the host.
        torch.set_default_tensor_type('torch.FloatTensor')

    if args.resume and not args.display:
        # Resume mode: recompute mAP from previously pickled AP data and exit.
        with open(args.ap_data_file, 'rb') as f:
            ap_data = pickle.load(f)
        calc_map(ap_data)
        exit()

    if args.image is None and args.video is None and args.images is None:
        # No explicit media given: evaluate on the configured validation set.
        dataset = COCODetection(cfg.dataset.valid_images,
                                cfg.dataset.valid_info,
                                transform=BaseTransform(),
                                has_gt=cfg.dataset.has_gt)
        prep_coco_cats()
    else:
        dataset = None

    print('Loading model...', end='')
    net = Yolact()
    # Load onto CPU when CUDA is unavailable.
    map_location = None if args.cuda else 'cpu'
    net.load_weights(args.trained_model, map_location=map_location)
    net.eval()
    print(' Done.')

    if args.cuda:
        net = net.cuda()

    evaluate(net, dataset)
    print("time_taken", time.time() - s_t)
class YolactInterface(object):
    """Thin wrapper around a YOLACT network for single-image instance segmentation.

    Loads the weights once at construction and exposes :meth:`run_once`, which
    segments one BGR image and returns COCO-style instance dicts (RLE masks).
    """

    def __init__(self, model_pth, output_num=5):
        """Load the YOLACT model onto GPU 0.

        Args:
            model_pth: Path to a ``.pth`` weights file.
            output_num: Maximum number of detections kept per image.
        """
        self.output_num = output_num
        with torch.no_grad():
            set_cfg("yolact_base_config")
            torch.cuda.set_device(0)
            cudnn.benchmark = True
            cudnn.fastest = True
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
            self.net = Yolact()
            self.net.load_weights(model_pth)
            self.net.eval()
            self.net = self.net.cuda()
        print("load model complete")

    def run_once(self, src):
        """Run inference on a single image.

        Args:
            src: HxWx3 numpy image (BGR, uint8).

        Returns:
            Dict with key ``"instances"`` holding a list of per-instance dicts.
        """
        self.net.detect.cross_class_nms = True
        self.net.detect.use_fast_nms = True
        cfg.mask_proto_debug = False
        with torch.no_grad():
            frame = torch.Tensor(src).cuda().float()
            batch = FastBaseTransform()(frame.unsqueeze(0))
            # BUGFIX: time.clock() was deprecated in 3.3 and removed in Python 3.8;
            # perf_counter() is the documented replacement for wall-clock timing.
            time_start = time.perf_counter()
            preds = self.net(batch)
            time_elapsed = time.perf_counter() - time_start
            h, w, _ = src.shape
            t = postprocess(
                preds, w, h,
                visualize_lincomb=False,
                crop_masks=True,
                score_threshold=0.)  # TODO: give a suitable threshold
            torch.cuda.synchronize()
            classes, scores, bboxes, masks = [
                x[:self.output_num].cpu().numpy() for x in t
            ]  # TODO: Only 5 objects for test
            print(time_elapsed)
            instances = self.build_up_result(masks.shape[0], classes, bboxes,
                                             masks, scores)
            return {"instances": instances}

    def build_up_result(self, num, classes, bboxes, masks, scores):
        """Convert raw postprocess outputs into COCO-style result dicts.

        Args:
            num: Number of instances to emit.
            classes, bboxes, masks, scores: Parallel numpy arrays from postprocess.

        Returns:
            List of dicts with ``category_id``, ``bbox``, ``segmentation`` (RLE),
            and ``score``.
        """
        instances = []
        for i in range(num):
            # x1,y1,x2,y2 -> x,y,w,h
            bbox = [
                bboxes[i, 0], bboxes[i, 1], bboxes[i, 2] - bboxes[i, 0],
                bboxes[i, 3] - bboxes[i, 1]
            ]
            # Round to the nearest 10th to avoid huge file sizes, as COCO suggests
            bbox = [round(float(x) * 10) / 10 for x in bbox]
            # encode segmentation with RLE
            rle = pycocotools.mask.encode(
                np.asfortranarray(masks[i, :, :].astype(
                    np.uint8)))  # rle binary encoding
            rle['counts'] = rle['counts'].decode(
                'ascii')  # json.dump doesn't like bytes strings
            # create one instance json
            instances.append({
                'category_id': int(classes[i]
                                   ),  # TODO: origin: get_coco_cat(int(category_id))
                'bbox': {
                    "b": bbox
                },
                "segmentation": rle,
                'score': float(scores[i])
            })
        return instances
class DOTMask():
    """Dynamic-object tracking and masking ROS node.

    Runs an instance-segmentation network (YOLACT / YOLACT++ / YOLACT-edge /
    Mask-RCNN), tracks detected objects with per-object extended Kalman filters,
    and publishes depth images with moving objects masked out.
    """

    def __init__(self, nn, input_device):
        """Load the selected network and initialise tracker/ROS state.

        Args:
            nn: Backend name — 'yolact', 'yolact++', 'yolact_edge' or 'mrcnn'.
            input_device: Camera name (currently unused; see commented block below).
        """
        print('Loading model...')
        self.nn = nn
        if self.nn == 'yolact':
            print("Selected NN: Yolact")
            # Yoloact imports
            sys.path.append('../nn/yolact/')
            from yolact import Yolact
            from data import cfg, set_cfg, set_dataset
            import torch
            import torch.backends.cudnn as cudnn
            set_cfg("yolact_resnet50_config")
            #set_cfg("yolact_resnet50_config")
            cfg.eval_mask_branch = True
            cfg.mask_proto_debug = False
            cfg.rescore_bbox = True
            self.net = Yolact()
            self.net.load_weights("../weights/yolact_resnet50_54_800000.pth")
            #self.net.load_weights("../weights/yolact_resnet50_54_800000.pth")
            self.net.eval()
            cudnn.fastest = True
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
            self.net = self.net.cuda()
        elif self.nn == 'yolact++':
            print("Selected NN: Yolact++")
            # Yoloact imports
            sys.path.append('../nn/yolact/')
            from yolact import Yolact
            from data import cfg, set_cfg, set_dataset
            import torch
            import torch.backends.cudnn as cudnn
            set_cfg("yolact_plus_resnet50_config")
            #set_cfg("yolact_resnet50_config")
            cfg.eval_mask_branch = True
            cfg.mask_proto_debug = False
            cfg.rescore_bbox = True
            self.net = Yolact()
            self.net.load_weights("../weights/yolact_plus_resnet50_54_800000.pth")
            #self.net.load_weights("../weights/yolact_resnet50_54_800000.pth")
            self.net.eval()
            cudnn.fastest = True
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
            self.net = self.net.cuda()
        elif self.nn == 'yolact_edge':
            print("Selected NN: Yolact_edge")
            #Yoloact_edge imports
            sys.path.append('../nn/yolact_edge')
            from yolact import Yolact
            from data import cfg, set_cfg, set_dataset
            import torch
            import torch.backends.cudnn as cudnn
            set_cfg("yolact_edge_resnet50_config")
            cfg.eval_mask_branch = True
            cfg.mask_proto_debug = False
            cfg.rescore_bbox = True
            self.net = Yolact()
            self.net.load_weights("../weights/yolact_edge_resnet50_54_800000.pth")
            self.net.eval()
            cudnn.fastest = True
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
            self.net = self.net.cuda()
        elif self.nn == 'mrcnn':
            print("Selected NN: Mask-RCNN")
            # Keras
            import keras
            from keras.models import Model
            from keras import backend as K
            K.common.set_image_dim_ordering('tf')
            # Mask-RCNN
            sys.path.append('../nn/Mask_RCNN/')
            from mrcnn import config
            from mrcnn import utils
            from mrcnn import model as modellib
            from inference_config import InferenceConfig
            self.config = InferenceConfig()
            self.model = modellib.MaskRCNN(
                mode="inference",
                model_dir="../weights/",#"../nn/Mask_RCNN/mrcnn/",
                config=self.config)
            # Load weights trained on MS-COCO
            self.model.load_weights("../weights/mask_rcnn_coco.h5", by_name=True)
        else:
            print("no nn defined")
        self.bridge = CvBridge()
        self._max_inactive_frames = 10  # Maximum nb of frames before destruction
        self.next_object_id = 0  # ID for next object
        self.objects_dict = {}  # Detected objects dictionary
        self.var_init = 0
        self.cam_pos_qat = np.array([[0.,0.,0.],[0.,0.,0.,1.]])
        self.cam_pos = np.array([[0.,0.,0.],[0.,0.,0.]])
        self.dilatation = 1          # mask dilation iterations
        self.score_threshold = 0.1   # minimum detection score kept
        self.max_number_observation = 5
        self.human_threshold = 0.01  # velocity threshold (m/frame) for persons
        self.object_threshold = 0.3  # velocity threshold for other classes
        self.iou_threshold = 0.9     # below this centered-IoU a person is "moving"
        self.selected_classes = [0, 56, 67]
        self.masked_id = []
        #if input_device == 'xtion':
        #    self.human_threshold = 0.1
        #    self.iou_threshold = 0.3
        self.depth_image_pub = rospy.Publisher(
            "/camera/depth_registered/masked_image_raw", Image, queue_size=1)
        self.dynamic_depth_image_pub = rospy.Publisher(
            "/camera/depth_registered/dynamic_masked_image_raw", Image, queue_size=1)
        self.frame = []
        self.depth_frame = []
        self.msg_header = std_msgs.msg.Header()
        self.depth_msg_header = std_msgs.msg.Header()
        # Class names COCO dataset
        self.class_names = [
            'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
            'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
            'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
            'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
            'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
            'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
            'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
            'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
            'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
            'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
            'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
            'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
            'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
            'scissors', 'teddy bear', 'hair drier', 'toothbrush']

    def get_active(self, val):
        """Return the 'activeObject' flag of the tracked object whose maskID is
        ``val``, or the string "Key not exist" when no object matches."""
        for key in self.objects_dict:
            if self.objects_dict[key]["maskID"] == val:
                return self.objects_dict[key]["activeObject"]
        return "Key not exist"

    def class_selection(self, masks_in, class_ids):
        """Keep only masks whose class is person (0), bottle (39) or chair (56);
        all other masks are zeroed.  Returns an all-zero 1x480x640 array when
        masks_in carries no mask data."""
        if len(masks_in.shape) > 1:
            masks = copy.deepcopy(masks_in)
            x = np.zeros([class_ids.shape[0], masks.shape[1], masks.shape[2]])
            for l in range(masks.shape[0]):
                if (class_ids[l] == 0 or class_ids[l] == 39 or class_ids[l] == 56):
                    x[l, :, :] = masks[l, :, :]
                else:
                    x[l, :, :] = 0
            return x
        else:
            # NOTE(review): hard-coded 480x640 fallback — assumes VGA input; confirm.
            x = np.zeros([1, 480, 640])
            return x

    def static_masks_selection(self, masks_in, class_ids):
        """Select masks of tracked objects that are currently considered active
        (moving); inactive objects contribute an empty mask.  Appends processed
        object ids to self.masked_id so each is handled once per frame."""
        if len(masks_in.shape) > 1:
            masks = copy.deepcopy(masks_in)
            x = np.zeros([masks.shape[0], masks.shape[1], masks.shape[2]])
            for i in self.objects_dict:
                if not np.in1d(i, self.masked_id):
                    if self.objects_dict[i]["activeObject"] == 1 and self.objects_dict[i]["maskID"] < masks.shape[0] and (class_ids[self.objects_dict[i]["maskID"]] == 0 or class_ids[self.objects_dict[i]["maskID"]] == 39 or class_ids[self.objects_dict[i]["maskID"]] == 56):
                        x[self.objects_dict[i]["maskID"], :, :] = masks[self.objects_dict[i]["maskID"], :, :]
                    elif self.objects_dict[i]["activeObject"] == 0 and self.objects_dict[i]["maskID"] < masks.shape[0]:
                        x[self.objects_dict[i]["maskID"], :, :] = 0
                    else:
                        pass
                    self.masked_id.append(i)
            return x
        else:
            x = np.zeros([1, 480, 640])
            return x

    def read_objects_pose(self):
        """Look up each tracked object's world pose ('/map' frame) from tf and
        store it in the object's 'worldPose' entry; silently skips objects whose
        transform is unavailable."""
        for i in self.objects_dict:
            if self.objects_dict[i]["classID"] == 0:
                object_type = "Person"
            elif self.objects_dict[i]["classID"] == 39:
                object_type = "Bottle"
            elif self.objects_dict[i]["classID"] == 56:
                object_type = "Chair"
            else:
                object_type = "Nan"
            try:
                (self.objects_dict[i]["worldPose"], rot) = listener.lookupTransform('/map', object_type+'_'+str(i), rospy.Time(0))
            except (tf.LookupException, tf.ConnectivityException, tf.ExtrapolationException):
                continue

    def handle_objects_pose(self):
        """Broadcast each tracked person/bottle/chair's Kalman-estimated pose as
        a tf transform relative to '/map'."""
        for i in self.objects_dict:
            if self.objects_dict[i]["classID"] == 0 or self.objects_dict[i]["classID"] == 39 or self.objects_dict[i]["classID"] == 56:
                if self.objects_dict[i]["classID"] == 0:
                    object_type = "Person"
                elif self.objects_dict[i]["classID"] == 39:
                    object_type = "Bottle"
                elif self.objects_dict[i]["classID"] == 56:
                    object_type = "Chair"
                else:
                    object_type = "Nan"
                br = tf.TransformBroadcaster()
                e_pose = self.objects_dict[i]["estimatedPose"]
                br.sendTransform((e_pose[0], e_pose[1], e_pose[2]),
                                 tf.transformations.quaternion_from_euler(0, 0, 0),
                                 rospy.Time.now(),
                                 object_type+'_'+str(i),
                                 '/map')

    def iou_centered_centroid(self, rois_old, rois_new, mask_old, mask_new):
        """Compute IoU of two masks after cropping each to its bbox and
        re-centering in the image, so overlap is measured shape-to-shape rather
        than at the original positions.  rois are [x1, y1, x2, y2]."""
        # intersection_over_union applied on centered centroid
        img_v = mask_old.shape[0]
        img_h = mask_old.shape[1]
        pad_x_old = int((img_v-(rois_old[3]-rois_old[1]))/2)
        pad_y_old = int((img_h-(rois_old[2]-rois_old[0]))/2)
        pad_x_new = int((img_v-(rois_new[3]-rois_new[1]))/2)
        pad_y_new = int((img_h-(rois_new[2]-rois_new[0]))/2)
        cropped_mask_old = mask_old[rois_old[1]:rois_old[3], rois_old[0]:rois_old[2]]
        cropped_mask_new = mask_new[rois_new[1]:rois_new[3], rois_new[0]:rois_new[2]]
        centered_mask_old = add_padding(cropped_mask_old, pad_y_old, pad_x_old, pad_y_old, pad_x_old)
        centered_mask_new = add_padding(cropped_mask_new, pad_y_new, pad_x_new, pad_y_new, pad_x_new)
        # NOTE(review): the 1:478 / 1:638 crop assumes a 480x640 image with a
        # possible one-pixel rounding from the padding above — confirm.
        centered_mask_old_croped = centered_mask_old[1:478, 1:638]
        centered_mask_new_croped = centered_mask_new[1:478, 1:638]
        intersection = np.logical_and(centered_mask_old_croped, centered_mask_new_croped)
        union = np.logical_or(centered_mask_old_croped, centered_mask_new_croped)
        iou = np.sum(intersection) / np.sum(union)
        return iou

    def apply_depth_image_masking(self, image_in, masks):
        """Apply the given mask to the image.

        Returns (image_static, image): 'image' has every detected object zeroed
        out of the depth map; 'image_static' additionally zeroes only objects
        currently flagged active (moving).
        """
        image = copy.deepcopy(image_in)
        image_static = copy.deepcopy(image_in)
        for i in range(masks.shape[0]):
            is_active = self.get_active(i)
            mask = masks[i, :, :]
            mask = ndimage.binary_dilation(mask, iterations=self.dilatation)
            if is_active == 1:
                image[:, :] = np.where(mask == 1, 0, image[:, :])
                image_static[:, :] = np.where(mask == 1, 0, image[:, :])
            else:
                image[:, :] = np.where(mask == 1, 0, image[:, :])
        return image_static, image

    def mask_dilatation(self, masks):
        """Dilate every mask with scipy.ndimage (timed for benchmarking)."""
        timebefore = time.time()
        mask = copy.deepcopy(masks)
        for i in range(mask.shape[0]):
            mask[i] = ndimage.binary_dilation(mask[i], iterations=self.dilatation)
        print("Numpy dilation time : ", - (timebefore - time.time()))
        return mask

    def mask_dilatation_cv(self, masks):
        """Dilate every mask with OpenCV (timed for benchmarking; 3x3 kernel)."""
        timebefore = time.time()
        mask = copy.deepcopy(masks)
        kernel = np.ones((3, 3))
        for i in range(mask.shape[0]):
            mask[i] = cv2.dilate(mask[i], kernel, iterations=self.dilatation)
        print("cv2 dilation time : ", - (timebefore - time.time()))
        return mask

    def get_masking_depth(self, image, mask):
        """Apply the given mask to the image.

        For each mask, compute the mean depth of the masked (non-NaN, non-zero)
        pixels.  Returns a 1-D array with one mean depth per mask (0 when the
        mask covers no valid pixels).
        """
        x = np.zeros([image.shape[0], image.shape[1]])
        y = np.zeros(mask.shape[0])
        for i in range(mask.shape[0]):
            x[:, :] = np.where(mask[i, :, :] != 1, 0, image[:, :])
            x[:, :] = np.where(np.isnan(x[:, :]), 0, x[:, :])
            if sum(sum((x[:, :] != 0))) == 0:
                y[i] = 0
            else:
                y[i] = (x[:, :].sum()/sum(sum((x[:, :] != 0))))
        return y

    def add_object(self, centroid, dimensions, mask_id, class_id, mask_old, rois_old):
        """Register a new tracked object: transform its camera-frame centroid to
        the '/map' frame and create an extended Kalman filter (constant-velocity
        model, dt = 0.25 s) plus the bookkeeping entries in objects_dict."""
        dt = 0.25
        try:
            (transc, rotc) = listener.lookupTransform('/map', self.tf_camera, rospy.Time(0))
        except (tf.LookupException, tf.ConnectivityException, tf.ExtrapolationException):
            # Fall back to identity transform when tf is unavailable
            transc = np.array([0., 0., 0.])
            rotc = np.array([0., 0., 0., 1.])
        euler = tf.transformations.euler_from_quaternion(rotc)
        rot = tf.transformations.euler_matrix(euler[0], euler[1], euler[2])
        h_mat = rot
        h_mat[0:3, 3:] = np.array([transc]).T
        # Homogeneous transform of the centroid into the map frame
        b = h_mat.dot(np.array([[centroid[0], centroid[1], centroid[2], 1]]).T)[0:3, :]
        y = np.array([b[0, 0], b[1, 0], b[2, 0]])
        x = [y[0], y[1], y[2], 0, 0, 0]  # state: position + velocity
        P = np.eye(len(x))
        # Constant-velocity state-transition model
        F = np.array([[1, 0, 0, dt, 0, 0],
                      [0, 1, 0, 0, dt, 0],
                      [0, 0, 1, 0, 0, dt],
                      [0, 0, 0, 1, 0, 0],
                      [0, 0, 0, 0, 1, 0],
                      [0, 0, 0, 0, 0, 1]])
        H = np.array([[0.001, 0, 0, 0, 0, 0],
                      [0, 0.001, 0, 0, 0, 0],
                      [0, 0, 0.001, 0, 0, 0]])
        # Process-noise magnitude per axis; persons (class_id == 1 here) get a
        # smaller acceleration prior.  NOTE(review): elsewhere persons are
        # class 0, not 1 — confirm intended class id.
        if class_id == 1:
            ax = 0.68
            ay = 0.68
            az = 0.68
        else:
            ax = 1
            ay = 1
            az = 1
        Q = np.array([[((dt**4)/4)*(ax**2), 0.0, 0.0, ((dt**4)/4)*(ax**3), 0.0, 0.0],
                      [0.0, ((dt**4)/4)*(ay**2), 0.0, 0.0, ((dt**4)/4)*(ay**3), 0.0],
                      [0.0, 0.0, ((dt**4)/4)*(az**2), 0.0, 0.0, ((dt**4)/4)*(az**3)],
                      [((dt**4)/4)*(ax**3), 0.0, 0.0, (dt**2)*(ax**2), 0.0, 0.0],
                      [0.0, ((dt**4)/4)*(ay**3), 0.0, 0.0, (dt**2)*(ax**2), 0.0],
                      [0.0, 0.0, ((dt**4)/4)*(az**3), 0.0, 0.0, (dt**2)*(ax**2)]])
        R = np.array([[0.8, 0, 0],
                      [0, 0.8, 0],
                      [0, 0, 1.2]])
        self.objects_dict.update({self.next_object_id: {
            "kalmanFilter": extendedKalmanFilter(x, P, F, H, Q, R),
            "centroid": centroid,
            "dimension": dimensions,
            "classID": class_id,
            "roisOld": rois_old,
            "maskID": mask_id,
            "maskOld": mask_old,
            "worldPose": [0, 0, 0],
            "estimatedVelocity": [0, 0, 0],
            "estimatedPose": [0, 0, 0],
            "inactiveNbFrame": 0,
            "activeObject": 0}})
        self.next_object_id = self.next_object_id+1

    def delete_object(self, object_id):
        """Remove a tracked object from the dictionary."""
        del self.objects_dict[object_id]

    def mask_to_centroid(self, rois, mask_depth):
        """Back-project each detection's bbox center + mean depth into a 3-D
        camera-frame centroid using the pinhole model of the selected sensor.
        Returns (centroids, dimensions) keyed by detection index."""
        current_centroids = {}
        current_dimensions = {}
        for i in range(len(rois)):
            # 3D centroids from depth frame
            if args.input == 'tum':
                fx = 525.0  # focal length x
                fy = 525.0  # focal length y
                cx = 319.5  # optical center x
                cy = 239.5  # optical center y
            elif args.input == 'xtion':
                # Asus xtion sensor
                fx = 525
                fy = 525
                cx = 319.5
                cy = 239.5
            elif args.input == 'zed':
                # Zed sensor left img vga
                fx = 350.113
                fy = 350.113
                cx = 336.811
                cy = 190.357
            else:
                print("No valid input")
            # Translation from depth pixel to local point
            if mask_depth[i] == -1:
                z = 0
            else:
                z = mask_depth[i]
            y = (((rois[i, 3]+rois[i, 1])/2) - cy) * z / fy
            x = (((rois[i, 2]+rois[i, 0])/2) - cx) * z / fx
            # Translation from point to world coord
            current_centroids.update({i: [x, y, z]})
            current_dimensions.update({i: [rois[i, 3]-rois[i, 1], rois[i, 2]-rois[i, 0]]})
        return current_centroids, current_dimensions

    def live_analysis(self):
        """Main loop: run segmentation on the latest RGB frame, associate
        detections with tracked objects (nearest-neighbour on 3-D centroids),
        update the Kalman filters, and publish masked depth images."""
        bar = [
            " Waiting for frame [= ] ",
            " Waiting for frame [ = ] ",
            " Waiting for frame [ = ] ",
            " Waiting for frame [ = ] ",
            " Waiting for frame [ = ] ",
            " Waiting for frame [ =] ",
            " Waiting for frame [ = ] ",
            " Waiting for frame [ = ] ",
            " Waiting for frame [ = ] ",
            " Waiting for frame [ = ] ",
        ]
        idx = 0
        while not rospy.is_shutdown():
            start_time = time.time()
            self.masked_id = []
            current_frame = self.frame
            current_depth_frame = self.depth_frame
            if len(current_frame) == 0 or len(current_depth_frame) == 0:
                # No frames received yet — show a spinner and retry.
                print(bar[idx % len(bar)], end="\r")
                idx = idx + 1
                time.sleep(0.1)
            else:
                nn_start_time = time.time()
                if self.nn == 'yolact' or self.nn == 'yolact++' or self.nn == 'yolact_edge':
                    frame = torch.from_numpy(current_frame).cuda().float()
                    batch = FastBaseTransform()(frame.unsqueeze(0))
                    if self.nn == 'yolact_edge':
                        # yolact_edge needs extra runtime flags in its forward pass
                        extras = {"backbone": "full", "interrupt": False, "keep_statistics": False, "moving_statistics": None}
                        preds = self.net(batch.cuda(), extras=extras)
                        preds = preds["pred_outs"]
                    else:
                        preds = self.net(batch.cuda())
                    nn_pred_time = time.time()
                    h, w, _ = frame.shape
                    b = {}
                    r = {}
                    b['class_ids'], b['scores'], b['rois'], b['masks'] = postprocess(preds, w, h, score_threshold=self.score_threshold)
                    r['class_ids'] = copy.deepcopy(b['class_ids'].cpu().data.numpy())
                    r['scores'] = copy.deepcopy(b['scores'].cpu().data.numpy())
                    r['rois'] = copy.deepcopy(b['rois'].cpu().data.numpy())
                    r['masks'] = copy.deepcopy(b['masks'].cpu().data.numpy())
                elif self.nn == 'mrcnn':
                    results = self.model.detect([current_frame], verbose=1)
                    r = results[0]
                    # Reorder mrcnn outputs to match the yolact layout
                    r['masks'] = np.swapaxes(r['masks'], 0, 2)
                    r['masks'] = np.swapaxes(r['masks'], 1, 2)
                    for i in range(r['rois'].shape[0]):
                        buff = r['rois'][i]
                        r['rois'][i] = [buff[1], buff[0], buff[3], buff[2]]
                    r['class_ids'] = r['class_ids'] - 1
                '''
                Deprecated, did not enhance speed
                j=0
                for i in range(len(r['class_ids'])):
                    if not np.in1d(r['class_ids'][j], self.selected_classes):
                        r['class_ids'] = np.delete(r['class_ids'], j)
                        r['scores']= np.delete(r['scores'], j)
                        r['rois']= np.delete(r['rois'], j,axis=0)
                        r['masks']= np.delete(r['masks'], j, axis=0)
                    else:
                        j=j+1
                '''
                # Keep at most max_number_observation detections above threshold
                self.number_observation = min(self.max_number_observation, r['class_ids'].shape[0])
                for j in range(self.number_observation):
                    if r['scores'][j] < self.score_threshold:
                        self.number_observation = j
                        break
                r['class_ids'] = r['class_ids'][:self.number_observation]
                r['scores'] = r['scores'][:self.number_observation]
                r['rois'] = r['rois'][:self.number_observation]
                r['masks'] = r['masks'][:self.number_observation]
                nn_time = time.time()
                mask_depth = self.get_masking_depth(current_depth_frame, r['masks'])
                # Read object tf pose
                self.read_objects_pose()
                # Read camera tf pose
                try:
                    (transc, rotc) = listener.lookupTransform(self.tf_camera, '/map', rospy.Time(0))
                except (tf.LookupException, tf.ConnectivityException, tf.ExtrapolationException):
                    transc = np.array([0., 0., 0.])
                    rotc = np.array([0., 0., 0., 1.])
                euler = tf.transformations.euler_from_quaternion(rotc)
                rot = tf.transformations.euler_matrix(euler[0], euler[1], euler[2])
                h_mat = rot
                h_mat[0:3, 3:] = np.array([transc]).T
                objects_to_delete = []
                # Main filter update and prediction step
                if len(r['rois']) == 0:
                    # No detections: age every track, drop the stale ones.
                    for i in self.objects_dict:
                        self.objects_dict[i]["inactiveNbFrame"] = self.objects_dict[i]["inactiveNbFrame"] + 1
                        if self.objects_dict[i]["inactiveNbFrame"] > self._max_inactive_frames:
                            objects_to_delete.append(i)
                    for i in objects_to_delete:
                        self.delete_object(i)
                else:
                    current_centroids, current_dimensions = self.mask_to_centroid(r['rois'], mask_depth)
                    if not self.objects_dict:
                        # First detections: create a track per centroid.
                        if not len(current_centroids) == 0:
                            for i in range(len(current_centroids)):
                                self.add_object(current_centroids[i], current_dimensions[i], i, r['class_ids'][i], r['masks'][i], r['rois'][i])
                            for i in self.objects_dict:
                                self.objects_dict[i]["kalmanFilter"].prediction()
                                self.objects_dict[i]["kalmanFilter"].update(self.objects_dict[i]["centroid"], h_mat)
                                self.objects_dict[i]["estimatedPose"] = self.objects_dict[i]["kalmanFilter"].x[0:3]
                                self.objects_dict[i]["estimatedVelocity"] = self.objects_dict[i]["kalmanFilter"].x[3:6]
                    else:
                        # Greedy nearest-neighbour association between existing
                        # tracks and the new centroids.
                        objects_pose = np.zeros((len(self.objects_dict), 3))
                        objects_ids = np.zeros((len(self.objects_dict)))
                        index = 0
                        for i in self.objects_dict:
                            objects_pose[index, ] = self.objects_dict[i]["centroid"]
                            objects_ids[index] = i
                            index = index + 1
                        centroids_pose = np.zeros((len(current_centroids), 3))
                        for i in range(len(current_centroids)):
                            centroids_pose[i, ] = current_centroids[i]
                        eucledian_dist_pairwise = np.array(cdist(objects_pose, centroids_pose)).flatten()
                        index_sorted = np.argsort(eucledian_dist_pairwise)
                        used_objects = []
                        used_centroids = []
                        for index in range(len(eucledian_dist_pairwise)):
                            object_id = int(index_sorted[index] / len(centroids_pose))
                            centroid_id = index_sorted[index] % len(centroids_pose)
                            if not np.in1d(object_id, used_objects) and not np.in1d(centroid_id, used_centroids):# and (eucledian_dist_pairwise[index]<0.5):
                                if self.objects_dict[objects_ids[object_id]]["classID"] == r['class_ids'][centroid_id]:
                                    timebefore = time.time()
                                    used_objects.append(object_id)
                                    used_centroids.append(centroid_id)
                                    self.objects_dict[objects_ids[object_id]]["kalmanFilter"].prediction()
                                    self.objects_dict[objects_ids[object_id]]["kalmanFilter"].update(current_centroids[centroid_id], h_mat)
                                    self.objects_dict[objects_ids[object_id]]["estimatedPose"] = self.objects_dict[objects_ids[object_id]]["kalmanFilter"].x[0:3]
                                    self.objects_dict[objects_ids[object_id]]["estimatedVelocity"] = self.objects_dict[objects_ids[object_id]]["kalmanFilter"].x[3:6]
                                    # Class-dependent velocity threshold decides
                                    # whether the object counts as moving.
                                    if self.objects_dict[objects_ids[object_id]]["classID"] == 0:
                                        max_threshold = self.human_threshold
                                    else:
                                        max_threshold = self.object_threshold
                                    if abs(self.objects_dict[objects_ids[object_id]]["estimatedVelocity"][0]) > max_threshold or abs(self.objects_dict[objects_ids[object_id]]["estimatedVelocity"][1]) > max_threshold or abs(self.objects_dict[objects_ids[object_id]]["estimatedVelocity"][2]) > max_threshold:
                                        self.objects_dict[objects_ids[object_id]]["activeObject"] = 1
                                    else:
                                        self.objects_dict[objects_ids[object_id]]["activeObject"] = 0
                                    # Persons that look static by velocity are
                                    # double-checked via shape IoU (in-place motion).
                                    if self.objects_dict[objects_ids[object_id]]["classID"] == 0 and self.objects_dict[objects_ids[object_id]]["activeObject"] == 0:
                                        iou = self.iou_centered_centroid(self.objects_dict[objects_ids[object_id]]["roisOld"],
                                                                         r['rois'][centroid_id],
                                                                         self.objects_dict[objects_ids[object_id]]["maskOld"],
                                                                         r['masks'][centroid_id])
                                        if iou < self.iou_threshold:
                                            self.objects_dict[objects_ids[object_id]]["activeObject"] = 1
                                        else:
                                            x = 1
                                    self.objects_dict[objects_ids[object_id]]["centroid"] = centroids_pose[centroid_id]
                                    self.objects_dict[objects_ids[object_id]]["dimensions"] = current_dimensions[centroid_id]
                                    self.objects_dict[objects_ids[object_id]]["inactiveNbFrame"] = 0
                                    self.objects_dict[objects_ids[object_id]]["maskID"] = centroid_id
                                    self.objects_dict[objects_ids[object_id]]["maskOld"] = r['masks'][centroid_id]
                                    self.objects_dict[objects_ids[object_id]]["roisOld"] = r['rois'][centroid_id]
                        if len(centroids_pose) < len(objects_pose):
                            # More tracks than detections: age/drop unmatched tracks.
                            # NOTE(review): divisor here is len(objects_pose) while the
                            # matching loop above used len(centroids_pose) — confirm
                            # this index arithmetic is intentional.
                            for index in range(len(eucledian_dist_pairwise)):
                                object_id = int(index_sorted[index] / len(objects_pose))
                                if not np.in1d(object_id, used_objects):
                                    self.objects_dict[objects_ids[object_id]]["inactiveNbFrame"] += 1
                                    self.objects_dict[objects_ids[object_id]]["activeObject"] = 0
                                    if self.objects_dict[objects_ids[object_id]]["inactiveNbFrame"] >= self._max_inactive_frames:
                                        self.delete_object(objects_ids[object_id])
                                    used_objects.append(object_id)
                                else:
                                    self.objects_dict[objects_ids[object_id]]["kalmanFilter"].prediction()
                                    self.objects_dict[objects_ids[object_id]]["estimatedPose"] = self.objects_dict[objects_ids[object_id]]["kalmanFilter"].x_[0:3]
                                    self.objects_dict[objects_ids[object_id]]["estimatedVelocity"] = self.objects_dict[objects_ids[object_id]]["kalmanFilter"].x_[3:6]
                        elif len(centroids_pose) > len(objects_pose):
                            # More detections than tracks: spawn new tracks.
                            # NOTE(review): centroid_id is never appended to
                            # used_centroids here, so the same centroid could
                            # spawn multiple tracks — confirm.
                            buff_id = self.next_object_id
                            for index in range(len(eucledian_dist_pairwise)):
                                centroid_id = index_sorted[index] % len(centroids_pose)
                                if not np.in1d(centroid_id, used_centroids):
                                    self.add_object(current_centroids[centroid_id], current_dimensions[centroid_id], centroid_id, r['class_ids'][centroid_id], r['masks'][centroid_id], r['rois'][centroid_id])
                                    self.objects_dict[buff_id]["kalmanFilter"].prediction()
                                    self.objects_dict[buff_id]["kalmanFilter"].update(current_centroids[centroid_id], h_mat)
                                    self.objects_dict[buff_id]["estimatedPose"] = self.objects_dict[buff_id]["kalmanFilter"].x[0:3]
                                    self.objects_dict[buff_id]["estimatedVelocity"] = self.objects_dict[buff_id]["kalmanFilter"].x[3:6]
                                    buff_id = buff_id + 1
                kalman_time = time.time()
                # Write objects filter pose to tf
                self.handle_objects_pose()
                result_dynamic_depth_image, result_depth_image = self.apply_depth_image_masking(current_depth_frame, r['masks'])
                DDITS = Image()
                DDITS = self.bridge.cv2_to_imgmsg(result_dynamic_depth_image, '32FC1')
                DDITS.header = self.depth_msg_header
                self.dynamic_depth_image_pub.publish(DDITS)
                DITS = Image()
                DITS = self.bridge.cv2_to_imgmsg(result_depth_image, '32FC1')
                DITS.header = self.depth_msg_header
                self.depth_image_pub.publish(DITS)
                print_time = time.time()
                #print(" NN pred time: ", format(nn_pred_time - nn_start_time, '.3f'),", NN post time: ", format(nn_time - nn_pred_time, '.3f'),", NN time: ", format(nn_time - start_time, '.3f'), ", Kalman time: ", format(kalman_time - nn_time, '.3f'),
                #", Print time: ", format(print_time - kalman_time, '.3f'), ", Total time: ", format(time.time() - start_time, '.3f'),
                #", FPS :", format(1/(time.time() - start_time), '.2f'), end="\r")

    def image_callback(self, msg):
        """ROS subscriber callback: cache the latest BGR frame and its header."""
        self.msg_header = msg.header
        self.frame = self.bridge.imgmsg_to_cv2(msg, "bgr8")

    def depth_image_callback(self, msg):
        """ROS subscriber callback: cache the latest depth frame (32FC1) and header."""
        self.depth_msg_header = msg.header
        #32FC1 for asus xtion
        #8UC1 forkicect
        self.depth_frame = self.bridge.imgmsg_to_cv2(msg, "32FC1")
from layers.output_utils import postprocess
import pycocotools
from data import cfg, set_cfg, set_dataset
import numpy as np
import torch
import torch.backends.cudnn as cudnn
from torch.autograd import Variable
from collections import defaultdict
import matplotlib.pyplot as plt
import cv2
# BUGFIX: Yolact and FastBaseTransform were used below without being imported,
# which raises NameError at runtime. These are the standard yolact-repo paths.
from yolact import Yolact
from utils.augmentations import FastBaseTransform

# Demo script: load a YOLACT ResNet-50 model and run one forward pass on a
# single image ("cat.jpg").
set_cfg('yolact_resnet50_config')
cudnn.benchmark = True
cudnn.fastest = True
torch.set_default_tensor_type('torch.cuda.FloatTensor')

net = Yolact()
net.load_weights('weights/yolact_resnet50_54_800000.pth')
net.eval()
net = net.cuda()
net.detect.use_fast_nms = True
cfg.mask_proto_debug = False

path = "cat.jpg"
# cv2.imread returns HxWx3 BGR uint8; YOLACT's FastBaseTransform expects a
# float batch of shape (N, H, W, 3).
frame = torch.from_numpy(cv2.imread(path)).cuda().float()
batch = FastBaseTransform()(frame.unsqueeze(0))
print(batch.shape)
preds = net(batch)
def evaluate(image, train_mode=False):
    """Run single-image inference through the module-level `net`.

    Args:
        image: input image passed straight to `evalimage` (defined elsewhere).
        train_mode: unused here; kept for signature compatibility with callers.

    Returns:
        The annotated output image produced by `evalimage`.
    """
    mask_proto_debug=False
    net.detect.use_fast_nms = True
    cfg.mask_proto_debug = mask_proto_debug
    output_image = evalimage(image)
    return output_image


# Module-level setup: derive the config name from the weights filename,
# configure cuDNN/CUDA defaults, and load the network once at import time.
config = None
detect = False
dataset=None
cuda = True
model_path = SavePath.from_str("yolact_darknet53_54_800000.pth")
# Config name is parsed from the weight file's naming convention.
config = model_path.model_name + '_config'
print('Config not specified. Parsed %s from the file name.\n' % config)
set_cfg(config)
cfg.eval_mask_branch = True
cudnn.benchmark = True
cudnn.fastest = True
torch.set_default_tensor_type('torch.cuda.FloatTensor')
print('Loading model...')
net = Yolact()
# NOTE(review): absolute path is machine-specific — consider making it configurable.
net.load_weights("/home/venkat/Documents/projects/Perception-ros-tuggerbot/src/perception/yolact_depth_perception/scripts/yolact_darknet53_54_800000.pth")
net.eval()
print(' Done.')
net = net.cuda()
def train():
    """Main YOLACT training loop.

    Builds the COCO training (and optional validation) dataset, constructs the
    network + MultiBox loss wrapped in a CustomDataParallel, then iterates over
    the data loader for ``cfg.max_iter`` iterations with LR warmup/decay,
    periodic checkpointing, optional logging, and periodic validation mAP.
    Ctrl+C saves an '_interrupt' checkpoint before exiting.
    """
    # 1: create the folder that will hold training checkpoints
    if not os.path.exists(args.save_folder):
        os.mkdir(args.save_folder)

    # 2: build the training dataset via the MSCOCO API
    dataset = COCODetection(image_path=cfg.dataset.train_images,
                            info_file=cfg.dataset.train_info,
                            transform=SSDAugmentation(MEANS))
    # If train/validation is used, also prepare the eval dataset.
    if args.validation_epoch > 0:
        setup_eval()
        val_dataset = COCODetection(image_path=cfg.dataset.valid_images,
                                    info_file=cfg.dataset.valid_info,
                                    transform=BaseTransform(MEANS))

    # 3: instantiate Yolact and put it in train mode.
    # Note: `net` and `yolact_net` share the same object; `net` is later
    # re-bound to the Yolact+MultiBoxLoss wrapper, so we keep `yolact_net`
    # around to access the bare Yolact model (for saving/loading weights).
    yolact_net = Yolact()
    net = yolact_net
    net.train()

    # 4: logging and resume support.
    if args.log:
        log = Log(cfg.name, args.log_folder, dict(args._get_kwargs()),
                  overwrite=(args.resume is None), log_gpu_stats=args.log_gpu)

    # I don't use the timer during training (I use a different timing method).
    # Apparently there's a race condition with multiple GPUs, so disable it just to be safe.
    timer.disable_all()

    # Both of these can set args.resume to None, so do them before the check
    if args.resume == 'interrupt':
        args.resume = SavePath.get_interrupt(args.save_folder)
    elif args.resume == 'latest':
        args.resume = SavePath.get_latest(args.save_folder, cfg.name)

    if args.resume is not None:
        print('Resuming training, loading {}...'.format(args.resume))
        yolact_net.load_weights(args.resume)
        if args.start_iter == -1:
            args.start_iter = SavePath.from_str(args.resume).iteration
    else:
        print('Initializing weights...')
        yolact_net.init_weights(backbone_path=args.save_folder + cfg.backbone.path)

    # 5: optimizer and loss function.
    optimizer = optim.SGD(net.parameters(), lr=args.lr,
                          momentum=args.momentum, weight_decay=args.decay)
    criterion = MultiBoxLoss(num_classes=cfg.num_classes,
                             pos_threshold=cfg.positive_iou_threshold,
                             neg_threshold=cfg.negative_iou_threshold,
                             negpos_ratio=cfg.ohem_negpos_ratio)

    # 6: with multiple GPUs, split the batch across devices; the allocation
    # must sum to the total batch size or something is wrong — abort.
    if args.batch_alloc is not None:
        args.batch_alloc = [int(x) for x in args.batch_alloc.split(',')]
        if sum(args.batch_alloc) != args.batch_size:
            print('Error: Batch allocation (%s) does not sum to batch size (%s).'
                  % (args.batch_alloc, args.batch_size))
            exit(-1)

    # 7: wrap net + loss into a single data-parallel module. After this call,
    # `net(datum)` runs detection, fast-NMS, and loss computation, splitting
    # work across GPUs; `yolact_net` still points at the bare Yolact inside.
    net = CustomDataParallel(NetLoss(net, criterion))
    if args.cuda:
        net = net.cuda()

    # 8: initialize parameters by pushing a zero tensor through the network,
    # with batch-norm frozen so the author's pre-loaded running means/vars are
    # not clobbered; then restore BN's train mode.
    if not cfg.freeze_bn:
        yolact_net.freeze_bn()  # Freeze bn so we don't kill our means
    # BUGFIX: the forward call was mangled to a bare tuple expression
    # `(torch.zeros(...).cuda())`, which never touches the network — the
    # initialization pass must actually be run through yolact_net.
    yolact_net(torch.zeros(1, 3, cfg.max_size, cfg.max_size).cuda())
    if not cfg.freeze_bn:
        yolact_net.freeze_bn(True)

    # 9: iteration bookkeeping; max(...) rejects negative start iterations.
    iteration = max(args.start_iter, 0)
    last_time = time.time()
    epoch_size = len(dataset) // args.batch_size
    num_epochs = math.ceil(cfg.max_iter / epoch_size)

    # 10: Which learning rate adjustment step are we on? lr' = lr * gamma ^ step_index
    step_index = 0
    data_loader = data.DataLoader(dataset, args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True,
                                  collate_fn=detection_collate,
                                  pin_memory=True)

    # 11: checkpoint path helper and moving averages for timing / loss display.
    save_path = lambda epoch, iteration: SavePath(
        cfg.name, epoch, iteration).get_path(root=args.save_folder)
    time_avg = MovingAverage()

    global loss_types  # Forms the print order
    loss_avgs = {k: MovingAverage(100) for k in loss_types}

    # 12: main training loop. try/except lets Ctrl+C (KeyboardInterrupt) stop
    # training while saving progress; restart later with --resume.
    print('Begin training!')
    print()
    try:
        for epoch in range(num_epochs):
            # When resuming, skip whole epochs until we reach the resume iter.
            if (epoch + 1) * epoch_size < iteration:
                continue

            for datum in data_loader:
                # Stop if we've reached an epoch if we're resuming from start_iter
                if iteration == (epoch + 1) * epoch_size:
                    break
                # Stop at the configured number of iterations even if mid-epoch
                if iteration == cfg.max_iter:
                    break

                # Change a config setting if we've reached the specified iteration
                changed = False
                for change in cfg.delayed_settings:
                    if iteration >= change[0]:
                        changed = True
                        cfg.replace(change[1])
                        # Reset the loss averages because things might have changed.
                        # BUGFIX: iterating the dict directly yields string keys,
                        # and str has no .reset() — iterate the values instead.
                        for avg in loss_avgs.values():
                            avg.reset()
                # If a config setting was changed, remove it from the list so we
                # don't keep checking
                if changed:
                    cfg.delayed_settings = [
                        x for x in cfg.delayed_settings if x[0] > iteration
                    ]

                # Warm up by linearly interpolating the learning rate from some
                # smaller value
                if cfg.lr_warmup_until > 0 and iteration <= cfg.lr_warmup_until:
                    set_lr(optimizer,
                           (args.lr - cfg.lr_warmup_init)
                           * (iteration / cfg.lr_warmup_until)
                           + cfg.lr_warmup_init)

                # Adjust the learning rate at the given iterations, but also if
                # we resume from past that iteration
                while step_index < len(cfg.lr_steps) and iteration >= cfg.lr_steps[step_index]:
                    step_index += 1
                    set_lr(optimizer, args.lr * (args.gamma ** step_index))

                # Zero the grad to get ready to compute gradients
                optimizer.zero_grad()

                # Forward pass computes the losses directly
                # (see CustomDataParallel and NetLoss)
                losses = net(datum)
                losses = {k: (v).mean() for k, v in losses.items()}  # Mean here because Dataparallel
                loss = sum([losses[k] for k in losses])
                # no_inf_mean removes some components from the loss, so make sure to backward through all of it
                # all_loss = sum([v.mean() for v in losses.values()])

                # Backprop
                loss.backward()  # Do this to free up vram even if loss is not finite
                if torch.isfinite(loss).item():
                    optimizer.step()

                # Add the loss to the moving average for bookkeeping
                for k in losses:
                    loss_avgs[k].add(losses[k].item())

                cur_time = time.time()
                elapsed = cur_time - last_time
                last_time = cur_time

                # Exclude graph setup from the timing information
                if iteration != args.start_iter:
                    time_avg.add(elapsed)

                if iteration % 10 == 0:
                    eta_str = str(
                        datetime.timedelta(seconds=(cfg.max_iter - iteration)
                                           * time_avg.get_avg())).split('.')[0]
                    total = sum([loss_avgs[k].get_avg() for k in losses])
                    loss_labels = sum([[k, loss_avgs[k].get_avg()]
                                       for k in loss_types if k in losses], [])
                    print(('[%3d] %7d ||' + (' %s: %.3f |' * len(losses))
                           + ' T: %.3f || ETA: %s || timer: %.3f')
                          % tuple([epoch, iteration] + loss_labels
                                  + [total, eta_str, elapsed]),
                          flush=True)

                # Log to file
                if args.log:
                    precision = 5
                    loss_info = {
                        k: round(losses[k].item(), precision) for k in losses
                    }
                    loss_info['T'] = round(loss.item(), precision)
                    if args.log_gpu:
                        log.log_gpu_stats = (iteration % 10 == 0)  # nvidia-smi is sloooow
                    # NOTE(review): cur_lr is assumed to be maintained globally
                    # by set_lr (defined elsewhere in this file) — confirm.
                    log.log('train', loss=loss_info, epoch=epoch,
                            iter=iteration, lr=round(cur_lr, 10),
                            elapsed=elapsed)
                    log.log_gpu_stats = args.log_gpu

                iteration += 1

                # Periodic checkpointing.
                if iteration % args.save_interval == 0 and iteration != args.start_iter:
                    if args.keep_latest:
                        latest = SavePath.get_latest(args.save_folder, cfg.name)
                    print('Saving state, iter:', iteration)
                    yolact_net.save_weights(save_path(epoch, iteration))
                    if args.keep_latest and latest is not None:
                        if args.keep_latest_interval <= 0 or iteration % args.keep_latest_interval != args.save_interval:
                            print('Deleting old save...')
                            os.remove(latest)

            # When train/validation is enabled, compute validation mAP at the
            # configured epoch interval.
            if args.validation_epoch > 0:
                if epoch % args.validation_epoch == 0 and epoch > 0:
                    compute_validation_map(epoch, iteration, yolact_net,
                                           val_dataset,
                                           log if args.log else None)

        # Compute validation mAP after training is finished
        compute_validation_map(epoch, iteration, yolact_net, val_dataset,
                               log if args.log else None)

    # 13: on Ctrl+C, save an interrupt checkpoint so training can be resumed.
    except KeyboardInterrupt:
        if args.interrupt:
            print('Stopping early. Saving network...')
            # Delete previous copy of the interrupted network so we don't spam the weights folder
            SavePath.remove_interrupt(args.save_folder)
            yolact_net.save_weights(
                save_path(epoch, repr(iteration) + '_interrupt'))
        exit()

    yolact_net.save_weights(save_path(epoch, iteration))
def compute_validation_loss(data_loader, val_loader, criterion):
    """Sweep saved checkpoints and log train/val losses for each one.

    Walks the checkpoint files under ``args.resume``; every 1500 iterations,
    loads the checkpoint whose iteration id matches, then runs one full pass
    over ``data_loader`` (train) and ``val_loader`` (validation) accumulating
    the per-component losses, and appends a ``iter_trainloss_valloss`` record
    to ``args.log_loss``.

    Args:
        data_loader: training-set DataLoader (also used to size epochs).
        val_loader: validation-set DataLoader.
        criterion: multibox loss wrapped into the network via ``NetLoss``.

    Returns:
        None. Returns early (``None``) as soon as no checkpoint exists for the
        next 1500-iteration milestone.
    """
    global loss_types  # loss counters

    # Fresh network; ``net`` wraps it for loss computation, but weights are
    # loaded through ``yolact_net`` (same underlying module, so the wrapped
    # ``net`` sees them too — presumably; verify CustomDataParallel shares
    # parameters rather than copying).
    yolact_net = Yolact()
    net = yolact_net
    net.train()

    net = CustomDataParallel(NetLoss(net, criterion))
    if args.cuda:
        net = net.cuda()

    # All checkpoint filenames in the resume directory; expected to end in
    # ``..._<epoch>_<iter>.pth`` — TODO confirm naming convention.
    weight_paths = os.listdir(args.resume)

    # Initialize everything
    if not cfg.freeze_bn:
        yolact_net.freeze_bn()  # Freeze bn so we don't kill our means
    # Dummy forward pass to materialize lazy buffers before loading weights.
    yolact_net(torch.zeros(1, 3, cfg.max_size, cfg.max_size).cuda())
    if not cfg.freeze_bn:
        yolact_net.freeze_bn(True)

    epoch_size = len(data_loader)
    num_epochs = math.ceil(cfg.max_iter / epoch_size)

    with torch.no_grad():
        # Don't switch to eval mode because we want to get losses
        next_iterations = args.start_iter
        for epoch in range(num_epochs):
            # Skip epochs that don't contain the next 1500-iter milestone.
            new_epoch = next_iterations // epoch_size
            if epoch != new_epoch:
                continue
            # NOTE: ``datum`` here is just an index from ``range`` (the real
            # data is iterated further below); this loop only walks iteration
            # counters to find checkpoint milestones.
            for idx, datum in enumerate(tqdm(range(len(data_loader)))):
                iterations = epoch * epoch_size + idx
                if iterations % 1500 == 0:
                    # Look for a checkpoint saved exactly at this iteration.
                    stop = True
                    for path in weight_paths:
                        iter_id = path.split('_')[-1][:-4]   # strip ".pth"
                        epoch_id = int(path.split('_')[-2])  # NOTE(review): unused
                        if int(iter_id) == iterations:
                            stop = False
                            break
                    if stop:
                        # No checkpoint for this milestone: assume training
                        # hasn't reached it and stop the sweep.
                        print("Stop at iter {}".format(iterations))
                        return None
                    # NOTE(review): relies on the leaked loop variable
                    # ``path`` holding the matched filename (valid only
                    # because of the ``break`` above).
                    weight_name = path  # "yolact_taco_{}_{}.pth".format(epoch_id,iterations)
                    weight_path = os.path.join(args.resume, weight_name)
                    print('Loading {}...'.format(weight_name))
                    yolact_net.load_weights(weight_path)
                else:
                    continue

                # --- full pass over the training set ---
                datum = None
                losses = {}
                total_train = len(data_loader)
                for idx, datum in enumerate(tqdm(data_loader)):
                    try:
                        _losses = net(datum)
                        # Mean over GPUs (DataParallel returns one value per device).
                        _losses = {k: (v).mean() for k, v in _losses.items()}
                        for k, v in _losses.items():
                            if k in losses:
                                losses[k] += v
                            else:
                                losses[k] = v
                    except IndexError as e:
                        # Bad batch: drop it from the denominator and move on.
                        total_train -= 1
                        continue
                for k in losses.keys():
                    losses[k] /= total_train
                # Sum of the averaged per-component losses (``k`` iterates values here).
                total_train_loss = sum([k for k in losses.values()])
                print('Train loss: {}'.format(total_train_loss.item()))

                # --- full pass over the validation set (same scheme) ---
                datum = None
                _losses = None
                losses = {}
                total_val = len(val_loader)
                for idx, datum in enumerate(tqdm(val_loader)):
                    try:
                        _losses = net(datum)
                        _losses = {k: (v).mean() for k, v in _losses.items()}
                        for k, v in _losses.items():
                            if k in losses:
                                losses[k] += v
                            else:
                                losses[k] = v
                    except IndexError as e:
                        total_val -= 1
                        continue
                for k in losses.keys():
                    losses[k] /= total_val
                total_val_loss = sum([k for k in losses.values()])
                print('Val loss: {}'.format(total_val_loss.item()))

                next_iterations += 1500
                # Append one record per evaluated checkpoint.
                with open(args.log_loss, 'a+') as f:
                    f.write('{}_{}_{}\r'.format(iterations, total_train_loss.item(), total_val_loss.item()))
class YolactEdgeEngine:
    """YolactEdge inference engine with TensorRT acceleration.

    Loads a MobileNetV2 YolactEdge checkpoint (hard-coded paths), converts it
    to TensorRT, and exposes single-image / batch inference that returns
    rendered images and/or ``ImageResult`` detection records. Detections are
    filtered to the 'car' and 'truck' classes.
    """

    def __init__(self):
        # Populate ``self.args`` from the CLI parser, then override every
        # option with fixed values for this deployment.
        parse_args(self)
        self.args.config = 'yolact_edge_mobilenetv2_config'
        set_cfg(self.args.config)
        self.args.trained_model = '/home/ht/catkin_ws/src/instance_segmentation/scripts/weights/yolact_edge_mobilenetv2_124_10000.pth'
        self.args.top_k = 10
        self.args.score_threshold = 0.3
        self.args.trt_batch_size = 3
        self.args.disable_tensorrt = False
        self.args.use_fp16_tensorrt = False
        self.args.use_tensorrt_safe_mode = True
        self.args.cuda = True
        self.args.fast_nms = True
        self.args.display_masks = True
        self.args.display_bboxes = True
        self.args.display_text = True
        self.args.display_scores = True
        self.args.display_linecomb = False
        self.args.fast_eval = False
        self.args.deterministic = False
        self.args.no_crop = False
        self.args.crop = True
        # Calibration images for TensorRT INT8 calibration — TODO confirm
        # they are only needed when INT8 mode is enabled.
        self.args.calib_images = '/home/ht/catkin_ws/src/instance_segmentation/scripts/data/coco/calib_images'
        setup_logger(logging_level=logging.INFO)
        self.logger = logging.getLogger('yolact.eval')
        # Per-GPU color cache used by prep_display.get_color.
        self.color_cache = defaultdict(lambda: {})
        with torch.no_grad():
            cudnn.benchmark = True
            cudnn.fastest = True
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
            self.logger.info('Loading model...')
            self.net = Yolact(training=False)
            if self.args.trained_model is not None:
                self.net.load_weights(self.args.trained_model, args=self.args)
            else:
                self.logger.warning('No weights loaded!')
            self.net.eval()
            self.logger.info('Model loaded.')
            # Replace supported submodules with TensorRT engines in place.
            convert_to_tensorrt(self.net, cfg, self.args, transform=BaseTransform())

    def evaluate(self, train_mode=False, train_cfg=None):
        """Run folder-to-folder evaluation driven by ``args.images`` ('in:out').

        NOTE(review): ``train_mode`` and ``train_cfg`` are accepted but unused.
        """
        with torch.no_grad():
            self.net = self.net.cuda()
            self.net.detect.use_fast_nms = self.args.fast_nms
            cfg.mask_proto_debug = self.args.mask_proto_debug
            inp, out = self.args.images.split(':')
            self.evalimages(inp, out)

    def evalimages(self, input_folder: str, output_folder: str):
        """Run inference on every file in ``input_folder``.

        NOTE(review): ``out_path`` is passed to evalimage but evalimage's
        save branch is commented out, so nothing is written to
        ``output_folder`` besides creating the directory.
        """
        if not os.path.exists(output_folder):
            os.mkdir(output_folder)
        print()
        for p in Path(input_folder).glob('*'):
            path = str(p)
            name = os.path.basename(path)
            # Force a .jpg extension on the output name.
            name = '.'.join(name.split('.')[:-1]) + '.jpg'
            out_path = os.path.join(output_folder, name)
            img = cv2.imread(path)
            img_out = self.evalimage(img, out_path)
            #print(path + ' -> ' + out_path)
        print('Done.')

    def detect(self, img_in, return_imgs=False):
        """Batch-detect on a list of images; see evalbatch for the contract."""
        with torch.no_grad():
            self.net = self.net.cuda()
            self.net.detect.use_fast_nms = self.args.fast_nms
            cfg.mask_proto_debug = self.args.mask_proto_debug
            #return self.evalimage(img_in[0])
            return self.evalbatch(img_in, return_imgs)

    def evalbatch(self, imgs, return_imgs=False):
        """Run one forward pass over a batch of HWC uint8 images.

        Returns ``(rendered_images, results)`` when ``return_imgs`` is True,
        otherwise just the list of ``ImageResult``s (one per input image).
        """
        frame = torch.from_numpy(np.array(imgs)).cuda().float()
        batch = FastBaseTransform()(frame)
        if cfg.flow.warp_mode != 'none':
            assert False, 'Evaluating the image with a video-based model.'
        # Static-image extras: run the full backbone, no temporal state.
        extras = {
            "backbone": "full",
            "interrupt": False,
            "keep_statistics": False,
            "moving_statistics": None
        }
        #start_time = time.time()
        preds = self.net(batch, extras=extras)["pred_outs"]
        #end_time = time.time()
        #print('%.3f s' % (end_time-start_time))
        imgs_out = []
        allres = []
        for i, img in enumerate(imgs):
            if return_imgs:
                img_out, res = self.prep_display(preds, frame[i], None, None,
                                                 undo_transform=False,
                                                 batch_idx=i,
                                                 create_mask=True,
                                                 return_imgs=return_imgs)
                imgs_out.append(img_out)
                allres.append(res)
            else:
                res = self.prep_display(preds, frame[i], None, None,
                                        undo_transform=False,
                                        batch_idx=i,
                                        create_mask=True,
                                        return_imgs=return_imgs)
                allres.append(res)
        if return_imgs:
            return imgs_out, allres
        else:
            return allres

    def evalimage(self, img, save_path=None):
        """Run inference on a single HWC uint8 image and return the prep_display result.

        NOTE(review): ``save_path`` is currently ignored (write code is
        commented out below).
        """
        frame = torch.from_numpy(img).cuda().float()
        batch = FastBaseTransform()(frame.unsqueeze(0))
        if cfg.flow.warp_mode != 'none':
            assert False, 'Evaluating the image with a video-based model.'
        extras = {
            "backbone": "full",
            "interrupt": False,
            "keep_statistics": False,
            "moving_statistics": None
        }
        preds = self.net(batch, extras=extras)["pred_outs"]
        return self.prep_display(preds, frame, None, None,
                                 undo_transform=False, create_mask=True)
        #if save_path:
        #    cv2.imwrite(save_path, img_numpy)
        #return img_numpy, mask

    def prep_display(self, dets_out, img, h, w, undo_transform=True,
                     class_color=False, mask_alpha=0.45, batch_idx=0,
                     create_mask=False, return_imgs=False):
        """Post-process raw detections and render them onto the image.

        Keeps only 'car'/'truck' detections. Depending on the flags, returns
        a rendered uint8 image, an ``ImageResult`` (classes, scores, boxes,
        label mask, count), or both.

        NOTE(review): when ``args.display_masks`` is False (or
        ``cfg.eval_mask_branch`` is off) and detections exist, the final
        ``return img_numpy, ImageResult(..., mask_img, ...)`` would raise
        NameError because ``mask_img`` is only assigned inside the mask
        branch — confirm those flags are always enabled in this deployment.
        """
        if undo_transform:
            img_numpy = undo_image_transformation(img, w, h)
            img_gpu = torch.Tensor(img_numpy).cuda()
        else:
            img_gpu = img / 255.0
            h, w, _ = img.shape
        #print(h, " ", w)
        with timer.env('Postprocess'):
            t = postprocess(dets_out, w, h, batch_idx,
                            visualize_lincomb=self.args.display_linecomb,
                            crop_masks=self.args.crop,
                            score_threshold=self.args.score_threshold)
            torch.cuda.synchronize()
        with timer.env('Copy'):
            if cfg.eval_mask_branch:
                # Masks stay on the GPU; only classes/scores/boxes are copied.
                masks = t[3][:self.args.top_k]
            classes, scores, boxes = [
                x[:self.args.top_k].cpu().numpy() for x in t[:3]
            ]
        # Detections come sorted by score, so cut at the first one below threshold.
        num_dets_to_consider = min(self.args.top_k, classes.shape[0])
        for j in range(num_dets_to_consider):
            if scores[j] < self.args.score_threshold:
                num_dets_to_consider = j
                break
        # Keep only the classes this deployment cares about.
        idx_fil = []
        for i in range(num_dets_to_consider):
            if cfg.dataset.class_names[
                    classes[i]] == 'car' or cfg.dataset.class_names[
                        classes[i]] == 'truck':
                idx_fil.append(i)
        num_dets_to_consider = len(idx_fil)
        if num_dets_to_consider == 0:
            # no detection found so just output original image
            if not create_mask:
                return (img_gpu * 255).byte().cpu().numpy()
            elif return_imgs:
                return (img_gpu * 255).byte().cpu().numpy(), ImageResult(
                    None, None, None, np.zeros((h, w, 1), dtype='uint8'), 0)
            else:
                return ImageResult(None, None, None,
                                   np.zeros((h, w, 1), dtype='uint8'), 0)

        # Quick and dirty lambda for selecting the color for a particular index
        # Also keeps track of a per-gpu color cache for maximum speed
        def get_color(j, on_gpu=None):
            color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)
            if on_gpu is not None and color_idx in self.color_cache[on_gpu]:
                return self.color_cache[on_gpu][color_idx]
            else:
                color = COLORS[color_idx]
                if not undo_transform:
                    # The image might come in as RGB or BGR, depending
                    color = (color[2], color[1], color[0])
                if on_gpu is not None:
                    color = torch.Tensor(color).to(on_gpu).float() / 255.
                    self.color_cache[on_gpu][color_idx] = color
                return color

        if self.args.display_masks and cfg.eval_mask_branch:
            # after this, mask is of size [num_dets, h, w, l]
            #masks = masks[:num_dets_to_consider, :, :, None]
            #classes = classes[:num_dets_to_consider]
            #scores = scores[:num_dets_to_consider]
            #boxes = boxes[:num_dets_to_consider, :]
            masks = masks[idx_fil, :, :, None]
            classes = classes[idx_fil]
            scores = scores[idx_fil]
            boxes = boxes[idx_fil, :]
            if create_mask:
                # Label image: detection j painted with value 10*(j+1);
                # NOTE(review): overlapping masks sum, so overlap pixels get
                # values that belong to no single detection — confirm intended.
                mask_img = np.zeros((h, w, 1), dtype='uint8')
                for j in range(num_dets_to_consider):
                    mask_img += 10 * (j + 1) * masks[j].cpu().numpy().astype(
                        np.uint8)
                if not return_imgs:
                    return ImageResult(classes, scores, boxes, mask_img,
                                       num_dets_to_consider)
            # prepare the rgb image for each mask given their color (of size [num_dets, w, h, l])
            colors = torch.cat([
                get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
                for j in range(num_dets_to_consider)
            ], dim=0)
            masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha
            # this is 1 everywhere except for 1-mask_alpha where the mask is
            inv_alph_masks = masks * (-mask_alpha) + 1
            # I did the math for this on pen and paper. This whole block should be equivalent to:
            # for j in range(num_dets_to_consider):
            #     img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
            masks_color_summand = masks_color[0]
            if num_dets_to_consider > 1:
                inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider -
                                                  1)].cumprod(dim=0)
                masks_color_cumul = masks_color[1:] * inv_alph_cumul
                masks_color_summand += masks_color_cumul.sum(dim=0)
            img_gpu = img_gpu * inv_alph_masks.prod(
                dim=0) + masks_color_summand
        # then draw the stuff that needs to be done on cpu
        # note: make sure this is a uint8 tensor or opencv will not anti-alias text for whatever reason
        img_numpy = (img_gpu * 255).byte().cpu().numpy()
        if self.args.display_text or self.args.display_bboxes:
            # Reversed so that higher-scoring detections are drawn on top.
            for j in reversed(range(num_dets_to_consider)):
                x1, y1, x2, y2 = boxes[j, :]
                color = get_color(j)
                score = scores[j]
                if self.args.display_bboxes:
                    cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)
                if self.args.display_text:
                    _class = cfg.dataset.class_names[classes[j]]
                    text_str = '%s: %.2f' % (
                        _class, score) if self.args.display_scores else _class
                    text_pt = (x1, y1 - 3)
                    text_color = [255, 255, 255]
                    font_face = cv2.FONT_HERSHEY_DUPLEX
                    font_scale = 0.6
                    font_thickness = 1
                    cv2.putText(img_numpy, text_str, text_pt, font_face,
                                font_scale, text_color, font_thickness,
                                cv2.LINE_AA)
        return img_numpy, ImageResult(classes, scores, boxes, mask_img,
                                      num_dets_to_consider)
class YolactWorker(qc.QObject):
    """Qt worker object that runs YOLACT inference on frames.

    Slots configure the network (weights, config, thresholds) and ``process``
    runs detection on a frame, emitting the resulting bounding boxes through
    ``sigProcessed``. Intended to live on a worker thread; the mutex guards
    the settings against concurrent slot calls.
    """
    # emits list of classes, scores, and bboxes of detected objects
    # bboxes are in (top-left, w, h) format
    # The event is passed for synchronizing display of image in videowidget
    # with the bounding boxes
    sigProcessed = qc.pyqtSignal(np.ndarray, int)
    sigInitialized = qc.pyqtSignal()
    sigError = qc.pyqtSignal(YolactException)

    def __init__(self):
        super(YolactWorker, self).__init__()
        self.mutex = qc.QMutex()
        self._image = None
        self._pos = 0
        self.top_k = 10                 # max detections to keep per frame
        self.cuda = torch.cuda.is_available()
        self.net = None                 # set by setWeights
        self.score_threshold = 0.15
        self.overlap_thresh = 1.0       # 1.0 disables overlap merging
        self.config = yconfig.cfg
        self.weights_file = ''
        self.config_file = ''
        self.video_file = None

    def setWaitCond(self, waitCond: threading.Event) -> None:
        # NOTE(review): QMutexLocker bound to a throwaway name is released
        # immediately by GC in PyQt; the lock may not actually be held for
        # the statement that follows. Same pattern in the slots below.
        _ = qc.QMutexLocker(self.mutex)
        self._waitCond = waitCond

    @qc.pyqtSlot(bool)
    def enableCuda(self, on):
        """Persist and apply the CUDA on/off preference."""
        settings.setValue('yolact/cuda', on)
        self.cuda = on

    @qc.pyqtSlot(int)
    def setTopK(self, value):
        _ = qc.QMutexLocker(self.mutex)
        self.top_k = value

    @qc.pyqtSlot(int)
    def setBatchSize(self, value):
        _ = qc.QMutexLocker(self.mutex)
        self.batch_size = int(value)

    @qc.pyqtSlot(float)
    def setScoreThresh(self, value):
        _ = qc.QMutexLocker(self.mutex)
        self.score_threshold = value

    @qc.pyqtSlot(float)
    def setOverlapThresh(self, value):
        """Merge objects if their bboxes overlap more than this."""
        _ = qc.QMutexLocker(self.mutex)
        self.overlap_thresh = value

    @qc.pyqtSlot(str)
    def setConfig(self, filename):
        """Load a YAML config file and copy its keys onto the YOLACT config."""
        if filename == '':
            return
        self.config_file = filename
        with open(filename, 'r') as cfg_file:
            config = yaml.safe_load(cfg_file)
        for key, value in config.items():
            logging.debug('%r \n%r %r', key, type(value), value)
            self.config.__setattr__(key, value)
        if 'mask_proto_debug' not in config:
            self.config.mask_proto_debug = False
        logging.debug(yaml.dump(self.config))

    @qc.pyqtSlot(str)
    def setWeights(self, filename: str) -> None:
        """Build the network, load weights, and emit ``sigInitialized``.

        Raises:
            YolactException: if ``filename`` is empty.
        """
        if filename == '':
            raise YolactException('Empty filename for network weights')
        self.weights_file = filename
        tic = time.perf_counter_ns()
        with torch.no_grad():
            if self.cuda:
                cudnn.fastest = True
                torch.set_default_tensor_type('torch.cuda.FloatTensor')
            else:
                torch.set_default_tensor_type('torch.FloatTensor')
            self.net = Yolact()
            self.net.load_weights(self.weights_file, self.cuda)
            self.net.eval()
            if self.cuda:
                self.net = self.net.cuda()
        toc = time.perf_counter_ns()
        logging.debug('Time to load weights %f s', 1e-9 * (toc - tic))
        self.sigInitialized.emit()

    @qc.pyqtSlot(np.ndarray, int)
    def process(self, image: np.ndarray, pos: int):
        """:returns (classes, scores, boxes) where `boxes` is an array of
        bounding boxes of detected objects in (xleft, ytop, width, height)
        format.

        `classes` is the class ids of the corresponding objects.

        `scores` are the computed class scores corresponding to the detected
        objects. Roughly high score indicates strong belief that the object
        belongs to the identified class.
        """
        _ts = time.perf_counter()
        logging.debug(f'Received frame {pos}')
        if self.net is None:
            self.sigError.emit(YolactException('Network not initialized'))
            return
        # Partly follows yolact eval.py
        tic = time.perf_counter_ns()
        _ = qc.QMutexLocker(self.mutex)
        with torch.no_grad():
            if self.cuda:
                image = torch.from_numpy(image).cuda().float()
            else:
                image = torch.from_numpy(image).float()
            batch = FastBaseTransform()(image.unsqueeze(0))
            preds = self.net(batch)
            image_gpu = image / 255.0
            h, w, _ = image.shape
            # NOTE(review): ``save`` captures the old rescore_bbox value but
            # is never restored afterwards — confirm this is intentional.
            save = self.config.rescore_bbox
            self.config.rescore_bbox = True
            classes, scores, boxes, masks = oututils.postprocess(
                preds, w, h,
                visualize_lincomb=False,
                crop_masks=True,
                score_threshold=self.score_threshold)
            # Keep the top_k highest-scoring detections.
            idx = scores.argsort(0, descending=True)[:self.top_k]
            # if self.config.eval_mask_branch:
            #     masks = masks[idx]
            classes, scores, boxes = [
                x[idx].cpu().numpy() for x in (classes, scores, boxes)
            ]
            # This is probably not required, `postprocess` uses
            # `score_thresh` already
            num_dets_to_consider = min(self.top_k, classes.shape[0])
            for j in range(num_dets_to_consider):
                if scores[j] < self.score_threshold:
                    num_dets_to_consider = j
                    break
            # logging.debug('Bounding boxes: %r', boxes)
            # Convert from top-left bottom-right format to
            # top-left, width, height format
            if len(boxes) == 0:
                self.sigProcessed.emit(boxes, pos)
                return
            boxes[:, 2:] = boxes[:, 2:] - boxes[:, :2]
            boxes = np.asanyarray(boxes, dtype=np.int_)
            if self.overlap_thresh < 1:
                # Drop one of each pair of boxes whose intersection-over-smaller
                # exceeds the overlap threshold.
                dist_matrix = pairwise_distance(new_bboxes=boxes,
                                                bboxes=boxes,
                                                boxtype=OutlineStyle.bbox,
                                                metric=DistanceMetric.ios)
                bad_idx = [jj for ii in range(dist_matrix.shape[0] - 1) \
                           for jj in range(ii+1, dist_matrix.shape[1]) \
                           if dist_matrix[ii, jj] < 1 - self.overlap_thresh]
                good_idx = list(set(range(boxes.shape[0])) - set(bad_idx))
                boxes = boxes[good_idx].copy()
        toc = time.perf_counter_ns()
        logging.debug('Time to process single _image: %f s',
                      1e-9 * (toc - tic))
        self.sigProcessed.emit(boxes, pos)
        logging.debug(f'Emitted bboxes for frame {pos}: {boxes}')
        _dt = time.perf_counter() - _ts
        logging.debug(
            f'{__name__}.{self.__class__.__name__}.process: Runtime: {_dt}s')
class MattingService:
    """YOLACT-based matting: segments an image and writes an RGBA result
    whose alpha channel is the top detection's mask."""

    def __init__(self,
                 model_path="./weights/yolact_im700_54_800000.pth",
                 use_cuda=False):
        """Load the YOLACT network once; ``use_cuda`` moves it to the GPU."""
        print('Loading model...', end='')
        self.use_cuda = use_cuda
        self.trained_model = model_path
        self.net = Yolact()
        self.net.load_weights(self.trained_model)
        self.net.eval()
        if self.use_cuda:
            self.net = self.net.cuda()
        self.net.detect.use_fast_nms = True
        self.net.detect.use_cross_class_nms = False
        cfg.mask_proto_debug = False
        print(' Done.')

    def process(self, image, top_k=1, score_threshold=0.6):
        """Run matting on ``image``.

        ``image`` is either a plain input path (output goes to
        ``results/<name>.png``) or an ``'input:output'`` pair. Returns the
        output image name.

        NOTE(review): if ``image`` is None this returns None without doing
        anything — confirm callers never pass None expecting a result.
        """
        # TODO Currently we do not support Fast Mask Re-scroing in evalimage, evalimages, and evalvideo
        with torch.no_grad():
            if image is not None:
                if ':' in image:
                    inp, _image_name = image.split(':')
                    self._infer_image(self.net, inp, _image_name, top_k,
                                      score_threshold)
                else:
                    _image_name = image.split('/')[-1].split('.')[0] + '.png'
                    out = os.path.join('results/', _image_name)
                    self._infer_image(self.net, image, out, top_k,
                                      score_threshold)
                return _image_name

    def _infer_image(self, net: Yolact, path, save_path, top_k,
                     score_threshold):
        """Read ``path``, run the network, and save/show the RGBA result."""
        if self.use_cuda:
            frame = torch.from_numpy(cv2.imread(path)).cuda().float()
        else:
            frame = torch.from_numpy(cv2.imread(path)).float()
        batch = FastBaseTransform()(frame.unsqueeze(0))
        preds = net(batch)
        img_numpy = self.post_process(preds, frame, None, None, top_k,
                                      score_threshold, undo_transform=False)
        # NOTE(review): duplicated ``save_path is None`` checks below; the
        # first reorders channels for matplotlib display only.
        if save_path is None:
            img_numpy = img_numpy[:, :, (2, 1, 0, 3)]
        if save_path is None:
            plt.subplot()
            plt.imshow(img_numpy)
            plt.title(path)
            plt.show()
        else:
            # plt.subplot()
            # plt.imshow(img_numpy)
            # plt.title(path)
            # plt.show()
            cv2.imwrite(save_path, img_numpy)

    @staticmethod
    def post_process(dets_out, img, h, w, top_k=1, score_threshold=0.6,
                     undo_transform=True):
        """
        Note: If undo_transform=False then im_h and im_w are allowed to be None.

        Returns an HxWx4 uint8 RGBA image: the input image with the best
        detection's mask written into the alpha channel (alpha untouched when
        there is no detection above ``score_threshold``).
        """
        if undo_transform:
            img_numpy = undo_image_transformation(img, w, h)
            img_gpu = torch.Tensor(img_numpy).cuda()
        else:
            img_gpu = img / 255.0
            h, w, _ = img.shape
        with timer.env('Postprocess'):
            # Temporarily force bbox rescoring, restore afterwards.
            save = cfg.rescore_bbox
            cfg.rescore_bbox = True
            t = postprocess(dets_out, w, h,
                            visualize_lincomb=False,
                            crop_masks=False,
                            score_threshold=score_threshold)
            cfg.rescore_bbox = save
        with timer.env('Copy'):
            idx = t[1].argsort(0, descending=True)[:top_k]
            if cfg.eval_mask_branch:
                # Masks are drawn on the GPU, so don't copy
                masks = t[3][idx]
            classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]
        # Detections are score-sorted; cut at the first below threshold.
        num_dets_to_consider = min(top_k, classes.shape[0])
        for j in range(num_dets_to_consider):
            if scores[j] < score_threshold:
                num_dets_to_consider = j
                break
        # First, draw the masks on the GPU where we can do it really fast
        # Beware: very fast but possibly unintelligible mask-drawing code ahead
        # I wish I had access to OpenGL or Vulkan but alas, I guess Pytorch tensor operations will have to suffice
        # After this, mask is of size [num_dets, h, w, 1]
        final_res = (img_gpu * 255).byte().cpu().numpy()
        final_res = cv2.cvtColor(final_res, cv2.COLOR_RGB2RGBA)
        if num_dets_to_consider == 0:
            return final_res
        masks = masks[:num_dets_to_consider, :, :, None]
        # NOTE(review): only the first (highest-scoring) mask is used even
        # when top_k > 1 — confirm this matches the intended matting behavior.
        _mask = (masks * 255).byte().cpu().numpy()[0]
        # Then assign the mask to the last channel of the image
        final_res[:, :, 3] = _mask.squeeze()
        return final_res
def create_model(weights): yolact = Yolact() yolact.load_weights(weights) return yolact
cudnn.fastest = True torch.set_default_tensor_type('torch.cuda.FloatTensor') else: torch.set_default_tensor_type('torch.FloatTensor') if args.resume and not args.display: with open(args.ap_data_file, 'rb') as f: ap_data = pickle.load(f) calc_map(ap_data) exit() dataset = None print('Loading model...', end='') net = Yolact() net.load_weights(args.trained_model) net.eval() print(' Done.') if args.cuda: net = net.cuda() net.detect.use_fast_nms = args.fast_nms net.detect.use_cross_class_nms = args.cross_class_nms cfg.mask_proto_debug = args.mask_proto_debug scan = Scan(rgb_paths=rgb_paths, depth_paths=depth_paths, pose_paths=pose_paths, cam_intr=cam_intr, mesh_plot=mesh_plot, scannet_data=scannet_data, mask_net=net, args=args, root_path=root_path, use_gpu=use_gpu)
def detect(): img_path = '/home/user/dataset/pear/train/JPEGImages' save_path = '/home/user/pear_output' weight_path = '/home/user/caoliwei/yolact/weights/20200901/yolact_darknet53_1176_20000.pth' set_cfg('pear_config') with torch.no_grad(): torch.cuda.set_device(0) ###### # If the input image size is constant, this make things faster (hence why we can use it in a video setting). # cudnn.benchmark = True # cudnn.fastest = True torch.set_default_tensor_type('torch.cuda.FloatTensor') ###### net = Yolact() net.load_weights(weight_path) net.eval() net = net.cuda() print('model loaded...') net.detect.cross_class_nms = True net.detect.use_fast_nms = True cfg.mask_proto_debug = False if not os.path.exists(save_path): os.mkdir(save_path) img_names = [ name for name in os.listdir(img_path) if name.endswith('.jpg') or name.endswith('.png') ] #for img_name in tqdm(img_names): for img_name in img_names: img = cv2.imread(os.path.join(img_path, img_name)) img = torch.from_numpy(img).cuda().float() img = FastBaseTransform()(img.unsqueeze(0)) start = time.time() preds = net(img) print('clw: image_name: %s, inference time use %.3fs' % (img_name, time.time() - start)) # inference time use 0.023s, 550x550 # start = time.time() h, w = img.shape[2:] result = postprocess( preds, w, h, crop_masks=True, score_threshold=0.3) # classes, scores, boxes, masks 按照score排序 # top_k = 10 # classes, scores, boxes, masks = [x[:top_k].cpu().numpy() for x in result] # clw note TODO: 是否有必要只取top_k个? 
# print('clw: postprocess time use %.3fs' % (time.time() - start)) # 0.001s ### 顺序遍历result[0],找到第一个是0的值,也就是梨,也就拿到了相应的mask # start = time.time() bFindPear = False for i, cls_id in enumerate(result[0]): if cls_id == 0 and not bFindPear: pear_mask = result[3][i].cpu().numpy() bFindPear = True # 从梨的mask中提取轮廓 pear_outline = get_outline_from_mask(pear_mask, w, h) # print('pear_mask.sum:', pear_mask.sum()) # 124250.0 # print('pear_outline.sum:', pear_outline.sum()) # 34335.0 # print('clw: outline extract time use %.3fs' % (time.time() - start)) # 0.001s roundness = compute_roundness(pear_outline) ### result.append(roundness)