def get_model(opt, pretrained=None, trn=True, weights_FIMs=None, alpha=1.):
    '''Build and initialize the model.

    Args:
        pretrained: None, or path to pretrained model weights.
        trn: True for training, False for evaluating.
    '''
    # Model structure
    model = Darknet(opt.model_config_path, opt.img_size, weights_FIMs, alpha)
    print(model)
    # Initialize
    model.apply(weights_init_normal)
    # Pretrained or not
    coco_weights = pretrained == 'weights/yolov3.weights'
    try:
        model.load_weights(pretrained, use_coco=coco_weights)
    except TypeError:
        # pretrained is None: keep the random initialization
        pass
    # CUDA or not
    if opt.cuda:
        model = model.cuda()
        cudnn.benchmark = True
    # Mode: train or eval
    if trn:
        model.train()
    else:
        model.eval()
    return model
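# Minimal usage sketch for get_model; the `opt` namespace below is a stand-in
# for the repo's parsed arguments, and the attribute values are placeholders.
# Darknet and weights_init_normal must already be importable for this to run.
from types import SimpleNamespace

opt = SimpleNamespace(model_config_path="config/yolov3.cfg", img_size=416, cuda=False)
eval_model = get_model(opt, pretrained=None, trn=False)  # random init, eval mode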
def eval_flowchart(init_style, para_part, reg, alpha, ablation_type):
    '''Main body for evaluation.'''
    args = all_args()
    # Storage path 'eval/'
    os.makedirs(args.save_folder, exist_ok=True)
    # Dataset
    dataset = get_data(args)
    # Load net
    net = Darknet(args.model_config_path, img_size=args.img_size)
    # Visdom
    viz, epoch_aps = init_viz(args, init_style, para_part, reg, alpha, dataset)
    # Evaluate
    ckpt_path, ckpts = get_ckpts(args, init_style, para_part, reg, alpha,
                                 ablation_type)
    mAP_max = 0
    for ckpt_idx, ckpt in enumerate(ckpts):
        # Skip early checkpoints; sample later ones for hyperparameter adjustment
        if ckpt_idx < 120:
            continue
        # Make output dir
        dir_name = '_'.join([
            ablation_type, args.arc, args.dataset, args.set_type, init_style,
            para_part, reg, str(alpha), ckpt, str(ckpt_idx)
        ])
        output_dir = get_output_dir(args.save_folder, dir_name)
        # Load weights
        args.weight_path = os.path.join(ckpt_path, ckpt)
        # assert os.path.isfile(args.weight_path)
        try:
            net.load_weights(args.weight_path)
        except FileNotFoundError as err:
            print(err)
        # CUDA or not
        if args.cuda:
            net = net.cuda()
            cudnn.benchmark = True
        net.eval()
        print('Finished loading model!')
        # Evaluation with use_07_eval=False
        aps, mAP = test_net(output_dir, net, args.cuda, dataset,
                            args.score_thres, args.nms_thres,
                            use_07_eval=False, iou_thres=args.iou_thres)
        # If mAP did not improve, delete the output dir
        if mAP_max >= mAP:
            rmtree(output_dir)
        else:
            mAP_max = mAP
        # Visdom
        update_vis(viz, epoch_aps, ckpt_idx + 1, *aps, mAP)
def main():
    img_size = 512  # must be a multiple of 32: [416, 512, 608]
    cfg = "cfg/yolov3-spp.cfg"
    weights = "weights/yolov3-spp-ultralytics-{}.pt".format(img_size)
    img_path = "test.jpg"
    input_size = (img_size, img_size)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = Darknet(cfg, img_size)
    model.load_state_dict(torch.load(weights, map_location=device)["model"])
    model.to(device)
    model.eval()

    # init: one dummy forward pass to warm up
    img = torch.zeros((1, 3, img_size, img_size), device=device)
    model(img)

    img_o = cv2.imread(img_path)  # BGR
    assert img_o is not None, "Image Not Found " + img_path

    img = img_utils.letterbox(img_o, new_shape=input_size, auto=True,
                              color=(0, 0, 0))[0]
    # Convert: BGR to RGB, HWC to CHW
    img = img[:, :, ::-1].transpose(2, 0, 1)
    img = np.ascontiguousarray(img)

    img = torch.from_numpy(img).to(device).float()
    img /= 255.0  # scale (0, 255) to (0, 1)
    img = img.unsqueeze(0)  # add batch dimension

    t1 = torch_utils.time_synchronized()
    pred = model(img)[0]  # only get the inference result
    t2 = torch_utils.time_synchronized()
    print(t2 - t1)

    pred = utils.non_max_suppression(pred, conf_thres=0.3, iou_thres=0.6,
                                     multi_label=True)[0]
    t3 = time.time()
    print(t3 - t2)

    if pred is None:
        print("No target detected.")
        return

    # process detections
    pred[:, :4] = utils.scale_coords(img.shape[2:], pred[:, :4],
                                     img_o.shape).round()
    print(pred.shape)

    bboxes = pred[:, :4].detach().cpu().numpy()
    scores = pred[:, 4].detach().cpu().numpy()
    classes = pred[:, 5].detach().cpu().numpy().astype(int) + 1

    category_index = dict([(i + 1, str(i + 1)) for i in range(90)])
    img_o = draw_box(img_o[:, :, ::-1], bboxes, classes, scores, category_index)
    plt.imshow(img_o)
    plt.show()
    img_o.save("test_result.jpg")
def setup_detector(opt):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    weights_path = os.path.join(
        opt.weights_path,
        "weights_RADAR.pth" if opt.radar else "weights_LIDAR.pth")
    # Set up model
    model = Darknet(opt.model_def, img_size=cnf.BEV_WIDTH).to(device)
    # Load checkpoint weights
    model.load_state_dict(torch.load(weights_path, map_location=device))
    # Eval mode
    model.eval()
    return model
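# Usage sketch for setup_detector; `opt` mimics the attributes the function
# reads (weights_path, radar, model_def) and the values are placeholders.
from types import SimpleNamespace

opt = SimpleNamespace(weights_path="checkpoints", radar=False,
                      model_def="config/complex_yolov3.cfg")
detector = setup_detector(opt)  # loads weights_LIDAR.pth since radar=False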
def Load_Yolo(device):
    # Load Darknet
    yolo_model_def = os.path.join(yolo_path, 'config/yolov3-tiny.cfg')
    yolo_img_size = 416
    yolo_weights_path = os.path.join(yolo_path, 'weights/yolov3-tiny.weights')
    model = Darknet(yolo_model_def, img_size=yolo_img_size).to(device)
    if yolo_weights_path.endswith(".weights"):
        # Load darknet weights
        model.load_darknet_weights(yolo_weights_path)
    else:
        # Load checkpoint weights
        model.load_state_dict(torch.load(yolo_weights_path))
    model.eval()  # Set in evaluation mode
    return model
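# Example call, assuming `yolo_path` points at a checkout that provides the
# yolov3-tiny config and weights referenced above.
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
yolo_model = Load_Yolo(device)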
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--config_file", default="runs/config.json")
    parser.add_argument("--output_dir", default='output')
    parser.add_argument("--model_checkpoint")
    args = parser.parse_args()

    with open(args.config_file) as config_buffer:
        config = json.loads(config_buffer.read())

    val_loader_dict = dict()
    for i, dataset_config in enumerate(config['val']["datasets"]):
        val_dataset = VOCDetection(
            img_dir=dataset_config["image_folder"],
            annotation_dir=dataset_config["annot_folder"],
            cache_dir=dataset_config["cache_dir"],
            split_file=dataset_config['split_file'],
            img_size=config['model']['input_size'],
            filter_labels=config['model']['labels'],
            multiscale=False,
            augment=False)
        val_dataset.name = dataset_config.get('name')
        val_loader = DataLoader(val_dataset,
                                batch_size=config["val"]["batch_size"],
                                collate_fn=val_dataset.collate_fn,
                                shuffle=True)
        dataset_name = val_dataset.name if val_dataset.name else f"Dataset #{i}"
        val_loader_dict[dataset_name] = val_loader

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = Darknet(config["model"]["config"]).to(device)
    model.load_state_dict(torch.load(args.model_checkpoint))
    model.eval()

    result_dict = evaluate(model, val_loader_dict, config["val"])
    for name, results in result_dict.items():
        output_str = f"{name} evaluation results:\n" \
                     f"precision-{results['precision']},\n" \
                     f"recall-{results['recall']},\n" \
                     f"AP-{results['AP']},\n" \
                     f"F1-{results['F1']},\n" \
                     f"ap_class-{results['AP_class']}"
        print(output_str)
class Detector(object):
    def __init__(self, device, model_def, load_path, reg_threshold,
                 cls_threshold, nms_threshold, image_size):
        self.image_size = image_size
        self.model = Darknet(model_def, img_size=self.image_size).to(device)
        # TODO: change device to GPU
        self.model.load_state_dict(torch.load(load_path, map_location='cpu'))
        self.model.eval()
        self.reg_threshold = reg_threshold
        self.cls_threshold = cls_threshold
        self.nms_threshold = nms_threshold
        self.device = device

    @torch.no_grad()
    def __call__(self, image):
        original_size = image.shape[:2]
        tensor = torch.from_numpy(image).to(self.device).permute(2, 0, 1)
        tensor = tensor.contiguous().float().div_(255)
        # Pad to a square before resizing
        _, h, w = tensor.shape
        dim_diff = np.abs(h - w)
        pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2
        pad = (0, 0, pad1, pad2) if h <= w else (pad1, pad2, 0, 0)
        tensor = f.pad(tensor, pad, "constant", value=0)
        tensor = f.interpolate(tensor.unsqueeze(0), size=self.image_size,
                               mode="nearest").squeeze_(0)
        result = self.model(tensor.unsqueeze_(0))
        detection = non_max_suppression(result, self.reg_threshold,
                                        self.nms_threshold)[0]
        if detection is not None:
            detection = detection[detection[:, -2] > self.cls_threshold]
            detection = rescale_boxes(detection, self.image_size, original_size)
        else:
            print("detection result is None")
        return detection
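# Usage sketch for Detector on a single image; paths and thresholds are
# illustrative placeholders, not values from the original project.
import cv2
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
detector = Detector(device,
                    model_def="config/yolov3.cfg",
                    load_path="checkpoints/yolov3.pth",
                    reg_threshold=0.5,
                    cls_threshold=0.5,
                    nms_threshold=0.4,
                    image_size=416)
frame = cv2.imread("test.jpg")  # HWC uint8 ndarray
boxes = detector(frame)         # detection tensor, or None if nothing found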
def channels_select(prune_cfg, data, origin_model, aux_util, device,
                    data_loader, select_layer, pruned_rate):
    with open(progress_result, 'a') as f:
        f.write(('\n' + '%10s' * 9 + '\n') %
                ('Stage', 'Change', 'MSELoss', 'AuxLoss', 'Total', 'P', 'R',
                 'mAP@0.5', 'F1'))
    logger.info(('%10s' * 6) %
                ('Stage', 'Channels', 'Batch', 'MSELoss', 'AuxLoss', 'Total'))

    batch_size = data_loader.batch_size
    img_size = data_loader.dataset.img_size
    accumulate = 64 // batch_size
    hook_util = HookUtils()
    handles = []
    n_iter = math.floor(500 / batch_size)

    pruning_model = Darknet(prune_cfg, img_size=(img_size, img_size)).to(device)
    chkpt = torch.load(progress_chkpt, map_location=device)
    pruning_model.load_state_dict(chkpt['model'], strict=True)

    aux_in_layer = aux_util.conv_layer_dict[select_layer]
    aux_model = aux_util.creat_aux_model(aux_in_layer)
    aux_model.to(device)
    aux_model.load_state_dict(chkpt['aux_in{}'.format(aux_in_layer)],
                              strict=True)
    aux_loss_scalar = max(0.01, pow((int(aux_in_layer) + 1) / 75, 2))
    del chkpt

    solve_sub_problem_optimizer = optim.SGD(
        pruning_model.module_list[int(aux_in_layer)].MaskConv2d.parameters(),
        lr=hyp['lr0'],
        momentum=hyp['momentum'])

    # Register hooks on the original and the pruning model
    for name, child in origin_model.module_list.named_children():
        if name == aux_in_layer:
            handles.append(
                child.register_forward_hook(hook_util.hook_origin_output))
        if name == select_layer:
            handles.append(
                child.register_forward_hook(hook_util.hook_origin_output))

    for name, child in pruning_model.module_list.named_children():
        if name == aux_in_layer:
            handles.append(
                child.register_forward_hook(hook_util.hook_prune_output))
        if name == select_layer:
            handles.append(
                child.register_forward_hook(hook_util.hook_prune_output))

    if device.type != 'cpu' and torch.cuda.device_count() > 1:
        origin_model = torch.nn.parallel.DistributedDataParallel(
            origin_model, find_unused_parameters=True)
        origin_model.yolo_layers = origin_model.module.yolo_layers
        pruning_model = torch.nn.parallel.DistributedDataParallel(
            pruning_model, find_unused_parameters=True)
        pruning_model.yolo_layers = pruning_model.module.yolo_layers

    retain_channels_num = math.floor(
        aux_util.layer_info[select_layer]["in_channels"] * (1 - pruned_rate))
    pruning_model.nc = 80
    pruning_model.hyp = hyp
    pruning_model.arc = 'default'
    pruning_model.eval()
    aux_model.eval()
    MSE = nn.MSELoss(reduction='mean')

    mloss = torch.zeros(3).to(device)
    for i_k in range(retain_channels_num):
        data_iter = iter(data_loader)
        pbar = tqdm(range(n_iter), total=n_iter)
        print(('\n' + '%10s' * 6) %
              ('Stage', 'gpu_mem', 'channels', 'MSELoss', 'AuxLoss', 'Total'))
        for i in pbar:
            imgs, targets, _, _ = next(data_iter)

            if len(targets) == 0:
                continue

            imgs = imgs.to(device).float() / 255.0  # uint8 to float32, 0-255 to 0.0-1.0
            targets = targets.to(device)

            with torch.no_grad():
                _ = origin_model(imgs)
            _, pruning_pred = pruning_model(imgs)
            pruning_loss, _ = compute_loss(pruning_pred, targets, pruning_model)

            hook_util.cat_to_gpu0()
            mse_loss = torch.zeros(1, device=device)

            aux_pred = aux_model(hook_util.prune_features['gpu0'][1], targets)
            aux_loss = compute_loss_for_DCP(aux_pred, targets)
            mse_loss += MSE(hook_util.prune_features['gpu0'][0],
                            hook_util.origin_features['gpu0'][0])

            loss = hyp['joint_loss'] * mse_loss + aux_loss + 0 * pruning_loss
            loss.backward()

            mem = torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0
            s = ('%10s' * 3 + '%10.3g' * 3) % (
                'Prune ' + select_layer, '%.3gG' % mem,
                '%g/%g' % (i_k, retain_channels_num),
                hyp['joint_loss'] * mse_loss, aux_loss, loss)
            pbar.set_description(s)

            # if (i + 1) % 10 == 0:
            #     logger.info(('%10s' * 3 + '%10.3g' * 3) %
            #                 ('Prune' + select_layer, str(i_k),
            #                  '%g/%g' % (i, n_iter),
            #                  hyp['joint_loss'] * mse_loss, aux_loss, loss))

            hook_util.clean_hook_out()

        grad = pruning_model.module.module_list[int(
            select_layer)].MaskConv2d.weight.grad.detach() ** 2
        grad = grad.sum((2, 3)).sqrt().sum(0)

        if i_k == 0:
            pruning_model.module.module_list[int(
                select_layer)].MaskConv2d.selected_channels_mask[:] = 1e-5
            if select_layer in aux_util.sync_guide.keys():
                sync_layer = aux_util.sync_guide[select_layer]
                pruning_model.module.module_list[int(
                    sync_layer)].MaskConv2d.selected_channels_mask[(
                        -1 * aux_util.layer_info[select_layer]["in_channels"]):] = 1e-5

        selected_channels_mask = pruning_model.module.module_list[int(
            select_layer)].MaskConv2d.selected_channels_mask
        _, indices = torch.topk(grad * (1 - selected_channels_mask), 1)
        pruning_model.module.module_list[int(
            select_layer)].MaskConv2d.selected_channels_mask[indices] = 1
        if select_layer in aux_util.sync_guide.keys():
            pruning_model.module.module_list[int(
                sync_layer)].MaskConv2d.selected_channels_mask[-(
                    aux_util.layer_info[select_layer]["in_channels"] -
                    indices)] = 1

        pruning_model.zero_grad()

        pbar = tqdm(range(n_iter), total=n_iter)
        print(('\n' + '%10s' * 6) %
              ('Stage', 'gpu_mem', 'channels', 'MSELoss', 'AuxLoss', 'Total'))
        for i in pbar:
            imgs, targets, _, _ = next(data_iter)

            if len(targets) == 0:
                continue

            imgs = imgs.to(device).float() / 255.0  # uint8 to float32, 0-255 to 0.0-1.0
            targets = targets.to(device)

            with torch.no_grad():
                _ = origin_model(imgs)
            _, pruning_pred = pruning_model(imgs)
            pruning_loss, _ = compute_loss(pruning_pred, targets, pruning_model)

            hook_util.cat_to_gpu0()
            mse_loss = torch.zeros(1, device=device)

            aux_pred = aux_model(hook_util.prune_features['gpu0'][1], targets)
            aux_loss = compute_loss_for_DCP(aux_pred, targets)
            mse_loss += MSE(hook_util.prune_features['gpu0'][0],
                            hook_util.origin_features['gpu0'][0])

            loss = hyp['joint_loss'] * mse_loss + aux_loss_scalar * aux_loss + 0 * pruning_loss
            loss.backward()

            if i % accumulate == 0:
                solve_sub_problem_optimizer.step()
                solve_sub_problem_optimizer.zero_grad()

            mem = torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0
            mloss = (mloss * i + torch.cat([
                hyp['joint_loss'] * mse_loss, aux_loss, loss
            ]).detach()) / (i + 1)
            s = ('%10s' * 3 + '%10.3g' * 3) % (
                'SubProm ' + select_layer, '%.3gG' % mem,
                '%g/%g' % (i_k, retain_channels_num), *mloss)
            pbar.set_description(s)

            if (i + 1) % n_iter == 0:
                logger.info(('%10s' * 3 + '%10.3g' * 3) %
                            ('SubPro' + select_layer, str(i_k),
                             '%g/%g' % (i, n_iter), *mloss))

            hook_util.clean_hook_out()

    for handle in handles:
        handle.remove()

    greedy_indices = pruning_model.module.module_list[int(
        select_layer)].MaskConv2d.selected_channels_mask < 1
    pruning_model.module.module_list[int(
        select_layer)].MaskConv2d.selected_channels_mask[greedy_indices] = 0

    res, _ = test.test(prune_cfg,
                       data,
                       batch_size=batch_size * 2,
                       img_size=416,
                       model=pruning_model,
                       conf_thres=0.1,
                       iou_thres=0.5,
                       save_json=False,
                       dataloader=None)

    chkpt = torch.load(progress_chkpt, map_location=device)
    chkpt['current_layer'] = aux_util.next_prune_layer(select_layer)
    chkpt['epoch'] = -1
    chkpt['model'] = pruning_model.module.state_dict() if type(
        pruning_model
    ) is nn.parallel.DistributedDataParallel else pruning_model.state_dict()
    chkpt['optimizer'] = None

    torch.save(chkpt, progress_chkpt)
    torch.save(chkpt, last)
    del chkpt

    with open(progress_result, 'a') as f:
        f.write(('%10s' * 2 + '%10.3g' * 7) %
                ('Pruning ' + select_layer,
                 str(aux_util.layer_info[select_layer]['in_channels']) + '->' +
                 str(retain_channels_num), *mloss, *res[:4]) + '\n')

    torch.cuda.empty_cache()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

os.makedirs("output", exist_ok=True)

# Set up model
model = Darknet(opt.model_def, img_size=opt.img_size).to(device)

if opt.weights_path.endswith(".weights"):
    # Load darknet weights
    model.load_darknet_weights(opt.weights_path)
else:
    # Load checkpoint weights
    model.load_state_dict(torch.load(opt.weights_path)["model_state_dict"])

model.eval()  # Set in evaluation mode

dataloader = DataLoader(
    ImageFolder(opt.image_folder, img_size=opt.img_size),
    batch_size=opt.batch_size,
    shuffle=False,
    num_workers=opt.n_cpu,
)

classes = load_classes(opt.class_path)  # Extracts class labels from file

Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor

imgs = []  # Stores image paths
img_detections = []  # Stores detections for each image index
class YOLOV4(object):
    if CWD == THIS_DIR:
        _defaults = {
            "weights": "weights/yolov4.weights",
            "config": "cfg/yolov4.cfg",
            "classes_path": 'cfg/coco.names',
            "thresh": 0.5,
            "nms_thresh": 0.4,
            "model_image_size": (608, 608),
            "max_batch_size": 4,
            "half": True
        }
    else:
        _defaults = {
            "weights": "yolov4_pytorch/weights/yolov4.weights",
            "config": "yolov4_pytorch/cfg/yolov4.cfg",
            "classes_path": 'yolov4_pytorch/cfg/coco.names',
            "thresh": 0.5,
            "nms_thresh": 0.4,
            "model_image_size": (608, 608),
            "max_batch_size": 4,
            "half": True
        }

    def __init__(self, bgr=True, gpu_device=0, **kwargs):
        self.__dict__.update(self._defaults)  # set up default values
        # for portability between keras-yolo3/yolo.py and this
        if 'model_path' in kwargs:
            kwargs['weights'] = kwargs['model_path']
        if 'score' in kwargs:
            kwargs['thresh'] = kwargs['score']
        self.__dict__.update(kwargs)  # update with user overrides

        self.class_names = self._get_class()
        self.model = Darknet(self.config)
        self.model.load_darknet_weights(self.weights)

        self.device = gpu_device
        self.model.cuda(self.device)
        self.model.eval()

        self.bgr = bgr

        if self.half:
            self.model.half()

        # warm up
        self._detect([np.zeros((10, 10, 3), dtype=np.uint8)])
        print('Warmed up!')

    def _get_class(self):
        with open(self.classes_path) as f:
            class_names = f.readlines()
        class_names = [c.strip() for c in class_names]
        return class_names

    def _detect(self, list_of_imgs):
        inputs = []
        for img in list_of_imgs:
            if self.bgr:
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            # print('bgr: {}'.format(img.shape))
            # print('size: {}'.format(self.model_image_size))
            image = cv2.resize(img, self.model_image_size)
            # print('image: {}'.format(image.shape))
            inputs.append(np.expand_dims(np.array(image), axis=0))

        images = np.concatenate(inputs, 0)
        # print('images: {}'.format(images.shape))
        images = torch.from_numpy(images.transpose(0, 3, 1, 2)).float().div(255.0)
        images = images.cuda()
        images = torch.autograd.Variable(images)

        if self.half:
            images = images.half()

        batches = []
        for i in range(0, len(images), self.max_batch_size):
            these_imgs = images[i:i + self.max_batch_size]
            batches.append(these_imgs)

        feature_list = None
        with torch.no_grad():
            for batch in batches:
                img = batch.cuda(self.device)
                features = self.model(img)
                if feature_list is None:
                    feature_list = features
                else:
                    feature_list = torch.cat((feature_list, features))

        # feature_list: (batch, height * width * num_anchors, 5 + num_classes)
        return feature_list

    def detect_get_box_in(self, images, box_format='ltrb', classes=None,
                          buffer_ratio=0.0):
        '''
        Params
        ------
        - images : ndarray-like or list of ndarray-like
        - box_format : string of characters representing format order, where
          l = left, t = top, r = right, b = bottom, w = width and h = height
        - classes : list of string, classes to focus on
        - buffer_ratio : float, proportion of buffer around the width and
          height of the bounding box

        Returns
        -------
        if one ndarray given, this returns a list (boxes in one image) of tuple
        (box_infos, score, predicted_class); if a list of ndarray given, this
        returns a list (batch) containing the former as the elements, where:
        - box_infos : list of floats in the given box format
        - score : float, confidence level of prediction
        - predicted_class : string
        '''
        single = False
        if isinstance(images, list):
            if len(images) <= 0:
                return None
            else:
                assert all(isinstance(im, np.ndarray) for im in images)
        elif isinstance(images, np.ndarray):
            images = [images]
            single = True

        res = self._detect(images)
        frame_shapes = [image.shape for image in images]
        all_dets = self._postprocess(res, shapes=frame_shapes,
                                     box_format=box_format, classes=classes,
                                     buffer_ratio=buffer_ratio)

        if single:
            return all_dets[0]
        else:
            return all_dets

    def get_detections_dict(self, frames, classes=None, buffer_ratio=0.0):
        '''
        Params: frames, list of ndarray-like
        Returns: detections, list of dict with keys: label, confidence, t, l, b, r, w, h
        '''
        if frames is None or len(frames) == 0:
            return None
        all_dets = self.detect_get_box_in(frames, box_format='tlbrwh',
                                          classes=classes,
                                          buffer_ratio=buffer_ratio)

        all_detections = []
        for dets in all_dets:
            detections = []
            for tlbrwh, confidence, label in dets:
                top, left, bot, right, width, height = tlbrwh
                detections.append({
                    'label': label,
                    'confidence': confidence,
                    't': top,
                    'l': left,
                    'b': bot,
                    'r': right,
                    'w': width,
                    'h': height
                })
            all_detections.append(detections)
        return all_detections

    def _nms(self, predictions):
        predictions[..., :4] = self.xywh2p1p2(predictions[..., :4])
        outputs = [None for _ in range(len(predictions))]
        for i, image_pred in enumerate(predictions):
            image_pred = image_pred[image_pred[:, 4] >= self.thresh]
            # If no anchors remain, process the next image
            if not image_pred.size(0):
                continue
            # Object confidence times class confidence: (n, ) * (n, )
            score = image_pred[:, 4] * image_pred[:, 5:].max(1)[0]
            class_confs, class_preds = image_pred[:, 5:].max(1, keepdim=True)
            detections = torch.cat(
                (image_pred[:, :5], class_confs.type(predictions.dtype),
                 class_preds.type(predictions.dtype)),
                dim=1)
            keep = batched_nms(image_pred[:, :4].float(), score,
                               class_preds[:, 0], self.nms_thresh)
            outputs[i] = detections[keep]
        return outputs

    @staticmethod
    def xywh2p1p2(x):
        y = x.new(x.shape)
        y[..., 0] = x[..., 0] - x[..., 2] / 2.
        y[..., 1] = x[..., 1] - x[..., 3] / 2.
        y[..., 2] = x[..., 0] + x[..., 2] / 2.
        y[..., 3] = x[..., 1] + x[..., 3] / 2.
        return y

    @staticmethod
    def p1p2Toxywh(x):
        y = x.new(x.shape)
        y[..., 0] = x[..., 0]
        y[..., 1] = x[..., 1]
        y[..., 2] = x[..., 2] - x[..., 0]
        y[..., 3] = x[..., 3] - x[..., 1]
        return y

    def _postprocess(self, outputs, shapes, box_format='ltrb', classes=None,
                     buffer_ratio=0.0):
        outputs = self._nms(outputs)

        detections = []
        for i, frame_bbs in enumerate(outputs):
            im_height, im_width, _ = shapes[i]
            if frame_bbs is None:
                detections.append([])
                continue

            frame_bbs = self._resize_boxes(frame_bbs, self.model_image_size,
                                           (im_height, im_width))
            frame_dets = []
            for box in frame_bbs:
                pred_box = self.p1p2Toxywh(box[:4]).data.cpu().numpy()
                # box = box.data.cpu().numpy()
                cls_conf = box[4].item()
                cls_id = box[-1]
                cls_name = self.class_names[int(cls_id)]

                if classes is not None and cls_name not in classes:
                    continue

                left, top, w, h = pred_box
                right = left + w
                bottom = top + h

                width = right - left + 1
                height = bottom - top + 1
                width_buffer = width * buffer_ratio
                height_buffer = height * buffer_ratio

                top = max(0.0, top - 0.5 * height_buffer)
                left = max(0.0, left - 0.5 * width_buffer)
                bottom = min(im_height - 1.0, bottom + 0.5 * height_buffer)
                right = min(im_width - 1.0, right + 0.5 * width_buffer)

                box_infos = []
                for c in box_format:
                    if c == 't':
                        box_infos.append(int(round(top)))
                    elif c == 'l':
                        box_infos.append(int(round(left)))
                    elif c == 'b':
                        box_infos.append(int(round(bottom)))
                    elif c == 'r':
                        box_infos.append(int(round(right)))
                    elif c == 'w':
                        box_infos.append(int(round(width + width_buffer)))
                    elif c == 'h':
                        box_infos.append(int(round(height + height_buffer)))
                    else:
                        assert False, 'box_format given in detect unrecognised!'
                assert len(box_infos) > 0, 'box infos is blank'

                detection = (box_infos, cls_conf, cls_name)
                frame_dets.append(detection)
            detections.append(frame_dets)

        return detections

    @staticmethod
    def _resize_boxes(boxes, current_dim, original_shape):
        h_ratio = original_shape[0] / current_dim[0]
        w_ratio = original_shape[1] / current_dim[1]
        boxes[..., 0] *= w_ratio
        boxes[..., 1] *= h_ratio
        boxes[..., 2] *= w_ratio
        boxes[..., 3] *= h_ratio
        return boxes
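# Usage sketch for the YOLOV4 wrapper; the image path is a placeholder and the
# default cfg/weights/names files must exist at the paths in _defaults.
import cv2

if __name__ == "__main__":
    det = YOLOV4(bgr=True, gpu_device=0)
    frame = cv2.imread("test.jpg")
    for (l, t, r, b), score, cls_name in det.detect_get_box_in(frame, box_format="ltrb"):
        print(cls_name, score, (l, t, r, b))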
def main():
    img_size = 512  # must be a multiple of 32: [416, 512, 608]
    cfg = "/home/mist/yolov3_spp/cfg/yolov3-spp.cfg"  # change to your generated .cfg file
    weights = "/home/mist/yolov3_spp/weights/yolov3spp-29.pt"  # change to your trained weights file
    json_path = "/home/mist/yolov3_spp/data/pascal_voc_classes.json"  # JSON label file
    img_path = "test.jpg"
    assert os.path.exists(cfg), "cfg file {} does not exist.".format(cfg)
    assert os.path.exists(weights), "weights file {} does not exist.".format(weights)
    assert os.path.exists(json_path), "json file {} does not exist.".format(json_path)
    assert os.path.exists(img_path), "image file {} does not exist.".format(img_path)

    with open(json_path, 'r') as json_file:
        class_dict = json.load(json_file)
    category_index = {v: k for k, v in class_dict.items()}

    input_size = (img_size, img_size)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    model = Darknet(cfg, img_size)
    model.load_state_dict(torch.load(weights, map_location=device)["model"])
    model.to(device)

    model.eval()
    with torch.no_grad():
        # init: one dummy forward pass to warm up
        img = torch.zeros((1, 3, img_size, img_size), device=device)
        model(img)

        img_o = cv2.imread(img_path)  # BGR
        assert img_o is not None, "Image Not Found " + img_path

        img = img_utils.letterbox(img_o, new_shape=input_size, auto=True,
                                  color=(0, 0, 0))[0]
        # Convert: BGR to RGB, HWC to CHW
        img = img[:, :, ::-1].transpose(2, 0, 1)
        img = np.ascontiguousarray(img)

        img = torch.from_numpy(img).to(device).float()
        img /= 255.0  # scale (0, 255) to (0, 1)
        img = img.unsqueeze(0)  # add batch dimension

        t1 = torch_utils.time_synchronized()
        pred = model(img)[0]  # only get the inference result
        t2 = torch_utils.time_synchronized()
        print(t2 - t1)

        pred = utils.non_max_suppression(pred, conf_thres=0.1, iou_thres=0.6,
                                         multi_label=True)[0]
        t3 = time.time()
        print(t3 - t2)

        if pred is None:
            print("No target detected.")
            exit(0)

        # process detections
        pred[:, :4] = utils.scale_coords(img.shape[2:], pred[:, :4],
                                         img_o.shape).round()
        print(pred.shape)

        bboxes = pred[:, :4].detach().cpu().numpy()
        scores = pred[:, 4].detach().cpu().numpy()
        classes = pred[:, 5].detach().cpu().numpy().astype(int) + 1

        img_o = draw_box(img_o[:, :, ::-1], bboxes, classes, scores, category_index)
        plt.imshow(img_o)
        plt.show()

        img_o.save("test_result.jpg")
model3 = Darknet(os.path.join(BASE_DIR, "yolo_v3/config/yolov3-custom.cfg")).to(device)
model3.load_state_dict(
    torch.load(os.path.join(models_path, "yolo_v3_4_25.pt"),
               map_location=device))

dataset = MyTestDataset(split='stage1_train',
                        transforms=get_test_transforms(rescale_size=(416, 416)))
test_loader = DataLoader(dataset, batch_size=1, num_workers=0, shuffle=False)

model.eval()
model2.eval()
model3.eval()

for i, (image, targets) in enumerate(test_loader):
    image = image[0].to(device=device)
    name = targets["name"][0]
    start_time = time.time()
    with torch.no_grad():
        outputs = model(image)
        outputs2 = model2(image)
        outputs3 = model3(image)
    outputs = non_max_suppression(outputs, conf_thres=0.5)
    outputs2 = non_max_suppression(outputs2, conf_thres=0.5)
    outputs3 = non_max_suppression(outputs3, conf_thres=0.5)
    elapsed_time = time.time() - start_time
def mask_catch(input, output):
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_file_path", type=str, default=input,
                        help="path to images directory")
    parser.add_argument("--output_path", type=str, default=output,
                        help="output image directory")
    parser.add_argument("--model_def", type=str,
                        default="data/yolov3_mask.cfg",
                        help="path to model definition file")
    parser.add_argument("--weights_path", type=str,
                        default="checkpoints/yolov3_ckpt_499.pth",
                        help="path to weights file")
    parser.add_argument("--class_path", type=str,
                        default="data/mask_dataset.names",
                        help="path to class label file")
    parser.add_argument("--conf_thres", type=float, default=0.8,
                        help="object confidence threshold")
    parser.add_argument("--nms_thres", type=float, default=0.3,
                        help="iou threshold for non-maximum suppression")
    parser.add_argument("--frame_size", type=int, default=416,
                        help="size of each image dimension")
    opt = parser.parse_args()

    # Output directory
    os.makedirs(opt.output_path, exist_ok=True)

    # Checking for GPU
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Set up model
    model = Darknet(opt.model_def, img_size=opt.frame_size).to(device)

    # Loading weights
    if opt.weights_path.endswith(".weights"):
        model.load_darknet_weights(opt.weights_path)  # Load darknet weights
    else:
        model.load_state_dict(torch.load(opt.weights_path))  # Load checkpoint

    # Set in evaluation mode
    model.eval()

    # Extract class labels from file
    classes = load_classes(opt.class_path)

    # Checking for GPU for Tensor
    Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor

    print("\nPerforming object detection:")

    # For text in output
    t_size = cv2.getTextSize(" ", cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]

    for imagename in os.listdir(opt.input_file_path):
        print("\n" + imagename + "_______")
        image_path = os.path.join(opt.input_file_path, imagename)
        print(image_path)

        # Frame extraction
        org_img = cv2.imread(image_path)

        # Original image width and height
        i_height, i_width = org_img.shape[:2]

        # resizing => [BGR -> RGB] => [[0...255] -> [0...1]]
        # => [[416, 416, 3] -> [3, 416, 416]] => add batch dimension
        # => [np_array -> tensor] => [tensor -> variable]

        # Create a square black image and paste the original image into it
        x = y = i_height if i_height > i_width else i_width
        img = np.zeros((x, y, 3), np.uint8)
        start_new_i_height = int((y - i_height) / 2)
        start_new_i_width = int((x - i_width) / 2)
        img[start_new_i_height:(start_new_i_height + i_height),
            start_new_i_width:(start_new_i_width + i_width)] = org_img

        # Resizing to [416 x 416]
        img = cv2.resize(img, (opt.frame_size, opt.frame_size))
        # [BGR -> RGB]
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # [[0...255] -> [0...1]]
        img = np.asarray(img) / 255
        # [[416, 416, 3] -> [3, 416, 416]]
        img = np.transpose(img, [2, 0, 1])
        # Add batch dimension
        img = np.expand_dims(img, axis=0)
        # [np_array -> tensor]
        img = torch.Tensor(img)
        # plt.imshow(img[0].permute(1, 2, 0))
        # plt.show()
        # [tensor -> variable]
        img = Variable(img.type(Tensor))

        # Get detections
        with torch.no_grad():
            detections = model(img)
            detections = non_max_suppression_output(detections, opt.conf_thres,
                                                    opt.nms_thres)
        # print(detections)

        # To accommodate results in the original frame
        mul_constant = x / opt.frame_size

        # i counts the number of people without a mask
        i = 0
        # For each detection in detections
        for detection in detections:
            if detection is not None:
                print("{0} Detection found".format(len(detection)))
                for x1, y1, x2, y2, conf, cls_conf, cls_pred in detection:
                    # Accommodate bounding box in original frame
                    x1 = int(x1 * mul_constant - start_new_i_width)
                    y1 = int(y1 * mul_constant - start_new_i_height)
                    x2 = int(x2 * mul_constant - start_new_i_width)
                    y2 = int(y2 * mul_constant - start_new_i_height)

                    # Draw bounding box and set its title
                    if int(cls_pred) == 0:
                        # WITH_MASK
                        cv2.rectangle(org_img, (x1, y1), (x2, y2), (0, 255, 0), 2)
                    else:
                        # WITHOUT_MASK
                        i += 1
                        cv2.rectangle(org_img, (x1, y1), (x2, y2), (0, 0, 255), 2)
                    cv2.putText(org_img,
                                classes[int(cls_pred)] + ": %.2f" % conf,
                                (x1, y1 + t_size[1] + 4),
                                cv2.FONT_HERSHEY_PLAIN, 1, [225, 255, 255], 2)

        # ------------ Ready to save! -----------------
        import time
        now = time.strftime("%Y-%m-%d-%H_%M_%S", time.localtime(time.time()))
        # num is the number of people detected
        num = len(detection)
        # na = now + '-' + 'NUM:%d' % num + '-' + 'Nom:%d' % i + '-' + '.jpg'

        # ------------ image save -----------------
        na = 'result.jpg'
        out_filepath = os.path.join(opt.output_path, na)
        cv2.imwrite(out_filepath, org_img)  # org_img is the final result with boxes
        # naa = now + '-' + 'NUM:%d' % num + '-' + 'Nom:%d' % i
        # ssh_scp_put('172.21.39.222', 22, 'tensor', 'tensor', out_filepath,
        #             '/home/tensor/eden/%s.jpg' % naa)
        # upload_img(na)
        # os.remove(out_filepath)

        signal = 1  # we first set signal to 1
        if i == 0:
            signal = 0
        print("Signal is", signal)
        print("Finish to save!!!")

        # ------------ txt save -----------------
        msg = now + '-' + 'NUM:%d' % num + '-' + 'Nomask:%d' % i + '-'
        nam = 'info.txt'
        full_path = os.path.join(opt.output_path, nam)
        print("----------------")
        with open(full_path, 'w') as file:
            file.write(msg)

    cv2.destroyAllWindows()
    return signal
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--config_file", default="config/runs/config.json")
    parser.add_argument("--output_dir", default='output')
    args = parser.parse_args()

    with open(args.config_file) as config_buffer:
        config = json.loads(config_buffer.read())

    exp_name = get_experiment_name(config)
    print(f"Experiment name: {exp_name}")

    out_dir = os.path.join(args.output_dir, exp_name)
    if os.path.exists(out_dir):
        print("experiment dir already exists! Removing...")
        shutil.rmtree(out_dir)
    os.makedirs(out_dir)

    log_dir = f"{out_dir}/logs"
    checkpoint_dir = f"{out_dir}/checkpoints"
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    tb_logger = SummaryWriter(log_dir)
    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
        datefmt='%m/%d/%Y %H:%M:%S',
        handlers=[
            logging.FileHandler(f"{out_dir}/log.log"),
            logging.StreamHandler(sys.stdout)
        ],
        level=logging.INFO)
    logger = logging.getLogger(__name__)
    logging.info("New session")

    seed = config["train"]["seed"]
    if seed > 0:
        np.random.seed(seed)
        torch.manual_seed(seed)

    ###############################
    #   Prepare data loaders
    ###############################

    print("Loading datasets...")
    if config['val']['validate']:
        train_loader, val_concat_loader, val_loader_dict = prepare_dataloaders(config)
    else:
        train_loader = prepare_dataloaders(config)
    print("Loaded!")

    if config["train"]["debug"]:
        image_batch, target = next(iter(train_loader))
        draw_image_batch_with_targets(image_batch[:4], target, cols=2)
        if config['val']['validate']:
            val_image_batch, val_target = next(iter(val_concat_loader))
            draw_image_batch_with_targets(val_image_batch[:4], val_target, cols=2)

    ###############################
    #   Construct the model
    ###############################

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = Darknet(config["model"]["config"]).to(device)
    model.apply(weights_init_normal)
    print("Model initialized!")

    if config["train"]["freeze_feature_extractor"]:
        model.freeze_feature_extractor()
    print(f"Trainable params: {get_trainable_params_num(model):,}")

    # If specified, we start from a checkpoint
    if config["model"]["pretrained_weights"]:
        if config["model"]["pretrained_weights"].endswith(".pth"):
            model.load_state_dict(torch.load(config["model"]["pretrained_weights"]))
        else:
            model.load_darknet_weights(config["model"]["pretrained_weights"])
        print("Pretrained weights loaded!")

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=config["train"]["learning_rate"])

    ###############################
    #   Training
    ###############################

    batches_done = 0
    grad_accumulations = config["train"]["gradient_accumulations"]
    save_every = config["train"]["save_every"]
    if config["val"]["validate"]:
        val_iterator = iter(val_concat_loader)

    for epoch in range(config["train"]["nb_epochs"]):
        effective_loss = 0
        loss_history = torch.zeros(len(train_loader))
        logger.info(f"Epoch {epoch} started!")
        bar = tqdm(train_loader)
        for i, (image_batch, bboxes) in enumerate(bar):
            model.train()
            image_batch = image_batch.to(device)
            bboxes = bboxes.to(device)

            loss, outputs = model(image_batch, bboxes)
            effective_loss += loss.item()
            loss_history[i] = loss.item()
            loss.backward()

            if i % grad_accumulations == 0:
                # Accumulate gradients over several batches; clip them before
                # stepping so the clipping actually affects the update
                if config["train"]["gradient_clipping"]:
                    torch.nn.utils.clip_grad_norm_(model.parameters(), 5)
                grad_norm = get_grad_norm(model)
                optimizer.step()
                optimizer.zero_grad()

                if config["val"]["validate"]:
                    model.eval()
                    try:
                        val_image_batch, val_bboxes = next(val_iterator)
                    except StopIteration:
                        val_iterator = iter(val_concat_loader)
                        val_image_batch, val_bboxes = next(val_iterator)
                    val_image_batch = val_image_batch.to(device)
                    val_bboxes = val_bboxes.to(device)
                    with torch.no_grad():
                        val_loss, val_outputs = model(val_image_batch, val_bboxes)
                    tb_logger.add_scalar("loss/validation", val_loss, batches_done)

                bar.set_description(
                    f"Loss: {effective_loss / grad_accumulations:.6f}")
                batches_done += 1

                # Tensorboard logging
                for metric_name in metrics:
                    metric_dict = {}
                    for j, yolo_layer in enumerate(model.yolo_layers):
                        metric_dict[f"yolo_{j}"] = yolo_layer.metrics[metric_name]
                    if metric_name == 'loss':
                        metric_dict["overall"] = loss.item()
                    tb_logger.add_scalars(metric_name, metric_dict, batches_done)
                tb_logger.add_scalar("grad_norm", grad_norm, batches_done)
                tb_logger.add_scalar("loss/effective_loss", effective_loss,
                                     batches_done)
                effective_loss = 0

                # save model
                if save_every > 0 and batches_done % save_every == 0:
                    torch.save(model.state_dict(),
                               f"{checkpoint_dir}/yolov3_{batches_done}.pth")

        epoch_loss = loss_history.mean()
        print(f"Epoch loss: {epoch_loss}")
        tb_logger.add_scalar("epoch_loss", epoch_loss, epoch)

        if config["val"]["validate"]:
            result_dict = evaluate(model, val_loader_dict, config["val"])
            for name, results in result_dict.items():
                output_str = f"{name} evaluation results:\n" \
                             f"precision-{results['precision']},\n" \
                             f"recall-{results['recall']},\n" \
                             f"AP-{results['AP']},\n" \
                             f"F1-{results['F1']},\n" \
                             f"ap_class-{results['AP_class']}"
                logging.info(output_str)
                print(output_str)
                tb_logger.add_scalar(f"val_precision/{name}",
                                     results['precision'], epoch)
                tb_logger.add_scalar(f"val_recall/{name}", results['recall'], epoch)
                tb_logger.add_scalar(f"val_F1/{name}", results['F1'], epoch)
                tb_logger.add_scalar(f"val_AP/{name}", results['AP'], epoch)

        # save model
        torch.save(model.state_dict(),
                   f"{checkpoint_dir}/yolov3_epoch_{epoch}.pth")
opt = parser.parse_args()
print(opt)

os.makedirs("output", exist_ok=True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = Darknet(opt.model_def, img_size=opt.img_size).to(device)

if opt.weights_path.endswith(".weights"):
    # Load darknet weights
    model.load_darknet_weights(opt.weights_path)
else:
    # Load checkpoint weights
    model.load_state_dict(torch.load(opt.weights_path))

model.eval()

dataloader = DataLoader(
    ImageFolder(opt.image_folder, img_size=opt.img_size),
    batch_size=opt.batch_size,
    shuffle=False,
    num_workers=opt.n_cpu,
)

classes = load_classes(opt.class_path)  # Extracts class labels from file

imgs = []  # Stores image paths
img_detections = []  # Stores detections for each image index

print("\nPerforming object detection:")
prev_time = time.time()
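# A sketch of the batched inference loop this setup is typically followed by;
# it assumes non_max_suppression and the opt.conf_thres/opt.nms_thres options
# exist, and is not necessarily the exact loop from the original script.
for batch_i, (img_paths, input_imgs) in enumerate(dataloader):
    input_imgs = input_imgs.to(device)
    with torch.no_grad():
        detections = model(input_imgs)
        detections = non_max_suppression(detections, opt.conf_thres, opt.nms_thres)
    imgs.extend(img_paths)
    img_detections.extend(detections)
    current_time = time.time()
    print("Batch %d, inference time: %.3fs" % (batch_i, current_time - prev_time))
    prev_time = current_time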
def detect_shoes(img, conf_thres=0.1, nms_thres=0.4, box_extension=0):
    '''Detect shoes in an image.

    Given an image, detect where the shoes are and output the bounding box
    coordinates, class confidence scores and confidence score.

    Input:
    - img: image data from Image.open(img_path).
    - conf_thres: confidence score threshold. Float.
    - nms_thres: threshold for non-maximum suppression.

    Output:
    - cropped images
    - bounding box coordinates
    - confidence scores
    '''
    model_def = 'config/yolov3-openimages.cfg'
    weights_path = 'config/yolov3-openimages.weights'
    class_path = 'config/oidv6.names'
    batch_size = 1
    n_cpu = 0
    img_size = 416

    # Extract image as PyTorch tensor
    img_original = transforms.ToTensor()(img)
    img_shape_original = img_original.permute(1, 2, 0).shape  # (H, W, C)

    # Pad to square resolution
    img, _ = pad_to_square(img_original, 0)
    # Resize
    img = resize(img, img_size)
    img = img.unsqueeze_(0)

    # Set up device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Set up model
    model = Darknet(model_def, img_size=img_size).to(device)
    model.load_darknet_weights(weights_path)
    model.eval()  # Set in evaluation mode

    classes = load_classes(class_path)  # Extracts class labels from file
    Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor

    input_imgs = Variable(img.type(Tensor))

    # Get detections
    with torch.no_grad():
        detections = model(input_imgs)
        detections = non_max_suppression_for_footwear(detections, conf_thres,
                                                      nms_thres)[0]

    if detections is not None:
        detections = rescale_boxes(detections, img_size, img_shape_original[:2])
        cropped_imgs = []
        bbox_coords = []
        for x1, y1, x2, y2, conf, cls_conf, cls_pred in detections:
            x1 = int(round(float(x1)))
            y1 = int(round(float(y1)))
            x2 = int(round(float(x2)))
            y2 = int(round(float(y2)))
            # CHW tensor: index rows (y) before columns (x)
            cropped_imgs.append(img_original[:, y1:y2, x1:x2])
            bbox_coords.append([x1, y1, x2, y2])
        return cropped_imgs, bbox_coords
    else:
        return None, None


# Bounding-box colors
cmap = plt.get_cmap("tab20b")
colors = [cmap(i) for i in np.linspace(0, 1, 20)]

# # Create plot
# img = np.array(Image.open(img_path))
# plt.figure()
# fig, ax = plt.subplots(1)
# ax.imshow(img)

# # Draw bounding boxes and labels of detections
# if detections is not None:
#     # Rescale boxes to original image
#     detections = rescale_boxes(detections, img_size, img.shape[:2])
#     unique_labels = detections[:, -1].cpu().unique()
#     n_cls_preds = len(unique_labels)
#     bbox_colors = random.sample(colors, n_cls_preds)
#     for x1, y1, x2, y2, conf, cls_conf, cls_pred in detections:
#         print("\t+ Label: %s, Conf: %.5f" % (classes[int(cls_pred)], cls_conf.item()))
#         box_w = x2 - x1
#         box_h = y2 - y1
#         color = bbox_colors[int(np.where(unique_labels == int(cls_pred))[0])]
#         # Create a Rectangle patch
#         bbox = patches.Rectangle((x1, y1), box_w, box_h, linewidth=2,
#                                  edgecolor=color, facecolor="none")
#         # Add the bbox to the plot
#         ax.add_patch(bbox)
#         # Add label
#         plt.text(
#             x1,
#             y1,
#             s=classes[int(cls_pred)] + ', %.2f' % conf.item(),
#             color="white",
#             verticalalignment="top",
#             bbox={"color": color, "pad": 0},
#         )

# # Save generated image with detections
# plt.axis("off")
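# Example invocation of detect_shoes; "test.jpg" is a placeholder image path.
from PIL import Image

crops, boxes = detect_shoes(Image.open("test.jpg"))
if crops is not None:
    print("found {} shoe boxes: {}".format(len(crops), boxes))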
def main():
    # load model
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    model = Darknet(cfg.MODEL, img_size=cfg.SIZE).to(device)
    model.load_darknet_weights(cfg.WEIGHTS)
    model.eval()

    # coco classes
    classes = load_classes(cfg.CLASSES)
    # animals and person
    app_classes = [0, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]

    # tensor type
    Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor

    # create video capture
    cap = cv2.VideoCapture('udp://127.0.0.1:5000', cv2.CAP_FFMPEG)
    if not cap.isOpened():
        print('VideoCapture not opened')
        exit(-1)

    # preprocess pipeline
    t = transforms.Compose([
        transforms.Resize((cfg.SIZE, cfg.SIZE)),
        transforms.ToTensor()
    ])

    # tracker
    tracker = Sort()

    # bbox colors
    colors = [
        (255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 0, 255), (128, 0, 0),
        (0, 128, 0), (0, 0, 128), (128, 0, 128), (128, 128, 0), (0, 128, 128)
    ]

    # process stream
    while True:
        # read frame; stop when the stream ends
        ret, frame = cap.read()
        if not ret:
            break
        # frame = cv2.flip(cv2.flip(frame, 0), 1)
        orig = frame
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(frame)

        # process image
        img = t(img).unsqueeze(0).type(Tensor)
        with torch.no_grad():
            detections = model(img)
            detections = non_max_suppression(detections, cfg.CONF, cfg.NMS)
        detections = detections[0]

        if detections is not None:
            # track objects
            tracked_objects = tracker.update(detections.cpu())
            det = rescale_boxes(tracked_objects, cfg.SIZE, frame.shape[:2])
            for x1, y1, x2, y2, obj_id, cls_pred in det:
                # ignore unneeded classes
                if int(cls_pred) not in app_classes:
                    continue
                # draw bbox
                color = colors[int(obj_id) % len(colors)]
                cls = classes[int(cls_pred)]
                x1, x2, y1, y2 = int(x1), int(x2), int(y1), int(y2)
                cv2.rectangle(orig, (x1, y1), (x2, y2), color, 2)
                cv2.putText(orig, cls + '-' + str(int(obj_id)), (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 3)

        cv2.imshow('YoloV3', orig)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
def greedy_channel_select(origin_model, prune_cfg, origin_weights,
                          select_layer, device, aux_util, data_loader,
                          pruned_rate):
    init_state_dict = mask_converted(prune_cfg, origin_weights, target=None)

    prune_model = Darknet(prune_cfg).to(device)
    prune_model.load_state_dict(init_state_dict, strict=True)
    del init_state_dict
    solve_sub_problem_optimizer = optim.SGD(
        prune_model.module_list[int(select_layer)].MaskConv2d.parameters(),
        lr=hyp['lr0'],
        momentum=hyp['momentum'])
    hook_util = HookUtils()
    handles = []

    info = aux_util.layer_info[int(select_layer)]
    in_channels = info['in_channels']
    remove_k = math.floor(in_channels * pruned_rate)
    k = in_channels - remove_k

    for name, child in origin_model.module_list.named_children():
        if name == select_layer:
            handles.append(
                child.BatchNorm2d.register_forward_hook(
                    hook_util.hook_origin_input))

    aux_idx = aux_util.conv_layer_dict[select_layer]
    hook_layer_aux = aux_util.down_sample_layer[aux_idx]
    for name, child in prune_model.module_list.named_children():
        if name == select_layer:
            handles.append(
                child.BatchNorm2d.register_forward_hook(
                    hook_util.hook_prune_input))
        elif name == hook_layer_aux:
            handles.append(
                child.register_forward_hook(hook_util.hook_prune_input))

    aux_net = aux_util.creat_aux_list(416, device, conv_layer_name=select_layer)
    chkpt_aux = torch.load(aux_weight, map_location=device)
    aux_net.load_state_dict(chkpt_aux['aux{}'.format(aux_idx)])
    del chkpt_aux

    if device.type != 'cpu' and torch.cuda.device_count() > 1:
        prune_model = torch.nn.parallel.DistributedDataParallel(
            prune_model, find_unused_parameters=True)
        prune_model.yolo_layers = prune_model.module.yolo_layers
        aux_net = torch.nn.parallel.DistributedDataParallel(
            aux_net, find_unused_parameters=True)

    nb = len(data_loader)
    prune_model.nc = 80
    prune_model.hyp = hyp
    prune_model.arc = 'default'
    prune_model.eval()
    aux_net.eval()
    MSE = nn.MSELoss(reduction='mean')

    greedy = torch.zeros(k)
    for i_k in range(k):
        pbar = tqdm(enumerate(data_loader), total=nb)
        print(('\n' + '%10s' * 8) %
              ('Stage', 'gpu_mem', 'iter', 'MSELoss', 'PdLoss', 'AuxLoss',
               'Total', 'targets'))
        for i, (imgs, targets, _, _) in pbar:

            if len(targets) == 0:
                continue

            imgs = imgs.to(device).float() / 255.0  # uint8 to float32, 0-255 to 0.0-1.0
            targets = targets.to(device)

            with torch.no_grad():
                _ = origin_model(imgs)
            _, pruning_pred = prune_model(imgs)
            pruning_loss, _ = compute_loss(pruning_pred, targets, prune_model)

            hook_util.cat_to_gpu0('prune')

            aux_pred = aux_net(hook_util.prune_features['gpu0'][1])
            aux_loss, _ = AuxNetUtils.compute_loss_for_aux(aux_pred, aux_net,
                                                           targets)

            mse_loss = torch.zeros(1).to(device)
            mse_loss += MSE(hook_util.prune_features['gpu0'][0],
                            hook_util.origin_features['gpu0'][0])

            loss = hyp['joint_loss'] * mse_loss + pruning_loss + aux_loss

            loss.backward()

            mem = torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0
            s = ('%10s' * 3 + '%10.3g' * 5) % (
                'Pruning ' + select_layer, '%.3gG' % mem, '%g/%g' % (i_k, k),
                mse_loss, pruning_loss, aux_loss, loss, len(targets))
            pbar.set_description(s)

            hook_util.clean_hook_out('origin')
            hook_util.clean_hook_out('prune')

        grad = prune_model.module.module_list[int(
            select_layer)].MaskConv2d.weight.grad.detach().clone() ** 2
        grad = grad.sum((2, 3)).sqrt().sum(0)

        if i_k == 0:
            prune_model.module.module_list[int(
                select_layer)].MaskConv2d.selected_channels_mask[:] = 1e-5
            _, non_greedy_indices = torch.topk(grad, k)
            logger.info('non greedy layer{}: selected==>{}'.format(
                select_layer, str(non_greedy_indices)))

        selected_channels_mask = prune_model.module.module_list[int(
            select_layer)].MaskConv2d.selected_channels_mask
        _, indices = torch.topk(grad * (1 - selected_channels_mask), 1)
        prune_model.module.module_list[int(
            select_layer)].MaskConv2d.selected_channels_mask[indices] = 1
        greedy[i_k] = indices
        logger.info('greedy layer{} iter{}: indices==>{}'.format(
            select_layer, str(i_k), str(indices)))

        prune_model.zero_grad()

        pbar = tqdm(enumerate(data_loader), total=nb)
        mloss = torch.zeros(4).to(device)
        print(('\n' + '%10s' * 8) %
              ('Stage', 'gpu_mem', 'iter', 'MSELoss', 'PdLoss', 'AuxLoss',
               'Total', 'targets'))
        for i, (imgs, targets, _, _) in pbar:

            if len(targets) == 0:
                continue

            imgs = imgs.to(device).float() / 255.0  # uint8 to float32, 0-255 to 0.0-1.0
            targets = targets.to(device)

            with torch.no_grad():
                _ = origin_model(imgs)
            _, pruning_pred = prune_model(imgs)
            pruning_loss, _ = compute_loss(pruning_pred, targets, prune_model)

            hook_util.cat_to_gpu0('prune')

            aux_pred = aux_net(hook_util.prune_features['gpu0'][1])
            aux_loss, _ = AuxNetUtils.compute_loss_for_aux(aux_pred, aux_net,
                                                           targets)

            mse_loss = torch.zeros(1).to(device)
            mse_loss += MSE(hook_util.prune_features['gpu0'][0],
                            hook_util.origin_features['gpu0'][0])

            loss = hyp['joint_loss'] * mse_loss + pruning_loss + aux_loss

            loss.backward()

            solve_sub_problem_optimizer.step()
            solve_sub_problem_optimizer.zero_grad()

            mem = torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0
            mloss = (mloss * i + torch.cat(
                [mse_loss, pruning_loss, aux_loss, loss]).detach()) / (i + 1)
            s = ('%10s' * 3 + '%10.3g' * 5) % (
                'SubProm ' + select_layer, '%.3gG' % mem, '%g/%g' % (i_k, k),
                *mloss, len(targets))
            pbar.set_description(s)

            hook_util.clean_hook_out('origin')
            hook_util.clean_hook_out('prune')

    for handle in handles:
        handle.remove()

    logger.info("greedy layer{}: selected==>{}".format(select_layer, str(greedy)))
class BBDetection():
    def __init__(self):
        print(os.getcwd())
        self.model_cfg = "./src/akhenaten_dv/scripts/Perception/BBoxDetection/model_cfg/yolo_baseline_tiny.cfg"
        self.weights_path = './src/akhenaten_dv/scripts/Perception/BBoxDetection/7.weights'
        self.conf_thres = 0.8
        self.nms_thres = 0.25
        self.vanilla_anchor = False
        self.xy_loss = 2
        self.wh_loss = 1.6
        self.no_object_loss = 25
        self.object_loss = 0.1

        cuda = torch.cuda.is_available()
        self.device = torch.device('cuda:0' if cuda else 'cpu')
        random.seed(0)
        torch.manual_seed(0)
        if cuda:
            torch.cuda.manual_seed(0)
            torch.cuda.manual_seed_all(0)
            torch.backends.cudnn.benchmark = True
            torch.cuda.empty_cache()

        self.model = Darknet(config_path=self.model_cfg,
                             xy_loss=self.xy_loss,
                             wh_loss=self.wh_loss,
                             no_object_loss=self.no_object_loss,
                             object_loss=self.object_loss,
                             vanilla_anchor=self.vanilla_anchor)

        # Load weights
        self.model.load_weights(self.weights_path,
                                self.model.get_start_weight_dim())
        self.model.to(self.device, non_blocking=True)

    def detect(self, cv_img):
        cv_img = cv2.cvtColor(cv_img, cv2.COLOR_BGR2RGB)
        img = img_pil.fromarray(cv_img)
        w, h = img.size
        new_width, new_height = self.model.img_size()
        pad_h, pad_w, ratio = calculate_padding(h, w, new_height, new_width)
        img = torchvision.transforms.functional.pad(img,
                                                    padding=(pad_w, pad_h,
                                                             pad_w, pad_h),
                                                    fill=(127, 127, 127),
                                                    padding_mode="constant")
        img = torchvision.transforms.functional.resize(img,
                                                       (new_height, new_width))

        bw = self.model.get_bw()
        if bw:
            img = torchvision.transforms.functional.to_grayscale(
                img, num_output_channels=1)

        img = torchvision.transforms.functional.to_tensor(img)
        img = img.unsqueeze(0)

        with torch.no_grad():
            self.model.eval()
            img = img.to(self.device, non_blocking=True)
            # output, first_layer, second_layer, third_layer = model(img)
            output = self.model(img)

            for detections in output:
                detections = detections[detections[:, 4] > self.conf_thres]
                box_corner = torch.zeros((detections.shape[0], 4),
                                         device=detections.device)
                xy = detections[:, 0:2]
                wh = detections[:, 2:4] / 2
                box_corner[:, 0:2] = xy - wh
                box_corner[:, 2:4] = xy + wh
                probabilities = detections[:, 4]
                nms_indices = nms(box_corner, probabilities, self.nms_thres)
                main_box_corner = box_corner[nms_indices]
                if nms_indices.shape[0] == 0:
                    continue
                bboxes = []
                for i in range(len(main_box_corner)):
                    x0 = main_box_corner[i, 0].to('cpu').item() / ratio - pad_w
                    y0 = main_box_corner[i, 1].to('cpu').item() / ratio - pad_h
                    x1 = main_box_corner[i, 2].to('cpu').item() / ratio - pad_w
                    y1 = main_box_corner[i, 3].to('cpu').item() / ratio - pad_h
                    bboxes.append([x0, y0, x1, y1])
                return bboxes
            # No detections survived thresholding / NMS
            return []
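# Usage sketch: run BBDetection on one OpenCV frame. The hard-coded cfg/weight
# paths inside __init__ assume the same working-directory layout, and
# "test.jpg" is a placeholder.
import cv2

bb = BBDetection()
frame = cv2.imread("test.jpg")
print(bb.detect(frame))  # list of [x0, y0, x1, y1] in original-image coordinates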