Example #1
def get_model(opt, pretrained=None, trn=True, weights_FIMs=None, alpha=1.):
    '''Get the model and initialize its weights.
    Args:
        pretrained: None or a path to pretrained model weights.
        trn: True for training, False for evaluating.'''
    # Model structure
    model = Darknet(opt.model_config_path, opt.img_size, weights_FIMs, alpha)
    print(model)
    # Initialize
    model.apply(weights_init_normal)
    # Pretrained or not
    coco_weights = pretrained == 'weights/yolov3.weights'
    try:
        model.load_weights(pretrained, use_coco=coco_weights)
    except TypeError:
        pass  # pretrained is None: keep the random initialization
    # Cuda or not
    if opt.cuda:
        model = model.cuda()
        cudnn.benchmark = True
    # Mode = train or eval
    if trn:
        model.train()
    else:
        model.eval()
    return model
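A minimal usage sketch for get_model (not part of the original snippet): the option fields mirror exactly what the function reads, but the concrete paths and values are hypothetical.

# Usage sketch (hypothetical paths and option values)
from argparse import Namespace

opt = Namespace(model_config_path='config/yolov3.cfg', img_size=416, cuda=False)
model = get_model(opt, pretrained='weights/yolov3.weights', trn=False)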
Example #2
def eval_flowchart(init_style, para_part, reg, alpha, ablation_type):
    '''Main body for evaluation'''
    args = all_args()
    # Storage path 'eval/'
    os.makedirs(args.save_folder, exist_ok=True)
    # Dataset
    dataset = get_data(args)
    # Load net
    net = Darknet(args.model_config_path, img_size=args.img_size)
    # Visdom
    viz, epoch_aps = init_viz(args, init_style, para_part, reg, alpha, dataset)
    # Evaluate
    ckpt_path, ckpts = get_ckpts(args, init_style, para_part, reg, alpha,
                                 ablation_type)

    mAP_max = 0
    for ckpt_idx, ckpt in enumerate(ckpts):
        # skip early checkpoints: sample from index 120 onward for hyperparameter adjustment
        if ckpt_idx < 120:
            continue
        # Make output dir
        dir_name = '_'.join([
            ablation_type, args.arc, args.dataset, args.set_type, init_style,
            para_part, reg,
            str(alpha), ckpt,
            str(ckpt_idx)
        ])
        output_dir = get_output_dir(args.save_folder, dir_name)
        # Load weight
        args.weight_path = os.path.join(ckpt_path, ckpt)
        #  assert os.path.isfile(args.weight_path)
        try:
            net.load_weights(args.weight_path)
        except FileNotFoundError as err:
            print(err)
        # Cuda or not
        if args.cuda:
            net = net.cuda()
            cudnn.benchmark = True
        net.eval()
        print('Finished loading model!')
        # Evaluation, use_07_eval False
        aps, mAP = test_net(output_dir,
                            net,
                            args.cuda,
                            dataset,
                            args.score_thres,
                            args.nms_thres,
                            use_07_eval=False,
                            iou_thres=args.iou_thres)
        # If not greater than before, delete
        if mAP_max >= mAP:
            rmtree(output_dir)
        else:
            mAP_max = mAP
        # Visdom
        update_vis(viz, epoch_aps, ckpt_idx + 1, *aps, mAP)
Example #3
def main():
    img_size = 512  # must be a multiple of 32: [416, 512, 608]
    cfg = "cfg/yolov3-spp.cfg"
    weights = "weights/yolov3-spp-ultralytics-{}.pt".format(img_size)
    img_path = "test.jpg"
    input_size = (img_size, img_size)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    model = Darknet(cfg, img_size)
    model.load_state_dict(torch.load(weights, map_location=device)["model"])
    model.to(device)

    model.eval()

    # warm-up forward pass with an all-zero image
    img = torch.zeros((1, 3, img_size, img_size), device=device)
    model(img)

    img_o = cv2.imread(img_path)  # BGR
    assert img_o is not None, "Image Not Found " + img_path

    img = img_utils.letterbox(img_o, new_shape=input_size, auto=True, color=(0, 0, 0))[0]
    # Convert
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW
    img = np.ascontiguousarray(img)

    img = torch.from_numpy(img).to(device).float()
    img /= 255.0  # scale (0, 255) to (0, 1)
    img = img.unsqueeze(0)  # add batch dimension

    t1 = torch_utils.time_synchronized()
    pred = model(img)[0]  # only get inference result
    t2 = torch_utils.time_synchronized()
    print(t2 - t1)

    pred = utils.non_max_suppression(pred, conf_thres=0.3, iou_thres=0.6, multi_label=True)[0]
    t3 = time.time()
    print(t3 - t2)

    # process detections
    pred[:, :4] = utils.scale_coords(img.shape[2:], pred[:, :4], img_o.shape).round()
    print(pred.shape)

    bboxes = pred[:, :4].detach().cpu().numpy()
    scores = pred[:, 4].detach().cpu().numpy()
    classes = pred[:, 5].detach().cpu().numpy().astype(int) + 1  # np.int was removed from NumPy

    category_index = dict([(i + 1, str(i + 1)) for i in range(90)])
    img_o = draw_box(img_o[:, :, ::-1], bboxes, classes, scores, category_index)
    plt.imshow(img_o)
    plt.show()

    img_o.save("test_result.jpg")
Example #4
def setup_detector(opt):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    weights_path = os.path.join(
        opt.weights_path,
        "weights_RADAR.pth" if opt.radar else "weights_LIDAR.pth")
    # Set up model
    model = Darknet(opt.model_def, img_size=cnf.BEV_WIDTH).to(device)
    # Load checkpoint weights
    model.load_state_dict(torch.load(weights_path, map_location=device))
    # Eval mode
    model.eval()

    return model
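A hedged usage sketch for setup_detector: the option fields match what the function reads, while the weights directory and config path are hypothetical.

# Usage sketch (hypothetical weights directory and model config)
from argparse import Namespace

opt = Namespace(weights_path='checkpoints',
                radar=False,  # False selects weights_LIDAR.pth
                model_def='config/complex_yolov3.cfg')
detector = setup_detector(opt)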
Example #5
def Load_Yolo(device):

    #Load Darknet
    yolo_model_def = os.path.join(yolo_path, 'config/yolov3-tiny.cfg')
    yolo_img_size = 416
    yolo_weights_path = os.path.join(yolo_path, 'weights/yolov3-tiny.weights')
    model = Darknet(yolo_model_def, img_size=yolo_img_size).to(device)

    if yolo_weights_path.endswith(".weights"):
        # Load darknet weights
        model.load_darknet_weights(yolo_weights_path)
    else:
        # Load checkpoint weights
        model.load_state_dict(torch.load(yolo_weights_path))

    model.eval()  # Set in evaluation mode
    return model
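A one-line usage sketch, assuming yolo_path is defined at module level as the snippet above expects.

# Usage sketch: load once, reuse for inference
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
yolo = Load_Yolo(device)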
Example #6
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--config_file", default="runs/config.json")
    parser.add_argument("--output_dir", default='output')
    parser.add_argument("--model_checkpoint")
    args = parser.parse_args()

    with open(args.config_file) as config_buffer:
        config = json.loads(config_buffer.read())

    val_loader_dict = dict()
    for i, dataset_config in enumerate(config['val']["datasets"]):
        val_dataset = VOCDetection(
            img_dir=dataset_config["image_folder"],
            annotation_dir=dataset_config["annot_folder"],
            cache_dir=dataset_config["cache_dir"],
            split_file=dataset_config['split_file'],
            img_size=config['model']['input_size'],
            filter_labels=config['model']['labels'],
            multiscale=False,
            augment=False)
        val_dataset.name = dataset_config.get('name')

        val_loader = DataLoader(val_dataset,
                                batch_size=config["val"]["batch_size"],
                                collate_fn=val_dataset.collate_fn,
                                shuffle=True)
        dataset_name = val_dataset.name if val_dataset.name else f"Dataset #{i}"
        val_loader_dict[dataset_name] = val_loader

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = Darknet(config["model"]["config"]).to(device)
    model.load_state_dict(torch.load(args.model_checkpoint))
    model.eval()

    result_dict = evaluate(model, val_loader_dict, config["val"])
    for name, results in result_dict.items():
        output_str = f"{name} evaluation results:\n" \
            f"precision-{results['precision']},\n" \
            f"recall-{results['recall']},\n" \
            f"AP-{results['AP']},\n" \
            f"F1-{results['F1']},\n" \
            f"ap_class-{results['AP_class']}"
        print(output_str)
Example #7
class Detector(object):
    def __init__(self, device, model_def, load_path, reg_threshold,
                 cls_threshold, nms_threshold, image_size):
        self.image_size = image_size
        self.model = Darknet(model_def, img_size=self.image_size).to(device)
        # TODO: load directly onto the target device instead of CPU
        self.model.load_state_dict(torch.load(load_path, map_location='cpu'))
        self.model.eval()
        self.reg_threshold = reg_threshold
        self.cls_threshold = cls_threshold
        self.nms_threshold = nms_threshold

        self.device = device

    @torch.no_grad()
    def __call__(self, image):
        original_size = image.shape[:2]
        tensor = torch.from_numpy(image).to(self.device).permute(2, 0, 1)
        tensor = tensor.contiguous().float().div_(255)
        _, h, w = tensor.shape
        dim_diff = np.abs(h - w)
        pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2
        pad = (0, 0, pad1, pad2) if h <= w else (pad1, pad2, 0, 0)
        tensor = f.pad(tensor, pad, "constant", value=0)
        tensor = f.interpolate(tensor.unsqueeze(0),
                               size=self.image_size,
                               mode="nearest").squeeze_(0)

        result = self.model(tensor.unsqueeze_(0))
        detection = non_max_suppression(result, self.reg_threshold,
                                        self.nms_threshold)[0]
        if detection is not None:
            detection = detection[detection[:, -2] > self.cls_threshold]
            detection = rescale_boxes(detection, self.image_size,
                                      original_size)
        else:
            print("detection result is None")
        return detection
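A usage sketch with made-up paths and thresholds: __call__ accepts any HxWx3 uint8 ndarray and returns None or a tensor of detections, as the class above implements.

# Usage sketch (hypothetical config, weights and thresholds)
import numpy as np
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
detector = Detector(device,
                    model_def='config/yolov3.cfg',
                    load_path='checkpoints/yolov3.pth',
                    reg_threshold=0.5,
                    cls_threshold=0.5,
                    nms_threshold=0.4,
                    image_size=416)
frame = np.zeros((480, 640, 3), dtype=np.uint8)  # dummy frame
detections = detector(frame)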
Example #8
def channels_select(prune_cfg, data, origin_model, aux_util, device,
                    data_loader, select_layer, pruned_rate):
    with open(progress_result, 'a') as f:
        f.write(('\n' + '%10s' * 9 + '\n') %
                ('Stage', 'Change', 'MSELoss', 'AuxLoss', 'Total', 'P', 'R',
                 'mAP@0.5', 'F1'))
    logger.info(('%10s' * 6) %
                ('Stage', 'Channels', 'Batch', 'MSELoss', 'AuxLoss', 'Total'))

    batch_size = data_loader.batch_size
    img_size = data_loader.dataset.img_size
    accumulate = 64 // batch_size
    hook_util = HookUtils()
    handles = []
    n_iter = math.floor(500 / batch_size)

    pruning_model = Darknet(prune_cfg,
                            img_size=(img_size, img_size)).to(device)
    chkpt = torch.load(progress_chkpt, map_location=device)
    pruning_model.load_state_dict(chkpt['model'], strict=True)

    aux_in_layer = aux_util.conv_layer_dict[select_layer]
    aux_model = aux_util.creat_aux_model(aux_in_layer)
    aux_model.to(device)

    aux_model.load_state_dict(chkpt['aux_in{}'.format(aux_in_layer)],
                              strict=True)
    aux_loss_scalar = max(0.01, pow((int(aux_in_layer) + 1) / 75, 2))

    del chkpt

    solve_sub_problem_optimizer = optim.SGD(
        pruning_model.module_list[int(aux_in_layer)].MaskConv2d.parameters(),
        lr=hyp['lr0'],
        momentum=hyp['momentum'])

    for name, child in origin_model.module_list.named_children():
        if name == aux_in_layer:
            handles.append(
                child.register_forward_hook(hook_util.hook_origin_output))
        if name == select_layer:
            handles.append(
                child.register_forward_hook(hook_util.hook_origin_output))

    for name, child in pruning_model.module_list.named_children():
        if name == aux_in_layer:
            handles.append(
                child.register_forward_hook(hook_util.hook_prune_output))
        if name == select_layer:
            handles.append(
                child.register_forward_hook(hook_util.hook_prune_output))

    if device.type != 'cpu' and torch.cuda.device_count() > 1:
        origin_model = torch.nn.parallel.DistributedDataParallel(
            origin_model, find_unused_parameters=True)
        origin_model.yolo_layers = origin_model.module.yolo_layers
        pruning_model = torch.nn.parallel.DistributedDataParallel(
            pruning_model, find_unused_parameters=True)
        pruning_model.yolo_layers = pruning_model.module.yolo_layers

    retain_channels_num = math.floor(
        aux_util.layer_info[select_layer]["in_channels"] * (1 - pruned_rate))
    pruning_model.nc = 80
    pruning_model.hyp = hyp
    pruning_model.arc = 'default'
    pruning_model.eval()
    aux_model.eval()
    MSE = nn.MSELoss(reduction='mean')
    mloss = torch.zeros(3).to(device)

    for i_k in range(retain_channels_num):

        data_iter = iter(data_loader)
        pbar = tqdm(range(n_iter), total=n_iter)
        print(('\n' + '%10s' * 6) %
              ('Stage', 'gpu_mem', 'channels', 'MSELoss', 'AuxLoss', 'Total'))
        for i in pbar:

            imgs, targets, _, _ = next(data_iter)  # Python 3 iterator protocol

            if len(targets) == 0:
                continue

            imgs = imgs.to(device).float() / 255.0  # uint8 to float32, 0-255 to 0.0-1.0
            targets = targets.to(device)

            with torch.no_grad():
                _ = origin_model(imgs)

            _, pruning_pred = pruning_model(imgs)
            pruning_loss, _ = compute_loss(pruning_pred, targets,
                                           pruning_model)

            hook_util.cat_to_gpu0()
            mse_loss = torch.zeros(1, device=device)

            aux_pred = aux_model(hook_util.prune_features['gpu0'][1], targets)
            aux_loss = compute_loss_for_DCP(aux_pred, targets)
            mse_loss += MSE(hook_util.prune_features['gpu0'][0],
                            hook_util.origin_features['gpu0'][0])

            loss = hyp['joint_loss'] * mse_loss + aux_loss + 0 * pruning_loss

            loss.backward()

            mem = torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0
            s = ('%10s' * 3 + '%10.3g' * 3) % (
                'Prune ' + select_layer, '%.3gG' % mem, '%g/%g' %
                (i_k, retain_channels_num), hyp['joint_loss'] * mse_loss,
                aux_loss, loss)
            pbar.set_description(s)

            # if (i + 1) % 10 == 0:
            #     logger.info(('%10s' * 3 + '%10.3g' * 3) %
            #                 ('Prune' + select_layer, str(i_k), '%g/%g' % (i, n_iter), hyp['joint_loss'] * mse_loss,
            #                  aux_loss, loss))

            hook_util.clean_hook_out()

        grad = pruning_model.module.module_list[int(
            select_layer)].MaskConv2d.weight.grad.detach()**2
        grad = grad.sum((2, 3)).sqrt().sum(0)

        if i_k == 0:
            pruning_model.module.module_list[int(
                select_layer)].MaskConv2d.selected_channels_mask[:] = 1e-5
            if select_layer in aux_util.sync_guide.keys():
                sync_layer = aux_util.sync_guide[select_layer]
                pruning_model.module.module_list[int(
                    sync_layer)].MaskConv2d.selected_channels_mask[(
                        -1 * aux_util.layer_info[select_layer]["in_channels"]
                    ):] = 1e-5

        selected_channels_mask = pruning_model.module.module_list[int(
            select_layer)].MaskConv2d.selected_channels_mask
        _, indices = torch.topk(grad * (1 - selected_channels_mask), 1)

        pruning_model.module.module_list[int(
            select_layer)].MaskConv2d.selected_channels_mask[indices] = 1
        if select_layer in aux_util.sync_guide.keys():
            pruning_model.module.module_list[int(
                sync_layer)].MaskConv2d.selected_channels_mask[-(
                    aux_util.layer_info[select_layer]["in_channels"] -
                    indices)] = 1

        pruning_model.zero_grad()

        pbar = tqdm(range(n_iter), total=n_iter)
        print(('\n' + '%10s' * 6) %
              ('Stage', 'gpu_mem', 'channels', 'MSELoss', 'AuxLoss', 'Total'))
        for i in pbar:

            imgs, targets, _, _ = next(data_iter)  # Python 3 iterator protocol

            if len(targets) == 0:
                continue

            imgs = imgs.to(device).float() / 255.0  # uint8 to float32, 0-255 to 0.0-1.0
            targets = targets.to(device)

            with torch.no_grad():
                _ = origin_model(imgs)

            _, pruning_pred = pruning_model(imgs)
            pruning_loss, _ = compute_loss(pruning_pred, targets,
                                           pruning_model)

            hook_util.cat_to_gpu0()
            mse_loss = torch.zeros(1, device=device)

            aux_pred = aux_model(hook_util.prune_features['gpu0'][1], targets)
            aux_loss = compute_loss_for_DCP(aux_pred, targets)
            mse_loss += MSE(hook_util.prune_features['gpu0'][0],
                            hook_util.origin_features['gpu0'][0])

            loss = hyp['joint_loss'] * mse_loss + aux_loss_scalar * aux_loss + 0 * pruning_loss

            loss.backward()

            if i % accumulate == 0:
                solve_sub_problem_optimizer.step()
                solve_sub_problem_optimizer.zero_grad()

            mem = torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0
            mloss = (mloss * i +
                     torch.cat([hyp['joint_loss'] * mse_loss, aux_loss, loss
                                ]).detach()) / (i + 1)
            s = ('%10s' * 3 + '%10.3g' * 3) % (
                'SubProm ' + select_layer, '%.3gG' % mem, '%g/%g' %
                (i_k, retain_channels_num), *mloss)
            pbar.set_description(s)

            if (i + 1) % n_iter == 0:
                logger.info(('%10s' * 3 + '%10.3g' * 3) %
                            ('SubPro' + select_layer, str(i_k), '%g/%g' %
                             (i, n_iter), *mloss))

            hook_util.clean_hook_out()

    for handle in handles:
        handle.remove()

    greedy_indices = pruning_model.module.module_list[int(
        select_layer)].MaskConv2d.selected_channels_mask < 1
    pruning_model.module.module_list[int(
        select_layer)].MaskConv2d.selected_channels_mask[greedy_indices] = 0

    res, _ = test.test(prune_cfg,
                       data,
                       batch_size=batch_size * 2,
                       img_size=416,
                       model=pruning_model,
                       conf_thres=0.1,
                       iou_thres=0.5,
                       save_json=False,
                       dataloader=None)

    chkpt = torch.load(progress_chkpt, map_location=device)
    chkpt['current_layer'] = aux_util.next_prune_layer(select_layer)
    chkpt['epoch'] = -1
    chkpt['model'] = (pruning_model.module.state_dict()
                      if isinstance(pruning_model, nn.parallel.DistributedDataParallel)
                      else pruning_model.state_dict())
    chkpt['optimizer'] = None

    torch.save(chkpt, progress_chkpt)

    torch.save(chkpt, last)
    del chkpt

    with open(progress_result, 'a') as f:
        f.write(('%10s' * 2 + '%10.3g' * 7) %
                ('Pruning ' + select_layer,
                 str(aux_util.layer_info[select_layer]['in_channels']) + '->' +
                 str(retain_channels_num), *mloss, *res[:4]) + '\n')

    torch.cuda.empty_cache()
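The core of the loop above is the channel-scoring step: the squared gradient of the masked conv weight is reduced over kernel positions and output channels, already-selected channels are suppressed, and the top-scoring input channel is un-masked. A standalone sketch of that step, with the tensor shapes stated as assumptions:

# Sketch of the greedy channel-scoring step used above.
# weight_grad: (out_ch, in_ch, kH, kW); selected_mask: (in_ch,), 1 for kept channels.
import torch

def pick_next_channel(weight_grad, selected_mask):
    grad = weight_grad.detach() ** 2
    score = grad.sum(dim=(2, 3)).sqrt().sum(dim=0)  # per-input-channel magnitude
    score = score * (1 - selected_mask)             # ignore already-selected channels
    return int(torch.topk(score, 1).indices)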
Example #9
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    os.makedirs("output", exist_ok=True)

    # Set up model
    model = Darknet(opt.model_def, img_size=opt.img_size).to(device)

    if opt.weights_path.endswith(".weights"):
        # Load darknet weights
        model.load_darknet_weights(opt.weights_path)
    else:
        # Load checkpoint weights
        model.load_state_dict(torch.load(opt.weights_path)["model_state_dict"])

    model.eval()  # Set in evaluation mode

    dataloader = DataLoader(
        ImageFolder(opt.image_folder, img_size=opt.img_size),
        batch_size=opt.batch_size,
        shuffle=False,
        num_workers=opt.n_cpu,
    )

    classes = load_classes(opt.class_path)  # Extracts class labels from file

    Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor

    imgs = []  # Stores image paths
    img_detections = []  # Stores detections for each image index
Example #10
class YOLOV4(object):
    if CWD == THIS_DIR:
        _defaults = {
            "weights": "weights/yolov4.weights",
            "config": "cfg/yolov4.cfg",
            "classes_path": 'cfg/coco.names',
            "thresh": 0.5,
            "nms_thresh": 0.4,
            "model_image_size": (608, 608),
            "max_batch_size": 4,
            "half": True
        }
    else:
        _defaults = {
            "weights": "yolov4_pytorch/weights/yolov4.weights",
            "config": "yolov4_pytorch/cfg/yolov4.cfg",
            "classes_path": 'yolov4_pytorch/cfg/coco.names',
            "thresh": 0.5,
            "nms_thresh": 0.4,
            "model_image_size": (608, 608),
            "max_batch_size": 4,
            "half": True
        }

    def __init__(self, bgr=True, gpu_device=0, **kwargs):
        self.__dict__.update(self._defaults)  # set up default values
        # for portability between keras-yolo3/yolo.py and this
        if 'model_path' in kwargs:
            kwargs['weights'] = kwargs['model_path']
        if 'score' in kwargs:
            kwargs['thresh'] = kwargs['score']
        self.__dict__.update(kwargs)  # update with user overrides

        self.class_names = self._get_class()
        self.model = Darknet(self.config)
        self.model.load_darknet_weights(self.weights)

        self.device = gpu_device
        self.model.cuda(self.device)
        self.model.eval()

        self.bgr = bgr

        if self.half:
            self.model.half()

        # warm up
        self._detect([np.zeros((10, 10, 3), dtype=np.uint8)])
        print('Warmed up!')

    def _get_class(self):
        with open(self.classes_path) as f:
            class_names = f.readlines()
        class_names = [c.strip() for c in class_names]
        return class_names

    def _detect(self, list_of_imgs):
        inputs = []
        for img in list_of_imgs:
            if self.bgr:
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                # print('bgr: {}'.format(img.shape))
            # print('size: {}'.format(self.model_image_size))
            image = cv2.resize(img, self.model_image_size)
            # print('image: {}'.format(image.shape))
            inputs.append(np.expand_dims(np.array(image), axis=0))

        images = np.concatenate(inputs, 0)

        # print('images: {}'.format(images.shape))
        images = torch.from_numpy(images.transpose(0, 3, 1, 2)).float().div(255.0)

        images = images.cuda()  # Variable() is deprecated; plain tensors work directly

        if self.half:
            images = images.half()

        batches = []
        for i in range(0, len(images), self.max_batch_size):
            these_imgs = images[i:i + self.max_batch_size]
            batches.append(these_imgs)

        feature_list = None
        with torch.no_grad():
            for batch in batches:
                img = batch.cuda(self.device)
                features = self.model(img)

                if feature_list is None:
                    feature_list = features
                else:
                    feature_list = torch.cat((feature_list, features))

        # feature_list: (batch, height * width * num_anchors, 5 + num_classes)
        return feature_list

    def detect_get_box_in(self,
                          images,
                          box_format='ltrb',
                          classes=None,
                          buffer_ratio=0.0):
        '''
        Params
        ------
        - images : ndarray-like or list of ndarray-like
        - box_format : string of characters representing format order, where l = left, t = top, r = right, b = bottom, w = width and h = height
        - classes : list of string, classes to focus on
        - buffer : float, proportion of buffer around the width and height of the bounding box

        Returns
        -------
        if one ndarray given, this returns a list (boxes in one image) of tuple (box_infos, score, predicted_class),
        
        else if a list of ndarray given, this return a list (batch) containing the former as the elements,

        where,
            - box_infos : list of floats in the given box format
            - score : float, confidence level of prediction
            - predicted_class : string

        '''
        single = False
        if isinstance(images, list):
            if len(images) <= 0:
                return None
            else:
                assert all(isinstance(im, np.ndarray) for im in images)
        elif isinstance(images, np.ndarray):
            images = [images]
            single = True

        res = self._detect(images)
        frame_shapes = [image.shape for image in images]
        all_dets = self._postprocess(res,
                                     shapes=frame_shapes,
                                     box_format=box_format,
                                     classes=classes,
                                     buffer_ratio=buffer_ratio)

        if single:
            return all_dets[0]
        else:
            return all_dets

    def get_detections_dict(self, frames, classes=None, buffer_ratio=0.0):
        '''
        Params: frames, list of ndarray-like
        Returns: detections, list of dict, whose key: label, confidence, t, l, w, h
        '''
        if frames is None or len(frames) == 0:
            return None
        all_dets = self.detect_get_box_in(frames,
                                          box_format='tlbrwh',
                                          classes=classes,
                                          buffer_ratio=buffer_ratio)

        all_detections = []
        for dets in all_dets:
            detections = []
            for tlbrwh, confidence, label in dets:
                top, left, bot, right, width, height = tlbrwh
                detections.append({
                    'label': label,
                    'confidence': confidence,
                    't': top,
                    'l': left,
                    'b': bot,
                    'r': right,
                    'w': width,
                    'h': height
                })
            all_detections.append(detections)
        return all_detections

    def _nms(self, predictions):
        predictions[..., :4] = self.xywh2p1p2(predictions[..., :4])
        outputs = [None for _ in range(len(predictions))]

        for i, image_pred in enumerate(predictions):
            image_pred = image_pred[image_pred[:, 4] >= self.thresh]

            # If no anchors remain, process the next image
            if not image_pred.size(0):
                continue

            # Object confidence times class confidence  (n, ) * (n, )
            score = image_pred[:, 4] * image_pred[:, 5:].max(1)[0]
            class_confs, class_preds = image_pred[:, 5:].max(1, keepdim=True)

            detections = torch.cat(
                (image_pred[:, :5], class_confs.type(
                    predictions.dtype), class_preds.type(predictions.dtype)),
                dim=1)

            keep = batched_nms(image_pred[:, :4].float(), score,
                               class_preds[:, 0], self.nms_thresh)
            outputs[i] = detections[keep]

        return outputs

    @staticmethod
    def xywh2p1p2(x):
        y = x.new(x.shape)
        y[..., 0] = x[..., 0] - x[..., 2] / 2.
        y[..., 1] = x[..., 1] - x[..., 3] / 2.
        y[..., 2] = x[..., 0] + x[..., 2] / 2.
        y[..., 3] = x[..., 1] + x[..., 3] / 2.
        return y

    @staticmethod
    def p1p2Toxywh(x):
        y = x.new(x.shape)
        y[..., 0] = x[..., 0]
        y[..., 1] = x[..., 1]
        y[..., 2] = x[..., 2] - x[..., 0]
        y[..., 3] = x[..., 3] - x[..., 1]
        return y

    def _postprocess(self,
                     outputs,
                     shapes,
                     box_format='ltrb',
                     classes=None,
                     buffer_ratio=0.0):
        outputs = self._nms(outputs)

        detections = []
        for i, frame_bbs in enumerate(outputs):
            im_height, im_width, _ = shapes[i]
            if frame_bbs is None:
                detections.append([])
                continue

            frame_bbs = self._resize_boxes(frame_bbs, self.model_image_size,
                                           (im_height, im_width))
            frame_dets = []
            for box in frame_bbs:
                pred_box = self.p1p2Toxywh(box[:4]).data.cpu().numpy()
                # box = box.data.cpu().numpy()
                cls_conf = box[4].item()
                cls_id = box[-1]
                cls_name = self.class_names[int(cls_id)]

                if classes is not None and cls_name not in classes:
                    continue

                left, top, w, h = pred_box
                right = left + w
                bottom = top + h

                width = right - left + 1
                height = bottom - top + 1
                width_buffer = width * buffer_ratio
                height_buffer = height * buffer_ratio

                top = max(0.0, top - 0.5 * height_buffer)
                left = max(0.0, left - 0.5 * width_buffer)
                bottom = min(im_height - 1.0, bottom + 0.5 * height_buffer)
                right = min(im_width - 1.0, right + 0.5 * width_buffer)

                box_infos = []
                for c in box_format:
                    if c == 't':
                        box_infos.append(int(round(top)))
                    elif c == 'l':
                        box_infos.append(int(round(left)))
                    elif c == 'b':
                        box_infos.append(int(round(bottom)))
                    elif c == 'r':
                        box_infos.append(int(round(right)))
                    elif c == 'w':
                        box_infos.append(int(round(width + width_buffer)))
                    elif c == 'h':
                        box_infos.append(int(round(height + height_buffer)))
                    else:
                        assert False, 'unrecognised box_format character: %s' % c
                assert len(box_infos) > 0, 'box infos is blank'

                detection = (box_infos, cls_conf, cls_name)
                frame_dets.append(detection)
            detections.append(frame_dets)

        return detections

    @staticmethod
    def _resize_boxes(boxes, current_dim, original_shape):
        h_ratio = original_shape[0] / current_dim[0]
        w_ratio = original_shape[1] / current_dim[1]
        boxes[..., 0] *= w_ratio
        boxes[..., 1] *= h_ratio
        boxes[..., 2] *= w_ratio
        boxes[..., 3] *= h_ratio
        return boxes
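A usage sketch for the wrapper above; the image path is hypothetical. With a single ndarray, detect_get_box_in returns one list of (box_infos, score, predicted_class) tuples, as its docstring describes.

# Usage sketch (hypothetical image path)
import cv2

yolo = YOLOV4(bgr=True, gpu_device=0)
frame = cv2.imread('street.jpg')
dets = yolo.detect_get_box_in(frame, box_format='ltrb', classes=['person'])
for (l, t, r, b), score, cls_name in dets:
    print(cls_name, score, (l, t, r, b))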
Example #11
def main():
    img_size = 512  # must be a multiple of 32: [416, 512, 608]
    cfg = "/home/mist/yolov3_spp/cfg/yolov3-spp.cfg"  # path to the generated .cfg file
    weights = "/home/mist/yolov3_spp/weights/yolov3spp-29.pt"  # path to your trained weights
    json_path = "/home/mist/yolov3_spp/data/pascal_voc_classes.json"  # JSON label file
    img_path = "test.jpg"
    assert os.path.exists(cfg), "cfg file {} does not exist.".format(cfg)
    assert os.path.exists(weights), "weights file {} does not exist.".format(weights)
    assert os.path.exists(json_path), "json file {} does not exist.".format(json_path)
    assert os.path.exists(img_path), "image file {} does not exist.".format(img_path)

    with open(json_path, 'r') as json_file:
        class_dict = json.load(json_file)
    category_index = {v: k for k, v in class_dict.items()}

    input_size = (img_size, img_size)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    model = Darknet(cfg, img_size)
    model.load_state_dict(torch.load(weights, map_location=device)["model"])
    model.to(device)

    model.eval()
    with torch.no_grad():
        # warm-up forward pass with an all-zero image
        img = torch.zeros((1, 3, img_size, img_size), device=device)
        model(img)

        img_o = cv2.imread(img_path)  # BGR
        assert img_o is not None, "Image Not Found " + img_path

        img = img_utils.letterbox(img_o,
                                  new_shape=input_size,
                                  auto=True,
                                  color=(0, 0, 0))[0]
        # Convert
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW
        img = np.ascontiguousarray(img)

        img = torch.from_numpy(img).to(device).float()
        img /= 255.0  # scale (0, 255) to (0, 1)
        img = img.unsqueeze(0)  # add batch dimension

        t1 = torch_utils.time_synchronized()
        pred = model(img)[0]  # only get inference result
        t2 = torch_utils.time_synchronized()
        print(t2 - t1)

        pred = utils.non_max_suppression(pred,
                                         conf_thres=0.1,
                                         iou_thres=0.6,
                                         multi_label=True)[0]
        t3 = time.time()
        print(t3 - t2)

        if pred is None:
            print("No target detected.")
            exit(0)

        # process detections
        pred[:, :4] = utils.scale_coords(img.shape[2:], pred[:, :4],
                                         img_o.shape).round()
        print(pred.shape)

        bboxes = pred[:, :4].detach().cpu().numpy()
        scores = pred[:, 4].detach().cpu().numpy()
        classes = pred[:, 5].detach().cpu().numpy().astype(int) + 1  # np.int was removed from NumPy

        img_o = draw_box(img_o[:, :, ::-1], bboxes, classes, scores,
                         category_index)
        plt.imshow(img_o)
        plt.show()

        img_o.save("test_result.jpg")
Example #12
    model3 = Darknet(os.path.join(
        BASE_DIR, "yolo_v3/config/yolov3-custom.cfg")).to(device)
    model3.load_state_dict(
        torch.load(os.path.join(models_path, "yolo_v3_4_25.pt"),
                   map_location=device))

    dataset = MyTestDataset(split='stage1_train',
                            transforms=get_test_transforms(rescale_size=(416, 416)))

    test_loader = DataLoader(dataset,
                             batch_size=1,
                             num_workers=0,
                             shuffle=False)

    model.eval()
    model2.eval()
    model3.eval()
    for i, (image, targets) in enumerate(test_loader):
        image = image[0].to(device=device)
        name = targets["name"][0]
        start_time = time.time()
        with torch.no_grad():
            outputs = model(image)
            outputs2 = model2(image)
            outputs3 = model3(image)
        outputs = non_max_suppression(outputs, conf_thres=0.5)
        outputs2 = non_max_suppression(outputs2, conf_thres=0.5)
        outputs3 = non_max_suppression(outputs3, conf_thres=0.5)

        elapsed_time = time.time() - start_time
Example #13
def mask_catch(input, output):

    parser = argparse.ArgumentParser()
    parser.add_argument("--input_file_path",
                        type=str,
                        default=input,
                        help="path to images directory")
    parser.add_argument("--output_path",
                        type=str,
                        default=output,
                        help="output image directory")
    parser.add_argument("--model_def",
                        type=str,
                        default="data/yolov3_mask.cfg",
                        help="path to model definition file")
    parser.add_argument("--weights_path",
                        type=str,
                        default="checkpoints/yolov3_ckpt_499.pth",
                        help="path to weights file")
    parser.add_argument("--class_path",
                        type=str,
                        default="data/mask_dataset.names",
                        help="path to class label file")
    parser.add_argument("--conf_thres",
                        type=float,
                        default=0.8,
                        help="object confidence threshold")
    parser.add_argument("--nms_thres",
                        type=float,
                        default=0.3,
                        help="iou threshold for non-maximum suppression")
    parser.add_argument("--frame_size",
                        type=int,
                        default=416,
                        help="size of each image dimension")

    opt = parser.parse_args()
    # Output directory
    os.makedirs(opt.output_path, exist_ok=True)

    # checking for GPU
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Set up model
    model = Darknet(opt.model_def, img_size=opt.frame_size).to(device)

    # loading weights
    if opt.weights_path.endswith(".weights"):
        model.load_darknet_weights(opt.weights_path)  # Load darknet weights
    else:
        model.load_state_dict(torch.load(opt.weights_path))  # Load checkpoint weights

    # Set in evaluation mode
    model.eval()

    # Extracts class labels from file
    classes = load_classes(opt.class_path)

    # checking for GPU for Tensor
    Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor

    print("\nPerforming object detection:")

    # for text in output
    t_size = cv2.getTextSize(" ", cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]

    for imagename in os.listdir(opt.input_file_path):

        print("\n" + imagename + "_______")
        image_path = os.path.join(opt.input_file_path, imagename)
        print(image_path)
        # frame extraction
        org_img = cv2.imread(image_path)

        # Original image width and height
        i_height, i_width = org_img.shape[:2]

        # resizing => [BGR -> RGB] => [[0...255] -> [0...1]] => [[3, 416, 416] -> [416, 416, 3]]
        #                       => [[416, 416, 3] => [416, 416, 3, 1]] => [np_array -> tensor] => [tensor -> variable]

        # resizing to [416 x 416]

        # Create a black image
        x = y = i_height if i_height > i_width else i_width

        # Black image
        img = np.zeros((x, y, 3), np.uint8)

        # Putting original image into black image
        start_new_i_height = int((y - i_height) / 2)
        start_new_i_width = int((x - i_width) / 2)

        img[start_new_i_height:(start_new_i_height + i_height),
            start_new_i_width:(start_new_i_width + i_width)] = org_img

        #resizing to [416x 416]
        img = cv2.resize(img, (opt.frame_size, opt.frame_size))

        # [BGR -> RGB]
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # [[0...255] -> [0...1]]
        img = np.asarray(img) / 255
        # [[3, 416, 416] -> [416, 416, 3]]
        img = np.transpose(img, [2, 0, 1])
        # [[416, 416, 3] => [416, 416, 3, 1]]
        img = np.expand_dims(img, axis=0)
        # [np_array -> tensor]
        img = torch.Tensor(img)

        # plt.imshow(img[0].permute(1, 2, 0))
        # plt.show()

        # [tensor -> variable]
        img = Variable(img.type(Tensor))

        # Get detections
        with torch.no_grad():
            detections = model(img)

        detections = non_max_suppression_output(detections, opt.conf_thres,
                                                opt.nms_thres)

        # print(detections)

        # For accommodate results in original frame
        mul_constant = x / opt.frame_size

        # i counts the number of people detected without a mask
        i = 0

        # For each detection in detections
        for detection in detections:
            if detection is not None:

                print("{0} detections found".format(len(detection)))
                for x1, y1, x2, y2, conf, cls_conf, cls_pred in detection:

                    # Accommodate bounding box in original frame
                    x1 = int(x1 * mul_constant - start_new_i_width)
                    y1 = int(y1 * mul_constant - start_new_i_height)
                    x2 = int(x2 * mul_constant - start_new_i_width)
                    y2 = int(y2 * mul_constant - start_new_i_height)

                    # Bounding box making and setting Bounding box title
                    if (int(cls_pred) == 0):

                        # WITH_MASK
                        cv2.rectangle(org_img, (x1, y1), (x2, y2), (0, 255, 0),
                                      2)
                    else:
                        #WITHOUT_MASK
                        i += 1
                        cv2.rectangle(org_img, (x1, y1), (x2, y2), (0, 0, 255),
                                      2)

                    cv2.putText(org_img,
                                classes[int(cls_pred)] + u": %.2f" % conf,
                                (x1, y1 + t_size[1] + 4),
                                cv2.FONT_HERSHEY_PLAIN, 1, [225, 255, 255], 2)
        # ------------ Ready to save! ------------
        import time
        now = time.strftime("%Y-%m-%d-%H_%M_%S", time.localtime(time.time()))

        # num is the number of people detected
        num = len(detection)
        # na = now + '-' + 'NUM:%d' % num + '-' + 'Nom:%d' % i + '-' + '.jpg'
        # ------------ txt save ------------
        # ------------ image save ------------
        na = 'result.jpg'
        out_filepath = os.path.join(opt.output_path, na)
        cv2.imwrite(out_filepath,
                    org_img)  #org_img is final result with frames

        #naa = now + '-' + 'NUM:%d' % num + '-' + 'Nom:%d' % i
        #ssh_scp_put('172.21.39.222',22,'tensor','tensor',out_filepath,'/home/tensor/eden/%s.jpg'%naa)
        #upload_img(na)
        #os.remove(out_filepath)

        signal = 1  # 1 means at least one person without a mask was found

        if i == 0:
            signal = 0

        print("Signal is", signal)
        print("Finished saving!")

        msg = now + '-' + 'NUM:%d' % num + '-' + 'Nomask:%d' % i + '-'
        nam = 'info.txt'
        full_path = os.path.join(opt.output_path, nam)
        print("----------------")
        with open(full_path, 'w') as f:
            f.write(msg)

    cv2.destroyAllWindows()
    return signal
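A minimal usage sketch; the directories are hypothetical. As written above, mask_catch returns 1 if the last processed image contained anyone without a mask, else 0.

# Usage sketch (hypothetical directories)
signal = mask_catch('data/samples', 'output')
print('no-mask alarm:', signal)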
Example #14
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--config_file", default="config/runs/config.json")
    parser.add_argument("--output_dir", default='output')
    args = parser.parse_args()

    with open(args.config_file) as config_buffer:
        config = json.loads(config_buffer.read())

    exp_name = get_experiment_name(config)
    print(f"Experiment name: {exp_name}")
    out_dir = os.path.join(args.output_dir, exp_name)
    if os.path.exists(out_dir):
        print("experiment dir already exists! Removing...")
        shutil.rmtree(out_dir)

    os.makedirs(out_dir)

    log_dir = f"{out_dir}/logs"
    checkpoint_dir = f"{out_dir}/checkpoints"
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    tb_logger = SummaryWriter(log_dir)

    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
        datefmt='%m/%d/%Y %H:%M:%S',
        handlers=[
            logging.FileHandler(f"{out_dir}/log.log"),
            logging.StreamHandler(sys.stdout)
        ],
        level=logging.INFO)

    logger = logging.getLogger(__name__)
    logging.info("New session")

    seed = config["train"]["seed"]
    if seed > 0:
        np.random.seed(seed)
        torch.manual_seed(seed)

    ###############################
    #   Prepare data loaders
    ###############################
    print("Loading datasets...")
    if config['val']['validate']:
        train_loader, val_concat_loader, val_loader_dict = prepare_dataloaders(
            config)
    else:
        train_loader = prepare_dataloaders(config)
    print("Loaded!")
    if config["train"]["debug"]:
        image_batch, target = next(iter(train_loader))
        draw_image_batch_with_targets(image_batch[:4], target, cols=2)

        if config['val']['validate']:
            val_image_batch, val_target = next(iter(val_concat_loader))
            draw_image_batch_with_targets(val_image_batch[:4],
                                          val_target,
                                          cols=2)

    ###############################
    #   Construct the model
    ###############################

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = Darknet(config["model"]["config"]).to(device)
    model.apply(weights_init_normal)
    print("Model initialized!")

    if config["train"]["freeze_feature_extractor"]:
        model.freeze_feature_extractor()

    print(f"Trainable params: {get_trainable_params_num(model):,}")

    # If specified we start from checkpoint
    if config["model"]["pretrained_weights"]:
        if config["model"]["pretrained_weights"].endswith(".pth"):
            model.load_state_dict(
                torch.load(config["model"]["pretrained_weights"]))
        else:
            model.load_darknet_weights(config["model"]["pretrained_weights"])
            print("Pretrained weights loaded!")

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=config["train"]["learning_rate"])

    ###############################
    #   Training
    ###############################
    batches_done = 0
    grad_accumulations = config["train"]["gradient_accumulations"]
    save_every = config["train"]["save_every"]

    if config["val"]["validate"]:
        val_iterator = iter(val_concat_loader)

    for epoch in range(config["train"]["nb_epochs"]):

        effective_loss = 0
        loss_history = torch.zeros(len(train_loader))
        logger.info(f"Epoch {epoch} started!")
        bar = tqdm(train_loader)
        for i, (image_batch, bboxes) in enumerate(bar):
            model.train()
            image_batch = image_batch.to(device)
            bboxes = bboxes.to(device)

            loss, outputs = model(image_batch, bboxes)
            effective_loss += loss.item()
            loss_history[i] = loss.item()

            loss.backward()

            if i % grad_accumulations == 0:
                # Gradient has accumulated over grad_accumulations batches;
                # clip before stepping (clipping after step() has no effect)
                if config["train"]["gradient_clipping"]:
                    torch.nn.utils.clip_grad_norm_(model.parameters(), 5)
                grad_norm = get_grad_norm(model)

                optimizer.step()
                optimizer.zero_grad()
                if config["val"]["validate"]:
                    model.eval()

                    try:
                        val_image_batch, val_bboxes = next(val_iterator)
                    except StopIteration:
                        val_iterator = iter(val_concat_loader)
                        val_image_batch, val_bboxes = next(val_iterator)
                    val_image_batch = val_image_batch.to(device)
                    val_bboxes = val_bboxes.to(device)
                    with torch.no_grad():
                        val_loss, val_outputs = model(val_image_batch,
                                                      val_bboxes)

                    tb_logger.add_scalar("loss/validation", val_loss,
                                         batches_done)

                bar.set_description(
                    f"Loss: {effective_loss / grad_accumulations:.6f}")

                batches_done += 1

                # Tensorboard logging
                for metric_name in metrics:
                    metric_dict = {}
                    for j, yolo_layer in enumerate(model.yolo_layers):
                        metric_dict[f"yolo_{j}"] = yolo_layer.metrics[
                            metric_name]

                    if metric_name == 'loss':
                        metric_dict["overall"] = loss.item()

                    tb_logger.add_scalars(metric_name, metric_dict,
                                          batches_done)
                tb_logger.add_scalar("grad_norm", grad_norm, batches_done)
                tb_logger.add_scalar("loss/effective_loss", effective_loss,
                                     batches_done)

                effective_loss = 0

                # save model
                if save_every > 0 and batches_done % save_every == 0:
                    torch.save(model.state_dict(),
                               f"{checkpoint_dir}/yolov3_{batches_done}.pth")

        epoch_loss = loss_history.mean()
        print(f"Epoch loss: {epoch_loss}")
        tb_logger.add_scalar("epoch_loss", epoch_loss, epoch)

        if config["val"]["validate"]:
            result_dict = evaluate(model, val_loader_dict, config["val"])
            for name, results in result_dict.items():
                output_str = f"{name} evaluation results:\n" \
                    f"precision-{results['precision']},\n" \
                    f"recall-{results['recall']},\n" \
                    f"AP-{results['AP']},\n" \
                    f"F1-{results['F1']},\n" \
                    f"ap_class-{results['AP_class']}"
                logging.info(output_str)
                print(output_str)

                tb_logger.add_scalar(f"val_precision/{name}",
                                     results['precision'], epoch)
                tb_logger.add_scalar(f"val_recall/{name}", results['recall'],
                                     epoch)
                tb_logger.add_scalar(f"val_F1/{name}", results['F1'], epoch)
                tb_logger.add_scalar(f"val_AP/{name}", results['AP'], epoch)

        # save model
        torch.save(model.state_dict(),
                   f"{checkpoint_dir}/yolov3_epoch_{epoch}.pth")
Example #15
    opt = parser.parse_args()
    print(opt)

    os.makedirs("output", exist_ok=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = Darknet(opt.model_def, img_size=opt.img_size).to(device)

    if opt.weights_path.endswith(".weights"):
        # Load darknet weights
        model.load_darknet_weights(opt.weights_path)
    else:
        # Load checkpoint weights
        model.load_state_dict(torch.load(opt.weights_path))

    model.eval()

    dataloader = DataLoader(
        ImageFolder(opt.image_folder, img_size=opt.img_size),
        batch_size=opt.batch_size,
        shuffle=False,
        num_workers=opt.n_cpu,
    )

    classes = load_classes(opt.class_path)  # Extracts class labels from file

    imgs = []  # Stores image paths
    img_detections = []  # Stores detections for each image index

    print("\nPerforming object detection:")
    prev_time = time.time()
Example #16
def detect_shoes(img, conf_thres=0.1, nms_thres=0.4, box_extension=0):
    '''Detect shoes in an image.
    Given an image, detect where the shoes are and output the bounding box
    coordinates, class confidence scores and confidence scores.
    Input:
    - img: image data from Image.open(img_path).
    - conf_thres: confidence score threshold. Float.
    - nms_thres: threshold for non-maximum suppression.
    Output:
    - cropped images
    - bounding box coordinates
    - confidence scores
    '''
    model_def = 'config/yolov3-openimages.cfg'
    weights_path = 'config/yolov3-openimages.weights'
    class_path = 'config/oidv6.names'
    # note: conf_thres/nms_thres are taken from the arguments, not re-assigned here
    batch_size = 1
    n_cpu = 0
    img_size = 416


    # Extract image as PyTorch tensor
    img_original = transforms.ToTensor()(img)
    img_shape_original = img_original.permute(1, 2, 0).shape  # (H, W, C)
    # Pad to square resolution (pad_to_square expects a tensor)
    img, _ = pad_to_square(img_original, 0)
    # Resize
    img = resize(img, img_size)
    img = img.unsqueeze_(0)

    # Set up device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Set up model
    model = Darknet(model_def, img_size=img_size).to(device)
    model.load_darknet_weights(weights_path)

    model.eval()  # Set in evaluation mode
    classes = load_classes(class_path)  # Extracts class labels from file
    Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor
    input_imgs = Variable(img.type(Tensor))

    # Get detections
    with torch.no_grad():
        detections = model(input_imgs)  # raw predictions: (batch, boxes, 5 + num_classes)
        detections = non_max_suppression_for_footwear(detections, conf_thres, nms_thres)[0]

    if detections is not None:
        detections = rescale_boxes(detections, img_size, img_shape_original[:2])
        cropped_imgs = []
        bbox_coords = []
        for x1, y1, x2, y2, conf, cls_conf, cls_pred in detections:
            x1 = round(x1)
            y1 = round(y1)
            x2 = round(x2)
            y2 = round(y2)

            # tensor layout is (C, H, W): index rows by y, columns by x
            cropped_imgs.append(img_original[:, y1:y2, x1:x2])
            bbox_coords.append([x1, y1, x2, y2])
        return cropped_imgs, bbox_coords
    else:
        return None, None


    # # Create plot
    # img = np.array(Image.open(img_path))
    # plt.figure()
    # fig, ax = plt.subplots(1)
    # ax.imshow(img)

    # # Draw bounding boxes and labels of detections
    # if detections is not None:
    #     # Rescale boxes to original image
    #     detections = rescale_boxes(detections, img_size, img.shape[:2])
    #     unique_labels = detections[:, -1].cpu().unique()
    #     n_cls_preds = len(unique_labels)
    #     bbox_colors = random.sample(colors, n_cls_preds)
    #     for x1, y1, x2, y2, conf, cls_conf, cls_pred in detections:

    #         print("\t+ Label: %s, Conf: %.5f" % (classes[int(cls_pred)], cls_conf.item()))

    #         box_w = x2 - x1
    #         box_h = y2 - y1

    #         color = bbox_colors[int(np.where(unique_labels == int(cls_pred))[0])]
    #         # Create a Rectangle patch
    #         bbox = patches.Rectangle((x1, y1), box_w, box_h, linewidth=2, edgecolor=color, facecolor="none")
    #         # Add the bbox to the plot
    #         ax.add_patch(bbox)
    #         # Add label
    #         plt.text(
    #             x1,
    #             y1,
    #             s=classes[int(cls_pred)]+', %.2f'%conf.item(),
    #             color="white",
    #             verticalalignment="top",
    #             bbox={"color": color, "pad": 0},
    #         )

    # # Save generated image with detections
    # plt.axis("off") 
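A usage sketch for detect_shoes, assuming the config/weights paths hard-coded inside the function exist; the test image is hypothetical.

# Usage sketch (hypothetical image file)
from PIL import Image

img = Image.open('shoes.jpg')
crops, boxes = detect_shoes(img, conf_thres=0.1, nms_thres=0.4)
if crops is not None:
    print('found {} shoe boxes:'.format(len(boxes)), boxes)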
Example #17
def main():
    # load model
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    model = Darknet(cfg.MODEL, img_size=cfg.SIZE).to(device)
    model.load_darknet_weights(cfg.WEIGHTS)
    model.eval()

    # coco classes
    classes = load_classes(cfg.CLASSES)

    # animals and person
    app_classes = [0, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]

    # tensor type
    Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor

    # create video capture
    cap = cv2.VideoCapture('udp://127.0.0.1:5000', cv2.CAP_FFMPEG)
    if not cap.isOpened():
        print('VideoCapture not opened')
        exit(-1)

    # preprocess pipeline
    t = transforms.Compose([
        transforms.Resize((cfg.SIZE, cfg.SIZE)),
        transforms.ToTensor()
    ])

    # tracker
    tracker = Sort()

    # bbox colors
    colors=[
        (255,0,0),
        (0,255,0),
        (0,0,255),
        (255,0,255),
        (128,0,0),
        (0,128,0),
        (0,0,128),
        (128,0,128),
        (128,128,0),
        (0,128,128)
    ]

    # process stream
    while True:
        # read frame
        ret, frame = cap.read()
        if not ret:
            break  # stream ended or read failed
        # frame = cv2.flip(cv2.flip(frame, 0), 1)
        orig = frame
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(frame)

        # process image
        img = t(img).unsqueeze(0).type(Tensor)
        with torch.no_grad():
            detections = model(img)
            detections = non_max_suppression(detections, cfg.CONF, cfg.NMS)
            detections = detections[0]
            if detections is not None:
                # track objects
                tracked_objects = tracker.update(detections.cpu())
                det = rescale_boxes(tracked_objects, cfg.SIZE, frame.shape[:2])
                for x1, y1, x2, y2, obj_id, cls_pred in det:
                    # ignore not necessary classes
                    if int(cls_pred) not in app_classes:
                        continue
                    # draw bbox
                    color = colors[int(obj_id) % len(colors)]
                    cls = classes[int(cls_pred)]
                    x1, x2, y1, y2 = int(x1), int(x2), int(y1), int(y2)
                    cv2.rectangle(orig, (x1, y1), (x2, y2), color, 2)
                    cv2.putText(
                        orig,
                        cls + '-' + str(int(obj_id)),
                        (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        1,
                        (255, 255, 255),
                        3
                    )

        cv2.imshow('YoloV3', orig)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
Example #18
def greedy_channel_select(origin_model, prune_cfg, origin_weights,
                          select_layer, device, aux_util, data_loader,
                          pruned_rate):
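    '''Greedily pick which input channels of `select_layer` to keep (a
    1 - pruned_rate fraction), ranking channels by the accumulated gradient
    magnitude of the layer's MaskConv2d weights, then fine-tune the kept
    channels against a joint MSE + detection + auxiliary loss.'''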
    init_state_dict = mask_converted(prune_cfg, origin_weights, target=None)

    prune_model = Darknet(prune_cfg).to(device)
    prune_model.load_state_dict(init_state_dict, strict=True)
    del init_state_dict
    solve_sub_problem_optimizer = optim.SGD(
        prune_model.module_list[int(select_layer)].MaskConv2d.parameters(),
        lr=hyp['lr0'],
        momentum=hyp['momentum'])
    hook_util = HookUtils()
    handles = []

    info = aux_util.layer_info[int(select_layer)]
    in_channels = info['in_channels']
    remove_k = math.floor(in_channels * pruned_rate)
    k = in_channels - remove_k
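    # keep k of the in_channels; the greedy loop below picks them one at a time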

    for name, child in origin_model.module_list.named_children():
        if name == select_layer:
            handles.append(
                child.BatchNorm2d.register_forward_hook(
                    hook_util.hook_origin_input))

    aux_idx = aux_util.conv_layer_dict[select_layer]
    hook_layer_aux = aux_util.down_sample_layer[aux_idx]
    for name, child in prune_model.module_list.named_children():
        if name == select_layer:
            handles.append(
                child.BatchNorm2d.register_forward_hook(
                    hook_util.hook_prune_input))
        elif name == hook_layer_aux:
            handles.append(
                child.register_forward_hook(hook_util.hook_prune_input))
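    # the hooks record the origin/pruned BatchNorm inputs (for the MSE term)
    # and the downsampled feature map that feeds the auxiliary network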

    aux_net = aux_util.creat_aux_list(416,
                                      device,
                                      conv_layer_name=select_layer)
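    # aux_weight (checkpoint path for the auxiliary nets) is presumably defined
    # at module scope; it is not a parameter of this function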
    chkpt_aux = torch.load(aux_weight, map_location=device)
    aux_net.load_state_dict(chkpt_aux['aux{}'.format(aux_idx)])
    del chkpt_aux

    if device.type != 'cpu' and torch.cuda.device_count() > 1:
        prune_model = torch.nn.parallel.DistributedDataParallel(
            prune_model, find_unused_parameters=True)
        prune_model.yolo_layers = prune_model.module.yolo_layers
        aux_net = torch.nn.parallel.DistributedDataParallel(
            aux_net, find_unused_parameters=True)

    nb = len(data_loader)
    prune_model.nc = 80
    prune_model.hyp = hyp
    prune_model.arc = 'default'
    prune_model.eval()
    aux_net.eval()
    MSE = nn.MSELoss(reduction='mean')

    greedy = torch.zeros(k)
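    # greedy[i_k] records the channel index selected at greedy step i_k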
    for i_k in range(k):
        pbar = tqdm(enumerate(data_loader), total=nb)
        print(('\n' + '%10s' * 8) % ('Stage', 'gpu_mem', 'iter', 'MSELoss',
                                     'PdLoss', 'AuxLoss', 'Total', 'targets'))
        for i, (imgs, targets, _, _) in pbar:
            if len(targets) == 0:
                continue

            imgs = imgs.to(device).float() / 255.0  # uint8 to float32, 0-255 to 0.0-1.0
            targets = targets.to(device)

            with torch.no_grad():
                _ = origin_model(imgs)

            _, pruning_pred = prune_model(imgs)
            pruning_loss, _ = compute_loss(pruning_pred, targets, prune_model)
            hook_util.cat_to_gpu0('prune')

            aux_pred = aux_net(hook_util.prune_features['gpu0'][1])
            aux_loss, _ = AuxNetUtils.compute_loss_for_aux(
                aux_pred, aux_net, targets)

            mse_loss = torch.zeros(1).to(device)
            mse_loss += MSE(hook_util.prune_features['gpu0'][0],
                            hook_util.origin_features['gpu0'][0])

            loss = hyp['joint_loss'] * mse_loss + pruning_loss + aux_loss

            loss.backward()
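            # no optimizer step or zero_grad here: gradients accumulate over the
            # whole pass so the channel ranking below reflects the full loader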

            mem = torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0
            s = ('%10s' * 3 + '%10.3g' * 5) % (
                'Pruning ' + select_layer, '%.3gG' % mem, '%g/%g' %
                (i_k, k), mse_loss, pruning_loss, aux_loss, loss, len(targets))
            pbar.set_description(s)

            hook_util.clean_hook_out('origin')
            hook_util.clean_hook_out('prune')

        # unwrap DDP if present; a single-GPU/CPU run never wraps the model,
        # so prune_model.module would raise AttributeError there
        pruned_net = prune_model.module if hasattr(prune_model, 'module') else prune_model
        mask_conv = pruned_net.module_list[int(select_layer)].MaskConv2d

        # channel importance: kernel-wise L2 norm of the accumulated gradient,
        # summed over output filters to give one score per input channel
        grad = mask_conv.weight.grad.detach().clone() ** 2
        grad = grad.sum((2, 3)).sqrt().sum(0)

        if i_k == 0:
            # baseline for comparison: rank all k channels at once, non-greedily
            mask_conv.selected_channels_mask[:] = 1e-5
            _, non_greedy_indices = torch.topk(grad, k)
            logger.info('non greedy layer{}: selected==>{}'.format(
                select_layer, str(non_greedy_indices)))

        # greedy step: add the highest-scoring channel not yet selected
        selected_channels_mask = mask_conv.selected_channels_mask
        _, indices = torch.topk(grad * (1 - selected_channels_mask), 1)
        mask_conv.selected_channels_mask[indices] = 1
        greedy[i_k] = indices
        logger.info('greedy layer{} iter{}: indices==>{}'.format(
            select_layer, str(i_k), str(indices)))

        prune_model.zero_grad()
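        # second pass ("sub-problem"): with the mask fixed, fine-tune the
        # selected channels' weights with the dedicated SGD optimizer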

        pbar = tqdm(enumerate(data_loader), total=nb)
        mloss = torch.zeros(4).to(device)
        print(('\n' + '%10s' * 8) % ('Stage', 'gpu_mem', 'iter', 'MSELoss',
                                     'PdLoss', 'AuxLoss', 'Total', 'targets'))
        for i, (imgs, targets, _, _) in pbar:

            if len(targets) == 0:
                continue

            imgs = imgs.to(device).float() / 255.0  # uint8 to float32, 0-255 to 0.0-1.0
            targets = targets.to(device)

            with torch.no_grad():
                _ = origin_model(imgs)

            _, pruning_pred = prune_model(imgs)
            pruning_loss, _ = compute_loss(pruning_pred, targets, prune_model)
            hook_util.cat_to_gpu0('prune')

            aux_pred = aux_net(hook_util.prune_features['gpu0'][1])
            aux_loss, _ = AuxNetUtils.compute_loss_for_aux(
                aux_pred, aux_net, targets)

            mse_loss = torch.zeros(1).to(device)
            mse_loss += MSE(hook_util.prune_features['gpu0'][0],
                            hook_util.origin_features['gpu0'][0])

            loss = hyp['joint_loss'] * mse_loss + pruning_loss + aux_loss

            loss.backward()

            solve_sub_problem_optimizer.step()
            solve_sub_problem_optimizer.zero_grad()

            mem = torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0
            mloss = (mloss * i + torch.cat(
                [mse_loss, pruning_loss, aux_loss, loss]).detach()) / (i + 1)
            s = ('%10s' * 3 + '%10.3g' * 5) % ('SubProm ' + select_layer,
                                               '%.3gG' % mem, '%g/%g' %
                                               (i_k, k), *mloss, len(targets))
            pbar.set_description(s)

            hook_util.clean_hook_out('origin')
            hook_util.clean_hook_out('prune')

    for handle in handles:
        handle.remove()

    logger.info(
        ("greedy layer{}: selected==>{}".format(select_layer, str(greedy))))
Example #19
class BBDetection:
    def __init__(self):
        print(os.getcwd())
        self.model_cfg = "./src/akhenaten_dv/scripts/Perception/BBoxDetection/model_cfg/yolo_baseline_tiny.cfg"
        self.weights_path = './src/akhenaten_dv/scripts/Perception/BBoxDetection/7.weights'
        self.conf_thres = 0.8
        self.nms_thres = 0.25
        self.vanilla_anchor = False
        self.xy_loss = 2
        self.wh_loss = 1.6
        self.no_object_loss = 25
        self.object_loss = 0.1
        cuda = torch.cuda.is_available()
        self.device = torch.device('cuda:0' if cuda else 'cpu')
        random.seed(0)
        torch.manual_seed(0)
        if cuda:
            torch.cuda.manual_seed(0)
            torch.cuda.manual_seed_all(0)
            torch.backends.cudnn.benchmark = True
            torch.cuda.empty_cache()
        self.model = Darknet(config_path=self.model_cfg,
                             xy_loss=self.xy_loss,
                             wh_loss=self.wh_loss,
                             no_object_loss=self.no_object_loss,
                             object_loss=self.object_loss,
                             vanilla_anchor=self.vanilla_anchor)
        # Load weights
        self.model.load_weights(self.weights_path,
                                self.model.get_start_weight_dim())
        self.model.to(self.device, non_blocking=True)

    def detect(self, cv_img):
        cv_img = cv2.cvtColor(cv_img, cv2.COLOR_BGR2RGB)
        img = img_pil.fromarray(cv_img)
        w, h = img.size
        new_width, new_height = self.model.img_size()
        pad_h, pad_w, ratio = calculate_padding(h, w, new_height, new_width)
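        # pad to the target aspect ratio, then resize; ratio and the pads are
        # reused below to map detections back to the original image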
        img = torchvision.transforms.functional.pad(img,
                                                    padding=(pad_w, pad_h,
                                                             pad_w, pad_h),
                                                    fill=(127, 127, 127),
                                                    padding_mode="constant")
        img = torchvision.transforms.functional.resize(img,
                                                       (new_height, new_width))

        bw = self.model.get_bw()
        if bw:
            img = torchvision.transforms.functional.to_grayscale(
                img, num_output_channels=1)

        img = torchvision.transforms.functional.to_tensor(img)
        img = img.unsqueeze(0)

        with torch.no_grad():
            self.model.eval()
            img = img.to(self.device, non_blocking=True)
            # output,first_layer,second_layer,third_layer = model(img)
            output = self.model(img)

            for detections in output:
                detections = detections[detections[:, 4] > self.conf_thres]
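                # model output boxes are (cx, cy, w, h, conf, ...); convert the
                # centers/sizes to (x0, y0, x1, y1) corners for NMS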
                box_corner = torch.zeros((detections.shape[0], 4),
                                         device=detections.device)
                xy = detections[:, 0:2]
                wh = detections[:, 2:4] / 2
                box_corner[:, 0:2] = xy - wh
                box_corner[:, 2:4] = xy + wh
                probabilities = detections[:, 4]
                nms_indices = nms(box_corner, probabilities, self.nms_thres)
                main_box_corner = box_corner[nms_indices]
                if nms_indices.shape[0] == 0:
                    continue
            bboxes = []
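            # invert the preprocessing: divide by the resize ratio, then subtract
            # the padding to get corners in original-image pixels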
            for i in range(len(main_box_corner)):
                x0 = main_box_corner[i, 0].to('cpu').item() / ratio - pad_w
                y0 = main_box_corner[i, 1].to('cpu').item() / ratio - pad_h
                x1 = main_box_corner[i, 2].to('cpu').item() / ratio - pad_w
                y1 = main_box_corner[i, 3].to('cpu').item() / ratio - pad_h
                bboxes.append([x0, y0, x1, y1])

        return bboxes