Example #1
def detect():
    path = './data/bus.png'
    im0 = cv2.imread(path)  # BGR
    assert im0 is not None, 'Image Not Found ' + path
    # img = letterbox(im0, (608, 608))[0]  # aspect-preserving alternative; see the sketch after this example
    img = cv2.resize(im0, (608, 608))
    # img = im0
    draw_img = im0  # draw on the original image: scale_coords below maps boxes back to im0 space
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW (3x608x608)
    img = np.ascontiguousarray(img)

    img_size = (608, 608) 
    device = torch.device('cpu')
    cfg = './cfg/yolov4.cfg'
    model = Darknet(cfg, img_size)
    weights = './weights/yolov4.pt'
    model.load_state_dict(torch.load(weights, map_location=device)['model'])
    model.to(device).eval()
    img = torch.from_numpy(img).to(device)
    img = img.float()
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)

    with torch.no_grad():
        pred = model(img)

    # Apply NMS
    pred[:, :, :4] *= torch.Tensor(img_size * 2)  # scale normalized xywh by (w, h, w, h)
    pred = non_max_suppression(pred) 

    for i, det in enumerate(pred):
        if det is not None and len(det):
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
            for *xyxy, conf, cls in det:
                if conf > 0.7:
                    c1 = (int(xyxy[0].item()), int(xyxy[1].item()))
                    c2 = (int(xyxy[2].item()), int(xyxy[3].item()))
                    # color = tuple(np.random.randint(0,255,3))
                    # import ipdb;ipdb.set_trace()
                    color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
                    
                    cv2.rectangle(draw_img, c1, c2, color) 
                    print(conf.item(), cls.item())

    cv2.imshow("123", draw_img)
    cv2.waitKey(10000)
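
The commented-out letterbox call above is the aspect-ratio-preserving alternative to the plain cv2.resize. A minimal sketch of such a helper, assuming the usual resize-and-pad behavior; the 114-gray padding color is an assumption, and the return convention follows the commented call:

def letterbox(img, new_shape=(608, 608), color=(114, 114, 114)):
    # Resize to fit inside new_shape while keeping aspect ratio, then pad.
    h, w = img.shape[:2]
    r = min(new_shape[0] / h, new_shape[1] / w)  # scale ratio
    new_unpad = (int(round(w * r)), int(round(h * r)))  # (width, height)
    dw = (new_shape[1] - new_unpad[0]) / 2  # horizontal padding per side
    dh = (new_shape[0] - new_unpad[1]) / 2  # vertical padding per side
    img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    img = cv2.copyMakeBorder(img, top, bottom, left, right,
                             cv2.BORDER_CONSTANT, value=color)
    return img, r, (dw, dh)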
Example #2
def Load_Yolo(device):

    #Load Darknet
    yolo_model_def = os.path.join(yolo_path, 'config/yolov3-tiny.cfg')
    yolo_img_size = 416
    yolo_weights_path = os.path.join(yolo_path, 'weights/yolov3-tiny.weights')
    model = Darknet(yolo_model_def, img_size=yolo_img_size).to(device)

    if yolo_weights_path.endswith(".weights"):
        # Load darknet weights
        model.load_darknet_weights(yolo_weights_path)
    else:
        # Load checkpoint weights
        model.load_state_dict(torch.load(yolo_weights_path))

    model.eval()  # Set in evaluation mode
    return model
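
A minimal usage sketch for the loader above (yolo_path is assumed to point at the repository root, as in the function body; the output format depends on the Darknet implementation):

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Load_Yolo(device)
dummy = torch.zeros(1, 3, 416, 416, device=device)
with torch.no_grad():
    detections = model(dummy)  # raw predictions, before NMS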
Example #3
def transform_to_onnx(cfg_file, weight_file, batch_size, in_h, in_w):
    model = Darknet(cfg_file)
    pre_dict = torch.load(weight_file, map_location=torch.device('cpu'))
    model.load_state_dict(pre_dict['model'])
    x = torch.ones((batch_size, 3, in_h, in_w), requires_grad=True) * 120 / 255.0  # uniform gray dummy input
    onnx_file_name = 'model/yolov3.onnx'
    
    torch.onnx.export(model,
                      x,
                      onnx_file_name,
                      #export_params=True,
                      #opset_version=11,
                      #do_constant_folding=True,
                      input_names=['input'], output_names=['output1'])
                      #dynamic_axes=None)
    print('ONNX model exporting done')
    return onnx_file_name, x
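
To sanity-check the exported graph, the ONNX file can be run through onnxruntime with the same dummy input (onnxruntime is an assumed extra dependency; the cfg and weight paths are placeholders):

import onnxruntime as ort

onnx_file, x = transform_to_onnx('cfg/yolov3.cfg', 'weights/yolov3.pt', 1, 416, 416)
session = ort.InferenceSession(onnx_file)
outputs = session.run(None, {'input': x.detach().numpy()})
print([o.shape for o in outputs])  # inspect the exported output shapes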
Example #4
    def build_model(self) -> nn.Module:

        opt = get_cli_args(batch_size=pedl_batch_size,
                           prebias=pedl_prebias,
                           accumulate=pedl_accumulate)
        hyp = get_hyp(lr0=pedl_init_lr)

        # Initialize model
        model = Darknet(opt.cfg, arc=opt.arc)  # .to(device)

        # Fetch starting weights
        # TODO Once download_data_fn is implemented this should go into download_data
        attempt_download(opt.weights)
        chkpt = torch.load(opt.weights)

        # load model
        try:
            chkpt["model"] = {
                k: v
                for k, v in chkpt["model"].items()
                if model.state_dict()[k].numel() == v.numel()
            }
            model.load_state_dict(chkpt["model"], strict=False)
        except KeyError as e:
            s = (
                "%s is not compatible with %s. Specify --weights '' or specify a --cfg compatible with %s. "
                "See https://github.com/ultralytics/yolov3/issues/657" %
                (opt.weights, opt.cfg, opt.weights))
            raise KeyError(s) from e

        del chkpt

        data_dict = get_data_cfg()
        nc = 1 if opt.single_cls else int(data_dict["classes"])
        model.nc = nc  # attach number of classes to model

        model.arc = opt.arc  # attach yolo architecture
        model.hyp = hyp  # attach hyperparameters to model

        train_dataset = LazyModule.get()

        # The model class weights depend on the dataset labels
        model.class_weights = labels_to_class_weights(
            train_dataset.labels, nc)  # attach class weights

        return model
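
labels_to_class_weights above turns the dataset's label frequencies into per-class loss weights. A sketch of what such a helper typically does, modeled on the ultralytics yolov3 implementation (details may differ in this project):

import numpy as np
import torch

def labels_to_class_weights(labels, nc=80):
    # labels: list of per-image arrays whose first column is the class index
    classes = np.concatenate(labels, 0)[:, 0].astype(int)
    counts = np.bincount(classes, minlength=nc)  # occurrences per class
    counts[counts == 0] = 1                      # avoid division by zero
    weights = 1.0 / counts                       # rarer classes weigh more
    weights /= weights.sum()                     # normalize to sum to 1
    return torch.from_numpy(weights).float()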
Example #5
def yolov3(linux):

    # Code
    if linux:
        YOLO_PATH = '/home/student/Desktop/Automated-Active-Surveillance-System-in-the-Detection-of-Cold-Steel-Weapons/dl_models/yolov3'
    else:
        base_path = os.path.dirname(os.path.realpath(__file__)) + '\\'
        YOLO_PATH = base_path + 'yolov3'
    sys.path.insert(0, YOLO_PATH)
    from models import Darknet
    opt = YoloConfig(linux)
    img_size = opt.img_size
    out, source, weights, half, view_img, save_txt = opt.output, opt.source, opt.weights, opt.half, opt.view_img, opt.save_txt
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    model = Darknet(opt.cfg, img_size)
    model.load_state_dict(torch.load(weights)['model'])
    return model, opt
Example #6
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--config_file", default="runs/config.json")
    parser.add_argument("--output_dir", default='output')
    parser.add_argument("--model_checkpoint")
    args = parser.parse_args()

    with open(args.config_file) as config_buffer:
        config = json.loads(config_buffer.read())

    val_loader_dict = dict()
    for i, dataset_config in enumerate(config['val']["datasets"]):
        val_dataset = VOCDetection(
            img_dir=dataset_config["image_folder"],
            annotation_dir=dataset_config["annot_folder"],
            cache_dir=dataset_config["cache_dir"],
            split_file=dataset_config['split_file'],
            img_size=config['model']['input_size'],
            filter_labels=config['model']['labels'],
            multiscale=False,
            augment=False)
        val_dataset.name = dataset_config.get('name')

        val_loader = DataLoader(val_dataset,
                                batch_size=config["val"]["batch_size"],
                                collate_fn=val_dataset.collate_fn,
                                shuffle=True)
        dataset_name = val_dataset.name if val_dataset.name else f"Dataset #{i}"
        val_loader_dict[dataset_name] = val_loader

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = Darknet(config["model"]["config"]).to(device)
    model.load_state_dict(torch.load(args.model_checkpoint))
    model.eval()

    result_dict = evaluate(model, val_loader_dict, config["val"])
    for name, results in result_dict.items():
        output_str = f"{name} evaluation results:\n" \
            f"precision-{results['precision']},\n" \
            f"recall-{results['recall']},\n" \
            f"AP-{results['AP']},\n" \
            f"F1-{results['F1']},\n" \
            f"ap_class-{results['AP_class']}"
        print(output_str)
Example #7
def mask_cfg_and_converted(mask_replace_layer,
                           cfg='cfg/yolov3-voc.cfg',
                           weight_path='../weights/converted-voc.pt',
                           target='../weights/maskconverted-voc.pt'):
    mask_cfg = '/'.join(cfg.split('/')[:-1]) + '/mask' + cfg.split('/')[-1]
    origin_mdfs = parse_model_cfg(cfg)
    mask_mdfs = []
    mask_mdfs.append(origin_mdfs.pop(0))
    for i, mdf in enumerate(origin_mdfs):
        if str(i) in mask_replace_layer:
            mdf['type'] = 'maskconvolutional'
        mask_mdfs.append(mdf)
    write_cfg(mask_cfg, mask_mdfs)

    mask_weight = OrderedDict()
    origin_weight = torch.load(weight_path)['model']
    for k, v in origin_weight.items():
        key_list = k.split('.')
        idx = key_list[1]
        if idx in mask_replace_layer and key_list[2] == 'Conv2d':
            key_list[2] = 'Mask' + key_list[2]
            key = '.'.join(key_list)
            mask_weight[key] = v
            mask_weight[key.replace('weight',
                                    'selected_channels_mask')] = torch.ones(
                                        v.size(1), dtype=torch.float32)
        else:
            key = '.'.join(key_list)
            mask_weight[key] = v

    model = Darknet(mask_cfg)
    model.load_state_dict(mask_weight, strict=True)
    if target is not None:
        chkpt = {
            'epoch': -1,
            'best_fitness': None,
            'training_results': None,
            'model': model.state_dict(),
            'optimizer': None
        }
        torch.save(chkpt, target)

    return mask_cfg, model.state_dict()
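
A minimal call sketch for the converter above (the layer indices are hypothetical; real indices depend on the cfg being masked):

mask_replace_layer = ['1', '3', '5']  # hypothetical Conv2d layer indices
mask_cfg, mask_state = mask_cfg_and_converted(mask_replace_layer,
                                              cfg='cfg/yolov3-voc.cfg',
                                              weight_path='../weights/converted-voc.pt',
                                              target='../weights/maskconverted-voc.pt')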
Example #8
File: yolo.py  Project: waylonflinn/BBoxEE
    def __init__(self, data_config, net_config, weights, image_size):
        """Class init function."""
        QtCore.QThread.__init__(self)
        self.image_list = []
        self.threshold = 0.9
        self.image_directory = ''
        self.data = None
        self.image_size = image_size

        if torch.cuda.is_available():
            self.device = torch.device('cuda:0')
        else:
            self.device = torch.device('cpu')
        self.data_config = parse_data_config(data_config)
        # Extracts class labels from file
        self.classes = load_classes(self.data_config['names'])
        self.model = Darknet(net_config, image_size)

        checkpoint = torch.load(weights, map_location='cpu')
        self.model.load_state_dict(checkpoint['model'])
Example #9
def convert(cfg='cfg/yolov4.cfg', weights='weights/yolov4.weights'):

    # Initialize model
    model = Darknet(cfg)

    if weights.endswith('.weights'):  # darknet format
        _ = load_darknet_weights(model, weights)

        chkpt = {
            'epoch': -1,
            'best_fitness': None,
            'training_results': None,
            'model': model.state_dict(),
            'optimizer': None
        }

        torch.save(chkpt, 'weights/yolov4.pt')
        print("Success: converted '%s' to 'weights/yolov4.pt'" % weights)

    else:
        print('Error: extension not supported.')
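
A minimal round-trip check for the converter above, assuming the default paths from the signature (loading the saved checkpoint back verifies the state dict is intact):

convert(cfg='cfg/yolov4.cfg', weights='weights/yolov4.weights')
chkpt = torch.load('weights/yolov4.pt', map_location='cpu')
model = Darknet('cfg/yolov4.cfg')
model.load_state_dict(chkpt['model'])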
Example #10
def main(image_uri,
         output_uri,
         weights_uri,
         model_cfg,
         img_size,
         bw,
         conf_thres,
         nms_thres):

    cuda = torch.cuda.is_available()
    device = torch.device('cuda:0' if cuda else 'cpu')
    random.seed(0)
    torch.manual_seed(0)
    if cuda:
        torch.cuda.manual_seed(0)
        torch.cuda.manual_seed_all(0)
        torch.backends.cudnn.benchmark = True
        torch.cuda.empty_cache()

    model = Darknet(model_cfg, img_size)

    # Load weights
    weights_path = storage_client.get_file(weights_uri)
    if weights_path.endswith('.weights'):  # darknet format
        model.load_weights(weights_path)
    elif weights_path.endswith('.pt'):  # pytorch format
        checkpoint = torch.load(weights_path, map_location='cpu')
        model.load_state_dict(checkpoint['model'])
    model.to(device, non_blocking=True)

    detect(image_uri,
           output_uri,
           model,
           img_size,
           bw,
           device=device,
           conf_thres=conf_thres,
           nms_thres=nms_thres)
Example #11
def main(*, batch_size, model_cfg, weights_path, bbox_all, step, n_cpu):
    cuda = torch.cuda.is_available()
    device = torch.device('cuda:0' if cuda else 'cpu')
    random.seed(0)
    torch.manual_seed(0)
    if cuda:
        torch.cuda.manual_seed(0)
        torch.cuda.manual_seed_all(0)
        torch.backends.cudnn.benchmark = True
        torch.cuda.empty_cache()

    # Initiate model
    model = Darknet(model_cfg)
    validate_uri, _, weights_uri = model.get_links()
    _, _, _, _, bw = model.get_dataAug()
    num_images, _ = model.num_images()

    # Load weights
    model.load_weights(weights_path, model.get_start_weight_dim())
    model.to(device, non_blocking=True)

    # Get dataloader
    dataloader = torch.utils.data.DataLoader(ImageLabelDataset(
        validate_uri,
        height=img_height,
        width=img_width,
        augment_hsv=False,
        augment_affine=False,
        num_images=num_images,
        bw=bw,
        n_cpu=n_cpu,
        lr_flip=False,
        ud_flip=False),
                                             batch_size=batch_size,
                                             shuffle=False,
                                             num_workers=n_cpu,
                                             pin_memory=True)
    return validate(dataloader, model, device, step, bbox_all)
Example #12
def main(args):
    import torch
    from os.path import join, dirname
    from models import Darknet, save_weights, parse_model_cfg
    from utils.prune_utils import write_cfg
    cfg = parse_model_cfg(args.cfg)
    idx = list(range(5))
    if args.class_idx != '-1':
        idx += [int(x) + 5
                for x in args.class_idx.split(',')]  # box + obj + cls
    else:
        for i, c in enumerate(cfg):
            if c['type'] == 'yolo':
                nf = int(cfg[i - 1]['filters'])
                nc = int(c['classes'])
                na = nf // (5 + nc)
                c['classes'] = '0'
                cfg[i - 1]['filters'] = str(nf - na * nc)
    model = Darknet(cfg)
    na = model.module_list[model.yolo_layers[0]].na  # anchor number per point
    sd0 = model.state_dict()
    if args.src.endswith('.pt'):
        sd1 = torch.load(args.src)['model']
    else:
        raise NotImplementedError('darknet weights not supported')

    for k in sd0:
        if sd0[k].shape != sd1[k].shape:
            sd1[k] = torch.cat([x[idx] for x in sd1[k].chunk(na)])
    model.load_state_dict(sd1)
    save_weights(model, args.dst)
    from os.path import basename, splitext
    cfg_path = args.cfg.replace(
        splitext(basename(args.cfg))[0],
        splitext(basename(args.dst))[0])
    write_cfg(cfg_path, [model.hyperparams] + model.module_defs)
    print('cfg saved to:', cfg_path)
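
A hedged sketch of how this main might be invoked; the argument names are inferred from the body (args.cfg, args.src, args.dst, args.class_idx) and the default values are placeholders:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--cfg', default='cfg/yolov3.cfg')
parser.add_argument('--src', default='weights/yolov3.pt')
parser.add_argument('--dst', default='weights/yolov3-1cls.weights')
parser.add_argument('--class_idx', default='-1')
main(parser.parse_args())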
Example #13
def load_checkpoint(file_name, device, S, B, C, cfg=None):
    print('Loading checkpoint...')
    checkpoint = torch.load(file_name)  # always load: 'model'/'epoch' are read below
    if cfg is None:
        cfg = checkpoint['cfg']
    BACKBONE = cfg['MODEL']
    N_LAYERS = cfg.get('N_LAYERS', 0)
    MIN_IMAGES = cfg['MIN_IMAGES']
    DATASET_DIR = cfg['DATASET_DIR']
    print(MIN_IMAGES, DATASET_DIR)

    print('backbone:', BACKBONE)
    if BACKBONE == 'Darknet':
        n_classes = get_n_classes(MIN_IMAGES, root=DATASET_DIR)
        backbone = Darknet(n_layers=N_LAYERS, num_classes=n_classes)
        backbone.load_state_dict(checkpoint['model'])
        model = YoloV1_pretrained(backbone=backbone,
                                  n_layers=N_LAYERS,
                                  grid_size=S,
                                  num_boxes=B,
                                  num_classes=C).to(device)

    # for param in backbone.parameters():
    #     param.requires_grad = False

    elif BACKBONE == 'VGG16':
        model = OD_backbone(bb_name=BACKBONE,
                            grid_size=S,
                            num_boxes=B,
                            num_classes=C).to(device)

    if BACKBONE == 'Darknet':
        epoch = checkpoint['epoch']
    else:
        epoch = 0
    return model, cfg, epoch
Example #14
def yolov3(linux):
    # image_path = "/home/student/Desktop/Automated-Active-Surveillance-System-in-the-Detection-of-Cold-Steel-Weapons/models/yolov3/meme.jpg"
    # # os.system(f"cd yolov3")
    # os.chdir("yolov3")
    # os.system(f"python3 detect.py --source {image_path} --cfg cfg/yolov3-spp.cfg --weights yolov3-spp.pt")
    # os.chdir("..")
    # YOLOV3_PATH = '/home/student/Desktop/Automated-Active-Surveillance-System-in-the-Detection-of-Cold-Steel-Weapons/models/yolov3'
    # sys.path.insert(0, YOLOV3_PATH)

    # Code
    if linux:
        YOLO_PATH = '/home/student/Desktop/Automated-Active-Surveillance-System-in-the-Detection-of-Cold-Steel-Weapons/dl_models/yolov3'
    else:
        YOLO_PATH = r'D:\GitLab_respos\Automated-Active-Surveillance-System-in-the-Detection-of-Cold-Steel-Weapons\dl_models\yolov3'  # raw string avoids backslash escapes
    sys.path.insert(0, YOLO_PATH)
    from models import Darknet
    opt = YoloConfig(linux)
    img_size = opt.img_size
    out, source, weights, half, view_img, save_txt = opt.output, opt.source, opt.weights, opt.half, opt.view_img, opt.save_txt
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    model = Darknet(opt.cfg, img_size)
    model.load_state_dict(torch.load(weights)['model'])
    return model, opt
Example #15
def channels_select(prune_cfg, data, origin_model, aux_util, device,
                    data_loader, select_layer, pruned_rate):
    with open(progress_result, 'a') as f:
        f.write(('\n' + '%10s' * 9 + '\n') %
                ('Stage', 'Change', 'MSELoss', 'AuxLoss', 'Total', 'P', 'R',
                 'mAP@0.5', 'F1'))
    logger.info(('%10s' * 6) %
                ('Stage', 'Channels', 'Batch', 'MSELoss', 'AuxLoss', 'Total'))

    batch_size = data_loader.batch_size
    img_size = data_loader.dataset.img_size
    accumulate = 64 // batch_size
    hook_util = HookUtils()
    handles = []
    n_iter = math.floor(500 / batch_size)

    pruning_model = Darknet(prune_cfg,
                            img_size=(img_size, img_size)).to(device)
    chkpt = torch.load(progress_chkpt, map_location=device)
    pruning_model.load_state_dict(chkpt['model'], strict=True)

    aux_in_layer = aux_util.conv_layer_dict[select_layer]
    aux_model = aux_util.creat_aux_model(aux_in_layer)
    aux_model.to(device)

    aux_model.load_state_dict(chkpt['aux_in{}'.format(aux_in_layer)],
                              strict=True)
    aux_loss_scalar = max(0.01, pow((int(aux_in_layer) + 1) / 75, 2))

    del chkpt

    solve_sub_problem_optimizer = optim.SGD(
        pruning_model.module_list[int(aux_in_layer)].MaskConv2d.parameters(),
        lr=hyp['lr0'],
        momentum=hyp['momentum'])

    for name, child in origin_model.module_list.named_children():
        if name == aux_in_layer:
            handles.append(
                child.register_forward_hook(hook_util.hook_origin_output))
        if name == select_layer:
            handles.append(
                child.register_forward_hook(hook_util.hook_origin_output))

    for name, child in pruning_model.module_list.named_children():
        if name == aux_in_layer:
            handles.append(
                child.register_forward_hook(hook_util.hook_prune_output))
        if name == select_layer:
            handles.append(
                child.register_forward_hook(hook_util.hook_prune_output))

    if device.type != 'cpu' and torch.cuda.device_count() > 1:
        origin_model = torch.nn.parallel.DistributedDataParallel(
            origin_model, find_unused_parameters=True)
        origin_model.yolo_layers = origin_model.module.yolo_layers
        pruning_model = torch.nn.parallel.DistributedDataParallel(
            pruning_model, find_unused_parameters=True)
        pruning_model.yolo_layers = pruning_model.module.yolo_layers

    retain_channels_num = math.floor(
        aux_util.layer_info[select_layer]["in_channels"] * (1 - pruned_rate))
    pruning_model.nc = 80
    pruning_model.hyp = hyp
    pruning_model.arc = 'default'
    pruning_model.eval()
    aux_model.eval()
    MSE = nn.MSELoss(reduction='mean')
    mloss = torch.zeros(3).to(device)

    for i_k in range(retain_channels_num):

        data_iter = iter(data_loader)
        pbar = tqdm(range(n_iter), total=n_iter)
        print(('\n' + '%10s' * 6) %
              ('Stage', 'gpu_mem', 'channels', 'MSELoss', 'AuxLoss', 'Total'))
        for i in pbar:

            imgs, targets, _, _ = next(data_iter)

            if len(targets) == 0:
                continue

            imgs = imgs.to(device).float() / 255.0  # uint8 to float32, 0 - 255 to 0.0 - 1.0
            targets = targets.to(device)

            with torch.no_grad():
                _ = origin_model(imgs)

            _, pruning_pred = pruning_model(imgs)
            pruning_loss, _ = compute_loss(pruning_pred, targets,
                                           pruning_model)

            hook_util.cat_to_gpu0()
            mse_loss = torch.zeros(1, device=device)

            aux_pred = aux_model(hook_util.prune_features['gpu0'][1], targets)
            aux_loss = compute_loss_for_DCP(aux_pred, targets)
            mse_loss += MSE(hook_util.prune_features['gpu0'][0],
                            hook_util.origin_features['gpu0'][0])

            loss = hyp['joint_loss'] * mse_loss + aux_loss + 0 * pruning_loss

            loss.backward()

            mem = torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available(
            ) else 0
            s = ('%10s' * 3 + '%10.3g' * 3) % (
                'Prune ' + select_layer, '%.3gG' % mem, '%g/%g' %
                (i_k, retain_channels_num), hyp['joint_loss'] * mse_loss,
                aux_loss, loss)
            pbar.set_description(s)

            # if (i + 1) % 10 == 0:
            #     logger.info(('%10s' * 3 + '%10.3g' * 3) %
            #                 ('Prune' + select_layer, str(i_k), '%g/%g' % (i, n_iter), hyp['joint_loss'] * mse_loss,
            #                  aux_loss, loss))

            hook_util.clean_hook_out()

        grad = pruning_model.module.module_list[int(
            select_layer)].MaskConv2d.weight.grad.detach()**2
        grad = grad.sum((2, 3)).sqrt().sum(0)

        if i_k == 0:
            pruning_model.module.module_list[int(
                select_layer)].MaskConv2d.selected_channels_mask[:] = 1e-5
            if select_layer in aux_util.sync_guide.keys():
                sync_layer = aux_util.sync_guide[select_layer]
                pruning_model.module.module_list[int(
                    sync_layer)].MaskConv2d.selected_channels_mask[(
                        -1 * aux_util.layer_info[select_layer]["in_channels"]
                    ):] = 1e-5

        selected_channels_mask = pruning_model.module.module_list[int(
            select_layer)].MaskConv2d.selected_channels_mask
        _, indices = torch.topk(grad * (1 - selected_channels_mask), 1)

        pruning_model.module.module_list[int(
            select_layer)].MaskConv2d.selected_channels_mask[indices] = 1
        if select_layer in aux_util.sync_guide.keys():
            pruning_model.module.module_list[int(
                sync_layer)].MaskConv2d.selected_channels_mask[-(
                    aux_util.layer_info[select_layer]["in_channels"] -
                    indices)] = 1

        pruning_model.zero_grad()

        pbar = tqdm(range(n_iter), total=n_iter)
        print(('\n' + '%10s' * 6) %
              ('Stage', 'gpu_mem', 'channels', 'MSELoss', 'AuxLoss', 'Total'))
        for i in pbar:

            imgs, targets, _, _ = next(data_iter)

            if len(targets) == 0:
                continue

            imgs = imgs.to(device).float() / 255.0  # uint8 to float32, 0 - 255 to 0.0 - 1.0
            targets = targets.to(device)

            with torch.no_grad():
                _ = origin_model(imgs)

            _, pruning_pred = pruning_model(imgs)
            pruning_loss, _ = compute_loss(pruning_pred, targets,
                                           pruning_model)

            hook_util.cat_to_gpu0()
            mse_loss = torch.zeros(1, device=device)

            aux_pred = aux_model(hook_util.prune_features['gpu0'][1], targets)
            aux_loss = compute_loss_for_DCP(aux_pred, targets)
            mse_loss += MSE(hook_util.prune_features['gpu0'][0],
                            hook_util.origin_features['gpu0'][0])

            loss = (hyp['joint_loss'] * mse_loss +
                    aux_loss_scalar * aux_loss + 0 * pruning_loss)

            loss.backward()

            if i % accumulate == 0:
                solve_sub_problem_optimizer.step()
                solve_sub_problem_optimizer.zero_grad()

            mem = torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available(
            ) else 0
            mloss = (mloss * i +
                     torch.cat([hyp['joint_loss'] * mse_loss, aux_loss, loss
                                ]).detach()) / (i + 1)
            s = ('%10s' * 3 + '%10.3g' * 3) % (
                'SubProm ' + select_layer, '%.3gG' % mem, '%g/%g' %
                (i_k, retain_channels_num), *mloss)
            pbar.set_description(s)

            if (i + 1) % n_iter == 0:
                logger.info(('%10s' * 3 + '%10.3g' * 3) %
                            ('SubPro' + select_layer, str(i_k), '%g/%g' %
                             (i, n_iter), *mloss))

            hook_util.clean_hook_out()

    for handle in handles:
        handle.remove()

    greedy_indices = pruning_model.module.module_list[int(
        select_layer)].MaskConv2d.selected_channels_mask < 1
    pruning_model.module.module_list[int(
        select_layer)].MaskConv2d.selected_channels_mask[greedy_indices] = 0

    res, _ = test.test(prune_cfg,
                       data,
                       batch_size=batch_size * 2,
                       img_size=416,
                       model=pruning_model,
                       conf_thres=0.1,
                       iou_thres=0.5,
                       save_json=False,
                       dataloader=None)

    chkpt = torch.load(progress_chkpt, map_location=device)
    chkpt['current_layer'] = aux_util.next_prune_layer(select_layer)
    chkpt['epoch'] = -1
    chkpt['model'] = (pruning_model.module.state_dict()
                      if isinstance(pruning_model, nn.parallel.DistributedDataParallel)
                      else pruning_model.state_dict())
    chkpt['optimizer'] = None

    torch.save(chkpt, progress_chkpt)

    torch.save(chkpt, last)
    del chkpt

    with open(progress_result, 'a') as f:
        f.write(('%10s' * 2 + '%10.3g' * 7) %
                ('Pruning ' + select_layer,
                 str(aux_util.layer_info[select_layer]['in_channels']) + '->' +
                 str(retain_channels_num), *mloss, *res[:4]) + '\n')

    torch.cuda.empty_cache()
Example #16
# Get data configuration
data_config = parse_data_config(opt.data_config_path)
# Get hyper parameters
hyperparams = parse_model_config(opt.model_config_path)[0]
learning_rate = float(hyperparams['learning_rate'])
momentum = float(hyperparams['momentum'])
decay = float(hyperparams['decay'])
burn_in = int(hyperparams['burn_in'])

# Initiate model

dirlist = os.listdir('./yolodata/')
dirlist.remove('showTruth.m')
for data_dir in sorted(dirlist):  # avoid shadowing the builtin 'dir'
    # Get dataloader
    model = Darknet(opt.model_config_path)
    model.load_weights(opt.weights_path)

    if cuda:
        model = model.cuda()

    model.train()
    train_path = './yolodata/' + data_dir + '/train/indextrain.txt'
    dataloader = torch.utils.data.DataLoader(ListDataset(train_path),
                                             batch_size=opt.batch_size,
                                             shuffle=False,
                                             num_workers=opt.n_cpu)

    Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor

    optimizer = optim.SGD(model.parameters(),
Example #17
import os

import torch

from data_utils import MyTestDataset, get_test_transforms
from models import Darknet
from utils import non_max_suppression

from conf.settings import BASE_DIR

models_path = os.path.join(BASE_DIR, "models")
images_path = os.path.join(BASE_DIR, "images")

if __name__ == "__main__":
    attempt = 4

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(f"Running on {device}...")

    model = Darknet(os.path.join(
        BASE_DIR, "yolo_v3/config/yolov3-custom.cfg")).to(device)
    model.load_state_dict(
        torch.load(os.path.join(models_path, "yolo_v3_4_17.pt"),
                   map_location=device))

    model2 = Darknet(os.path.join(
        BASE_DIR, "yolo_v3/config/yolov3-custom.cfg")).to(device)
    model2.load_state_dict(
        torch.load(os.path.join(models_path, "yolo_v3_4_20.pt"),
                   map_location=device))

    model3 = Darknet(os.path.join(
        BASE_DIR, "yolo_v3/config/yolov3-custom.cfg")).to(device)
    model3.load_state_dict(
        torch.load(os.path.join(models_path, "yolo_v3_4_25.pt"),
                   map_location=device))
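
The three checkpoints suggest an ensemble at inference time. A minimal sketch of averaging their raw outputs before a single NMS pass (the averaging strategy is an assumption, not shown in the snippet; the models are assumed to be in eval mode):

def ensemble_predict(img_batch, conf_thres=0.5, nms_thres=0.4):
    # Average raw predictions from the three checkpoints, then run NMS once.
    with torch.no_grad():
        preds = [m(img_batch) for m in (model, model2, model3)]
    mean_pred = torch.stack(preds).mean(dim=0)
    return non_max_suppression(mean_pred, conf_thres, nms_thres)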
Example #18
import sys

import torch
import torch.nn.functional as F
from PIL import Image
from torchvision import transforms

from models import Darknet
from utils.utils import *
from utils.datasets import pad_to_square

weights_path = 'weights/yolov3.weights'
model = Darknet(config_path="config/yolov3.cfg", img_size=416)
model.load_darknet_weights(weights_path)
img_path  = '../imgs/traffic.jpg'

label_file = "data/coco.names"
classes = load_classes(label_file)
vehicles = [2, 3, 5, 7]  # COCO class ids: car, motorcycle, bus, truck
transform = transforms.ToTensor()


def get_vehicles(img_path, conf_thres=0.8):
    img = Image.open(img_path)
    img = transform(img)
    img = F.interpolate(img.unsqueeze(0), size=416, mode="nearest").squeeze(0)
    result = model(torch.unsqueeze(img, 0))
    (result, ) = non_max_suppression(result, conf_thres, 0.4)
    count = 0
    if result is None:
        return 0
    for instance in result:
        if instance[6] in vehicles:
            count += 1
    return count
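
A minimal usage sketch, reusing the module-level example image:

n_vehicles = get_vehicles('../imgs/traffic.jpg', conf_thres=0.8)
print('%d vehicles detected' % n_vehicles)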
Example #19
def train_aux_for_LCP(cfg, backbone, neck, data_loader, weights, aux_weight,
                      hyp, device, resume, epochs):
    init_seeds()
    batch_size = data_loader.batch_size
    accumulate = 64 // batch_size

    model = Darknet(cfg).to(device)
    model_chkpt = torch.load(weights, map_location=device)
    model.load_state_dict(model_chkpt['model'], strict=True)
    del model_chkpt
    aux_util = AuxNetUtils(model, hyp, backbone, neck, strategy="LCP")
    hook_util = HookUtils()

    start_epoch = 0

    aux_model_list = []
    pg = []
    for layer in aux_util.aux_in_layer:
        aux_model = aux_util.creat_aux_model(layer)
        aux_model.to(device)
        for v in aux_model.parameters():
            pg += [v]
        aux_model_list.append(aux_model)

    optimizer = optim.SGD(pg,
                          lr=hyp['lr0'],
                          momentum=hyp['momentum'],
                          nesterov=True)
    del pg

    if resume:
        chkpt = torch.load(aux_weight, map_location=device)

        for i, layer in enumerate(aux_util.aux_in_layer):
            aux_model_list[i].load_state_dict(chkpt['aux_in{}'.format(layer)],
                                              strict=True)

        if chkpt['optimizer'] is not None:
            optimizer.load_state_dict(chkpt['optimizer'])

        start_epoch = chkpt['epoch'] + 1

    scheduler = lr_scheduler.MultiStepLR(
        optimizer, milestones=[epochs // 3, 2 * epochs // 3], gamma=0.1)
    scheduler.last_epoch = start_epoch - 1

    handles = []  # the hook handles must be removed once training ends
    for name, child in model.module_list.named_children():
        if name in aux_util.aux_in_layer:
            handles.append(
                child.register_forward_hook(hook_util.hook_origin_output))

    if device.type != 'cpu' and torch.cuda.device_count() > 1:
        model = nn.parallel.DistributedDataParallel(
            model, find_unused_parameters=True)
        model.yolo_layers = model.module.yolo_layers

    nb = len(data_loader)
    model.nc = 80
    model.hyp = hyp
    model.arc = 'default'
    print('Starting training for %g epochs...' % epochs)
    for epoch in range(start_epoch, epochs):

        for aux_model in aux_model_list:
            aux_model.train()
        print(('\n' + '%10s' * 6) %
              ('Stage', 'Epoch', 'gpu_mem', 'AuxID', 'cls', 'targets'))

        # -----------------start batch-----------------
        pbar = tqdm(enumerate(data_loader), total=nb)
        model.train()
        for i, (imgs, targets, _, _) in pbar:

            if len(targets) == 0:
                continue

            ni = i + nb * epoch
            imgs = imgs.to(device).float() / 255.0  # uint8 to float32, 0 - 255 to 0.0 - 1.0
            targets = targets.to(device)

            with torch.no_grad():
                prediction = model(imgs)

            hook_util.cat_to_gpu0()
            for aux_idx, aux_model in enumerate(aux_model_list):
                pred, loc_loss = aux_model(
                    hook_util.origin_features['gpu0'][aux_idx], targets,
                    prediction)
                loss = compute_loss_for_LCP(pred, loc_loss, targets)

                loss *= batch_size / 64

                loss.backward()

                mem = torch.cuda.memory_cached(
                ) / 1E9 if torch.cuda.is_available() else 0  # (GB)
                s = ('%10s' * 3 + '%10.3g' * 3) % ('Train Aux', '%g/%g' %
                                                   (epoch, epochs - 1),
                                                   '%.3gG' % mem, aux_idx,
                                                   loss, len(targets))
                pbar.set_description(s)
            # clear the hooked outputs after every batch
            hook_util.clean_hook_out()
            if ni % accumulate == 0:
                optimizer.step()
                optimizer.zero_grad()
        # -----------------end batches-----------------

        scheduler.step()
        final_epoch = epoch + 1 == epochs
        chkpt = {
            'epoch': epoch,
            'optimizer': None if final_epoch else optimizer.state_dict()
        }
        for i, layer in enumerate(aux_util.aux_in_layer):
            chkpt['aux_in{}'.format(layer)] = aux_model_list[i].state_dict()

        torch.save(chkpt, aux_weight)

        torch.save(chkpt, "../weights/LCP/aux-coco.pt")
        del chkpt

        with open("./LCP/aux_result.txt", 'a') as f:
            f.write(s + '\n')

    # finally, remove all hooks
    for handle in handles:
        handle.remove()
    torch.cuda.empty_cache()
Example #20
def prune(mask_cfg, progress_weights, mask_replace_layer, new_cfg_file,
          new_weights):
    only_in = mask_replace_layer[-3:]
    mask_replace_layer = mask_replace_layer[:-2]

    device_in = torch.device('cpu')
    model = Darknet(mask_cfg)
    chkpt = torch.load(progress_weights, map_location=device_in)
    model.load_state_dict(chkpt['model'])

    new_cfg = parse_model_cfg(mask_cfg)

    for layer in mask_replace_layer[:-1]:
        assert isinstance(model.module_list[int(layer)][0],
                          MaskConv2d), "Not a pruned model!"
        tail_layer = mask_replace_layer[mask_replace_layer.index(layer) + 1]
        assert isinstance(model.module_list[int(tail_layer)][0],
                          MaskConv2d), "Not a pruned model!"
        in_channels_mask = model.module_list[int(
            layer)][0].selected_channels_mask
        out_channels_mask = model.module_list[int(
            tail_layer)][0].selected_channels_mask

        in_channels = int(torch.sum(in_channels_mask))
        out_channels = int(torch.sum(out_channels_mask))

        new_cfg[int(layer) + 1]['type'] = 'convolutional'
        new_cfg[int(layer) + 1]['filters'] = str(out_channels)

        new_conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=model.module_list[int(layer)][0].kernel_size,
            stride=model.module_list[int(layer)][0].stride,
            padding=model.module_list[int(layer)][0].padding,
            bias=False)
        thin_weight = model.module_list[int(layer)][0].weight[
            out_channels_mask.bool()]
        thin_weight = thin_weight[:, in_channels_mask.bool()]
        assert new_conv.weight.numel() == thin_weight.numel(), 'Shapes do not match!'
        new_conv.weight.data.copy_(thin_weight.data)

        new_batch = nn.BatchNorm2d(out_channels, momentum=0.1)
        new_batch.weight.data.copy_(model.module_list[int(layer)][1].weight[
            out_channels_mask.bool()].data)
        new_batch.bias.data.copy_(model.module_list[int(layer)][1].bias[
            out_channels_mask.bool()].data)
        new_batch.running_mean.copy_(model.module_list[int(
            layer)][1].running_mean[out_channels_mask.bool()].data)
        new_batch.running_var.copy_(model.module_list[int(
            layer)][1].running_var[out_channels_mask.bool()].data)
        new_module = nn.Sequential()
        new_module.add_module('Conv2d', new_conv)
        new_module.add_module('BatchNorm2d', new_batch)
        new_module.add_module('activation', model.module_list[int(layer)][2])
        model.module_list[int(layer)] = new_module

    for layer in only_in:
        new_cfg[int(layer) + 1]['type'] = 'convolutional'
        assert isinstance(model.module_list[int(layer)][0],
                          MaskConv2d), "Not a pruned model!"
        in_channels_mask = model.module_list[int(
            layer)][0].selected_channels_mask > 0.1
        in_channels = int(torch.sum(in_channels_mask))

        new_conv = nn.Conv2d(
            in_channels,
            out_channels=model.module_list[int(layer)][0].out_channels,
            kernel_size=model.module_list[int(layer)][0].kernel_size,
            stride=model.module_list[int(layer)][0].stride,
            padding=model.module_list[int(layer)][0].padding,
            bias=False)
        new_conv.weight.data.copy_(model.module_list[int(layer)]
                                   [0].weight[:, in_channels_mask.bool()].data)

        new_module = nn.Sequential()
        new_module.add_module('Conv2d', new_conv)
        new_module.add_module('BatchNorm2d', model.module_list[int(layer)][1])
        new_module.add_module('activation', model.module_list[int(layer)][2])
        model.module_list[int(layer)] = new_module

    write_cfg(new_cfg_file, new_cfg)
    chkpt = {
        'epoch': -1,
        'best_fitness': None,
        'training_results': None,
        'model': model.state_dict(),
        'optimizer': None
    }
    torch.save(chkpt, new_weights)
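
A hedged call sketch for the pruner above (all paths and layer indices are placeholders; the layer list must name MaskConv2d layers, with the trailing entries treated as input-only layers as in the function body):

mask_replace_layer = ['1', '3', '5', '7', '9']  # hypothetical layer indices
prune(mask_cfg='cfg/maskyolov3-voc.cfg',
      progress_weights='../weights/maskconverted-voc.pt',
      mask_replace_layer=mask_replace_layer,
      new_cfg_file='cfg/thin-yolov3-voc.cfg',
      new_weights='../weights/thin-yolov3-voc.pt')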
Example #21
def greedy_channel_select(origin_model, prune_cfg, origin_weights,
                          select_layer, device, aux_util, data_loader,
                          pruned_rate):
    init_state_dict = mask_converted(prune_cfg, origin_weights, target=None)

    prune_model = Darknet(prune_cfg).to(device)
    prune_model.load_state_dict(init_state_dict, strict=True)
    del init_state_dict
    solve_sub_problem_optimizer = optim.SGD(
        prune_model.module_list[int(select_layer)].MaskConv2d.parameters(),
        lr=hyp['lr0'],
        momentum=hyp['momentum'])
    hook_util = HookUtils()
    handles = []

    info = aux_util.layer_info[int(select_layer)]
    in_channels = info['in_channels']
    remove_k = math.floor(in_channels * pruned_rate)
    k = in_channels - remove_k

    for name, child in origin_model.module_list.named_children():
        if name == select_layer:
            handles.append(
                child.BatchNorm2d.register_forward_hook(
                    hook_util.hook_origin_input))

    aux_idx = aux_util.conv_layer_dict[select_layer]
    hook_layer_aux = aux_util.down_sample_layer[aux_idx]
    for name, child in prune_model.module_list.named_children():
        if name == select_layer:
            handles.append(
                child.BatchNorm2d.register_forward_hook(
                    hook_util.hook_prune_input))
        elif name == hook_layer_aux:
            handles.append(
                child.register_forward_hook(hook_util.hook_prune_input))

    aux_net = aux_util.creat_aux_list(416,
                                      device,
                                      conv_layer_name=select_layer)
    chkpt_aux = torch.load(aux_weight, map_location=device)
    aux_net.load_state_dict(chkpt_aux['aux{}'.format(aux_idx)])
    del chkpt_aux

    if device.type != 'cpu' and torch.cuda.device_count() > 1:
        prune_model = torch.nn.parallel.DistributedDataParallel(
            prune_model, find_unused_parameters=True)
        prune_model.yolo_layers = prune_model.module.yolo_layers
        aux_net = torch.nn.parallel.DistributedDataParallel(
            aux_net, find_unused_parameters=True)

    nb = len(data_loader)
    prune_model.nc = 80
    prune_model.hyp = hyp
    prune_model.arc = 'default'
    prune_model.eval()
    aux_net.eval()
    MSE = nn.MSELoss(reduction='mean')

    greedy = torch.zeros(k)
    for i_k in range(k):
        pbar = tqdm(enumerate(data_loader), total=nb)
        print(('\n' + '%10s' * 8) % ('Stage', 'gpu_mem', 'iter', 'MSELoss',
                                     'PdLoss', 'AuxLoss', 'Total', 'targets'))
        for i, (imgs, targets, _, _) in pbar:
            if len(targets) == 0:
                continue

            imgs = imgs.to(device).float() / 255.0  # uint8 to float32, 0 - 255 to 0.0 - 1.0
            targets = targets.to(device)

            with torch.no_grad():
                _ = origin_model(imgs)

            _, pruning_pred = prune_model(imgs)
            pruning_loss, _ = compute_loss(pruning_pred, targets, prune_model)
            hook_util.cat_to_gpu0('prune')

            aux_pred = aux_net(hook_util.prune_features['gpu0'][1])
            aux_loss, _ = AuxNetUtils.compute_loss_for_aux(
                aux_pred, aux_net, targets)

            mse_loss = torch.zeros(1).to(device)
            mse_loss += MSE(hook_util.prune_features['gpu0'][0],
                            hook_util.origin_features['gpu0'][0])

            loss = hyp['joint_loss'] * mse_loss + pruning_loss + aux_loss

            loss.backward()

            mem = torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available(
            ) else 0
            s = ('%10s' * 3 + '%10.3g' * 5) % (
                'Pruning ' + select_layer, '%.3gG' % mem, '%g/%g' %
                (i_k, k), mse_loss, pruning_loss, aux_loss, loss, len(targets))
            pbar.set_description(s)

            hook_util.clean_hook_out('origin')
            hook_util.clean_hook_out('prune')

        grad = prune_model.module.module_list[int(
            select_layer)].MaskConv2d.weight.grad.detach().clone()**2
        grad = grad.sum((2, 3)).sqrt().sum(0)

        if i_k == 0:
            prune_model.module.module_list[int(
                select_layer)].MaskConv2d.selected_channels_mask[:] = 1e-5
            _, non_greedy_indices = torch.topk(grad, k)
            logger.info('non greedy layer{}: selected==>{}'.format(
                select_layer, str(non_greedy_indices)))

        selected_channels_mask = prune_model.module.module_list[int(
            select_layer)].MaskConv2d.selected_channels_mask
        _, indices = torch.topk(grad * (1 - selected_channels_mask), 1)
        prune_model.module.module_list[int(
            select_layer)].MaskConv2d.selected_channels_mask[indices] = 1
        greedy[i_k] = indices
        logger.info('greedy layer{} iter{}: indices==>{}'.format(
            select_layer, str(i_k), str(indices)))

        prune_model.zero_grad()

        pbar = tqdm(enumerate(data_loader), total=nb)
        mloss = torch.zeros(4).to(device)
        print(('\n' + '%10s' * 8) % ('Stage', 'gpu_mem', 'iter', 'MSELoss',
                                     'PdLoss', 'AuxLoss', 'Total', 'targets'))
        for i, (imgs, targets, _, _) in pbar:

            if len(targets) == 0:
                continue

            imgs = imgs.to(device).float() / 255.0  # uint8 to float32, 0 - 255 to 0.0 - 1.0
            targets = targets.to(device)

            with torch.no_grad():
                _ = origin_model(imgs)

            _, pruning_pred = prune_model(imgs)
            pruning_loss, _ = compute_loss(pruning_pred, targets, prune_model)
            hook_util.cat_to_gpu0('prune')

            aux_pred = aux_net(hook_util.prune_features['gpu0'][1])
            aux_loss, _ = AuxNetUtils.compute_loss_for_aux(
                aux_pred, aux_net, targets)

            mse_loss = torch.zeros(1).to(device)
            mse_loss += MSE(hook_util.prune_features['gpu0'][0],
                            hook_util.origin_features['gpu0'][0])

            loss = hyp['joint_loss'] * mse_loss + pruning_loss + aux_loss

            loss.backward()

            solve_sub_problem_optimizer.step()
            solve_sub_problem_optimizer.zero_grad()

            mem = torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available(
            ) else 0
            mloss = (mloss * i + torch.cat(
                [mse_loss, pruning_loss, aux_loss, loss]).detach()) / (i + 1)
            s = ('%10s' * 3 + '%10.3g' * 5) % ('SubProm ' + select_layer,
                                               '%.3gG' % mem, '%g/%g' %
                                               (i_k, k), *mloss, len(targets))
            pbar.set_description(s)

            hook_util.clean_hook_out('origin')
            hook_util.clean_hook_out('prune')

    for handle in handles:
        handle.remove()

    logger.info(
        ("greedy layer{}: selected==>{}".format(select_layer, str(greedy))))
Example #22
def stream(cfg,
           classes_file,
           weights,
           socket_ip,
           socket_port,
           image_size=128,
           confidence_threshold=0.6,
           nms_thres=0.5):
    print('+ Initializing model')
    model = Darknet(cfg, image_size)
    print('+ Loading model')
    load_darknet_weights(model, weights)
    print('+ Fusing model')
    model.fuse()
    print('+ Loading model to CPU')
    model.to('cpu').eval()
    print('+ Loading webcam')
    cap = LoadKinect(img_size=image_size)
    print('+ Loading classes')
    classes = load_classes(classes_file)
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(classes))]
    print('+ Connecting to remote socket')
    global sock
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.connect((socket_ip, socket_port))
    print('+ Enumerating cam')
    for counter, (path, img, im0, vid_cap) in enumerate(cap):
        t = time.time()

        print('+ Loading image to CPU')
        img = torch.from_numpy(img).unsqueeze(0).to('cpu')
        pred, _ = model(img)
        print('+ Detecting objects')
        det = non_max_suppression(pred, confidence_threshold, nms_thres)[0]

        if det is not None and len(det) > 0:
            detected_classes = []
            print('+ Rescaling model')
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                      im0.shape).round()

            print('+ Reading depth')

            depth = get_depth()
            depth_swap = np.swapaxes(depth, 0, 1)

            depth_strip1d = np.array([
                np.sort(stripe)[100] for stripe in depth_swap
            ]).astype(np.uint8)
            depth_strip2d_swap = np.array([
                np.ones(depth_swap.shape[1]) * depth for depth in depth_strip1d
            ]).astype(np.uint8)
            depth_strip2d = np.swapaxes(depth_strip2d_swap, 0, 1)

            depth_edge1d = np.zeros(depth_strip1d.shape)

            state = False
            for counter, _ in np.ndenumerate(depth_edge1d[:-1]):
                state = (not state) and depth_strip1d[counter] < 230
                depth_edge1d[counter[0]] = not state

            state = False
            state_cnt = 0
            for counter, _ in np.ndenumerate(depth_edge1d[:-1]):
                counter = counter[0]
                if depth_edge1d[counter] == state:
                    state_cnt += 1
                else:
                    if state_cnt < 10:
                        for r in range(max(0, counter - 10), counter):
                            depth_edge1d[counter] = state
                    state_cnt = 0
                    state = depth_edge1d[counter]

            depth_edge1d = depth_edge1d * 255

            depth_edge2d_swap = np.array([
                np.ones(100) * edge for edge in depth_edge1d
            ]).astype(np.uint8)
            depth_edge2d = np.swapaxes(depth_edge2d_swap, 0, 1)

            for *coordinates, conf, cls_conf, cls in det:
                if classes[int(cls)] in RISKY_CLASSES:
                    label = '%s %.2f' % (classes[int(cls)], conf)
                    plot_one_box(coordinates,
                                 im0,
                                 label=label,
                                 color=colors[int(cls)])
                    print(f"+ Detected {classes[int(cls)]}")
                    x_avg_depth = np.mean(depth[coordinates[0] -
                                                5:coordinates[0] + 5])
                    y_avg_depth = np.mean(depth[coordinates[1] -
                                                5:coordinates[1] + 5])
                    detected_classes.append({
                        classes[int(cls)]: {
                            'x': coordinates[0],
                            'y': coordinates[1],
                            'z':
                            np.average(np.array([x_avg_depth, y_avg_depth]))
                        }
                    })

            n = []
            for counter in detected_classes:
                width = im0.shape[1]
                x, y, z = counter[list(counter.keys())[0]].values()
                phi = (x / width * 2 - 1) * (CAMERA_FOV / 2)
                n.append(f"{list(counter.keys())[0]};{phi};{z}|")
            sock.send(''.join(str(x) for x in n)[:-1].encode('utf-8'))
        print('+ Cycle took %.3fs' % (time.time() - t))
        plt.imshow(bgr_to_rgb(im0))
        plt.show(block=False)
        plt.pause(.001)
Example #23
def get_thin_model(cfg,
                   backbone,
                   neck,
                   data,
                   origin_weights,
                   img_size,
                   batch_size,
                   prune_rate,
                   aux_epochs=50,
                   ft_epochs=15,
                   resume=False,
                   cache_images=False,
                   start_layer='75'):
    init_seeds()

    # -----------------dataset-----------------
    data_dict = parse_data_cfg(data)
    train_path = data_dict['train']
    test_path = data_dict['valid']

    dataset = LoadImagesAndLabels(
        train_path,
        img_size,
        batch_size,
        augment=True,
        hyp=hyp,  # augmentation hyperparameters
        rect=False,  # rectangular training
        cache_labels=True,
        cache_images=cache_images)

    batch_size = min(batch_size, len(dataset))
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0,
              8])  # number of workers
    train_loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        num_workers=nw,
        shuffle=True,  # Shuffle=True unless rectangular training is used
        pin_memory=True,
        collate_fn=dataset.collate_fn)

    test_loader = torch.utils.data.DataLoader(LoadImagesAndLabels(
        test_path,
        img_size,
        batch_size * 2,
        hyp=hyp,
        rect=True,
        cache_labels=True,
        cache_images=cache_images),
                                              batch_size=batch_size * 2,
                                              num_workers=nw,
                                              pin_memory=True,
                                              collate_fn=dataset.collate_fn)
    # -----------------dataset-----------------

    # -----------get trained aux net-----------
    if aux_trained:
        aux_chkpt = torch.load(aux_weight)
        if aux_chkpt["epoch"] + 1 != aux_epochs:
            del aux_chkpt
            train_aux_for_DCP(cfg,
                              backbone,
                              neck,
                              train_loader,
                              origin_weights,
                              aux_weight,
                              hyp,
                              device,
                              resume=True,
                              epochs=aux_epochs)
        else:
            del aux_chkpt
    else:
        train_aux_for_DCP(cfg,
                          backbone,
                          neck,
                          train_loader,
                          origin_weights,
                          aux_weight,
                          hyp,
                          device,
                          resume=False,
                          epochs=aux_epochs)
    # -----------get trained aux net-----------

    # ----------init model and aux util----------
    origin_model = Darknet(cfg).to(device)
    chkpt = torch.load(origin_weights, map_location=device)
    origin_model.load_state_dict(chkpt['model'], strict=True)
    aux_util = AuxNetUtils(origin_model, hyp, backbone, neck, strategy="DCP")
    del chkpt
    # ----------init model and aux net----------

    mask_cfg, init_state_dict = mask_cfg_and_converted(
        aux_util.mask_replace_layer, cfg, origin_weights, target=None)

    # ----------start from first layer----------
    if not resume:
        first_progress = {
            'current_layer': start_layer,
            'epoch': -1,
            'model': init_state_dict,
            'optimizer': None
        }
        aux_chkpt = torch.load(aux_weight)
        for k, v in aux_chkpt.items():
            if 'aux' in k:
                first_progress[k] = v
        del aux_chkpt
        torch.save(first_progress, progress_chkpt)

        with open(progress_result, 'a') as f:
            t = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
            f.write('\n' + t + '\n')
        # ----------start from first layer----------

    layer = start_layer
    if start_layer == aux_util.pruning_layer[-1]:
        return mask_cfg, aux_util

    while int(layer) < int(aux_util.pruning_layer[-1]):
        layer = fine_tune(mask_cfg, data, aux_util, device, train_loader,
                          test_loader, ft_epochs)
        channels_select(mask_cfg, data, origin_model, aux_util, device,
                        train_loader, layer, prune_rate)

    return mask_cfg, aux_util
Example #24
def mask_catch(input, output):

    parser = argparse.ArgumentParser()
    parser.add_argument("--input_file_path",
                        type=str,
                        default=input,
                        help="path to images directory")
    parser.add_argument("--output_path",
                        type=str,
                        default=output,
                        help="output image directory")
    parser.add_argument("--model_def",
                        type=str,
                        default="data/yolov3_mask.cfg",
                        help="path to model definition file")
    parser.add_argument("--weights_path",
                        type=str,
                        default="checkpoints/yolov3_ckpt_499.pth",
                        help="path to weights file")
    parser.add_argument("--class_path",
                        type=str,
                        default="data/mask_dataset.names",
                        help="path to class label file")
    parser.add_argument("--conf_thres",
                        type=float,
                        default=0.8,
                        help="object confidence threshold")
    parser.add_argument("--nms_thres",
                        type=float,
                        default=0.3,
                        help="iou threshold for non-maximum suppression")
    parser.add_argument("--frame_size",
                        type=int,
                        default=416,
                        help="size of each image dimension")

    opt = parser.parse_args()
    # Output directory
    os.makedirs(opt.output_path, exist_ok=True)

    # checking for GPU
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Set up model
    model = Darknet(opt.model_def, img_size=opt.frame_size).to(device)

    # loading weights
    if opt.weights_path.endswith(".weights"):
        model.load_darknet_weights(opt.weights_path)  # Load weights
    else:
        model.load_state_dict(torch.load(opt.weights_path))  # Load checkpoints

    # Set in evaluation mode
    model.eval()

    # Extracts class labels from file
    classes = load_classes(opt.class_path)

    # ckecking for GPU for Tensor
    Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor

    print("\nPerforming object detection:")

    # for text in output
    t_size = cv2.getTextSize(" ", cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]

    for imagename in os.listdir(opt.input_file_path):

        print("\n" + imagename + "_______")
        image_path = os.path.join(opt.input_file_path, imagename)
        print(image_path)
        # frame extraction
        org_img = cv2.imread(image_path)

        # Original image width and height
        i_height, i_width = org_img.shape[:2]

        # Preprocessing: pad to square => resize to [416 x 416] => BGR -> RGB
        # => [0...255] -> [0...1] => HWC -> CHW => add batch dim => ndarray -> tensor -> variable

        # Pad the image to a square black canvas
        x = y = max(i_height, i_width)
        img = np.zeros((x, y, 3), np.uint8)

        # Centre the original image on the black canvas
        start_new_i_height = int((y - i_height) / 2)
        start_new_i_width = int((x - i_width) / 2)

        img[start_new_i_height:(start_new_i_height + i_height),
            start_new_i_width:(start_new_i_width + i_width)] = org_img

        # Resize to [416 x 416]
        img = cv2.resize(img, (opt.frame_size, opt.frame_size))

        # BGR -> RGB
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # [0...255] -> [0...1]
        img = np.asarray(img) / 255
        # HWC [416, 416, 3] -> CHW [3, 416, 416]
        img = np.transpose(img, [2, 0, 1])
        # Add a batch dimension: [3, 416, 416] -> [1, 3, 416, 416]
        img = np.expand_dims(img, axis=0)
        # ndarray -> tensor
        img = torch.Tensor(img)

        # plt.imshow(img[0].permute(1, 2, 0))
        # plt.show()

        # [tensor -> variable]
        img = Variable(img.type(Tensor))

        # Get detections
        with torch.no_grad():
            detections = model(img)

        detections = non_max_suppression_output(detections, opt.conf_thres,
                                                opt.nms_thres)

        # print(detections)

        # Map detections back to the original frame: boxes were predicted on the
        # [416 x 416] resize of the padded square, so scale them up first
        mul_constant = x / opt.frame_size
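        # Worked example (hypothetical 1280x720 input): the padded square is
        # 1280x1280, so mul_constant = 1280 / 416 ≈ 3.08; a box corner at
        # y = 100 in the 416-frame maps to about 308 in the padded square,
        # i.e. about 28 after subtracting the 280-pixel top padding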

        # i counts the people detected without a mask
        i = 0

        # For each detection in detections
        for detection in detections:
            if detection is not None:

                print("{0} detections found".format(len(detection)))
                for x1, y1, x2, y2, conf, cls_conf, cls_pred in detection:

                    # Accommodate bounding box in original frame
                    x1 = int(x1 * mul_constant - start_new_i_width)
                    y1 = int(y1 * mul_constant - start_new_i_height)
                    x2 = int(x2 * mul_constant - start_new_i_width)
                    y2 = int(y2 * mul_constant - start_new_i_height)

                    # Draw the bounding box with a class-specific colour
                    if int(cls_pred) == 0:
                        # WITH_MASK: green box
                        cv2.rectangle(org_img, (x1, y1), (x2, y2), (0, 255, 0),
                                      2)
                    else:
                        # WITHOUT_MASK: red box
                        i += 1
                        cv2.rectangle(org_img, (x1, y1), (x2, y2), (0, 0, 255),
                                      2)

                    cv2.putText(org_img,
                                classes[int(cls_pred)] + ": %.2f" % conf,
                                (x1, y1 + t_size[1] + 4),
                                cv2.FONT_HERSHEY_PLAIN, 1, [225, 255, 255], 2)
        u"""------------Ready to save!-----------------"""
        import time
        now = time.strftime(u"%Y-%m-%d-%H_%M_%S", time.localtime(time.time()))

        #num is the number of people
        num = len(detection)
        #na=now + '-' + 'NUM:%d'%num +'-'+ 'Nom:%d'%i+'-'+'.jpg'
        u"""------------txt_save-----------------"""
        u"""------------image_save-----------------"""
        na = u'result.jpg'
        out_filepath = os.path.join(opt.output_path, na)
        cv2.imwrite(out_filepath,
                    org_img)  #org_img is final result with frames

        #naa = now + '-' + 'NUM:%d' % num + '-' + 'Nom:%d' % i
        #ssh_scp_put('172.21.39.222',22,'tensor','tensor',out_filepath,'/home/tensor/eden/%s.jpg'%naa)
        #upload_img(na)
        #os.remove(out_filepath)

        # signal is 1 if at least one person without a mask was detected
        signal = 1 if i > 0 else 0

        print("Signal is", signal)
        print("Finished saving!")

        # ------------ txt save ------------
        msg = now + '-' + 'NUM:%d' % num + '-' + 'Nomask:%d' % i + '-'
        nam = 'info.txt'
        full_path = os.path.join(opt.output_path, nam)
        print("----------------")
        with open(full_path, 'w') as f:
            f.write(msg)

    cv2.destroyAllWindows()
    return signal
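A minimal usage sketch for the function above; the directory paths are hypothetical:

if __name__ == "__main__":
    # Hypothetical directories; adjust to your dataset layout
    alarm = mask_catch("data/test_images", "output")
    print("no-mask alarm signal:", alarm)  # 1 if anyone without a mask was detected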
Example #25
def fine_tune(prune_cfg,
              data,
              aux_util,
              device,
              train_loader,
              test_loader,
              epochs=10):
    with open(progress_result, 'a') as f:
        f.write(('\n' + '%10s' * 10 + '\n') %
                ('Stage', 'Epoch', 'DIoU', 'obj', 'cls', 'Total', 'P', 'R',
                 'mAP@0.5', 'F1'))

    batch_size = train_loader.batch_size
    img_size = train_loader.dataset.img_size
    accumulate = 64 // batch_size
    hook_util = HookUtils()

    pruned_model = Darknet(prune_cfg, img_size=(img_size, img_size)).to(device)

    chkpt = torch.load(progress_chkpt, map_location=device)
    pruned_model.load_state_dict(chkpt['model'], strict=True)

    current_layer = chkpt['current_layer']
    aux_in_layer = aux_util.conv_layer_dict[current_layer]
    aux_model = aux_util.creat_aux_model(aux_in_layer)
    aux_model.to(device)

    aux_model.load_state_dict(chkpt['aux_in{}'.format(aux_in_layer)],
                              strict=True)
    aux_loss_scalar = max(0.01, pow((int(aux_in_layer) + 1) / 75, 2))

    start_epoch = chkpt['epoch'] + 1

    if start_epoch == epochs:
        return current_layer  # fine-tuning finished; return the name of the layer to prune

    pg0, pg1 = [], []  # optimizer parameter groups
    for k, v in dict(pruned_model.named_parameters()).items():
        if 'MaskConv2d.weight' in k:
            pg1 += [v]  # parameter group 1 (apply weight_decay)
        else:
            pg0 += [v]  # parameter group 0

    for v in aux_model.parameters():
        pg0 += [v]  # parameter group 0

    optimizer = optim.SGD(pg0,
                          lr=hyp['lr0'],
                          momentum=hyp['momentum'],
                          nesterov=True)
    optimizer.add_param_group({
        'params': pg1,
        'weight_decay': hyp['weight_decay']
    })  # add pg1 with weight_decay
    del pg0, pg1

    if chkpt['optimizer'] is not None:
        optimizer.load_state_dict(chkpt['optimizer'])

    del chkpt

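    # The LR drops 10x at 1/3 and 2/3 of the fine-tune epochs; setting
    # last_epoch resumes the schedule from the checkpointed epoch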
    scheduler = lr_scheduler.MultiStepLR(
        optimizer, milestones=[epochs // 3, 2 * (epochs // 3)], gamma=0.1)
    scheduler.last_epoch = start_epoch - 1

    if device.type != 'cpu' and torch.cuda.device_count() > 1:
        pruned_model = nn.parallel.DistributedDataParallel(
            pruned_model, find_unused_parameters=True)
        pruned_model.yolo_layers = pruned_model.module.yolo_layers

    # -------------start train-------------
    nb = len(train_loader)
    pruned_model.nc = 80
    pruned_model.hyp = hyp
    pruned_model.arc = 'default'
    for epoch in range(start_epoch, epochs):

        # -------------register hook for model-------------
        module_list = (pruned_model.module.module_list if hasattr(
            pruned_model, 'module') else pruned_model.module_list)
        for name, child in module_list.named_children():
            if name == aux_in_layer:
                handle = child.register_forward_hook(
                    hook_util.hook_prune_output)
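                # The hook stashes this layer's output so the auxiliary model
                # can be trained on the pruned network's intermediate features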

        # -------------register hook for model-------------

        pruned_model.train()
        aux_model.train()

        print(('\n' + '%10s' * 7) %
              ('Stage', 'Epoch', 'gpu_mem', 'DIoU', 'obj', 'cls', 'total'))

        # -------------start batch-------------
        mloss = torch.zeros(4).to(device)
        pbar = tqdm(enumerate(train_loader), total=nb)
        for i, (img, targets, _, _) in pbar:
            if len(targets) == 0:
                continue

            ni = nb * epoch + i
            img = img.to(device).float() / 255.0
            targets = targets.to(device)

            pruned_pred = pruned_model(img)
            pruned_loss, pruned_loss_items = compute_loss(
                pruned_pred, targets, pruned_model)
            pruned_loss *= batch_size / 64

            hook_util.cat_to_gpu0()

            aux_pred = aux_model(hook_util.prune_features['gpu0'][0], targets)

            aux_loss = compute_loss_for_DCP(aux_pred, targets)
            aux_loss *= aux_loss_scalar * batch_size / 64

            loss = pruned_loss + aux_loss
            loss.backward()

            hook_util.clean_hook_out()
            if ni % accumulate == 0:
                optimizer.step()
                optimizer.zero_grad()

            pruned_loss_items[2] += aux_loss.item()
            mloss = (mloss * i + pruned_loss_items) / (i + 1)
            # Note: memory_cached() was renamed memory_reserved() in newer PyTorch
            mem = torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0
            s = ('%10s' * 3 + '%10.3g' * 4) % (
                'FiTune ' + current_layer, '%g/%g' % (epoch, epochs - 1),
                '%.3gG' % mem, *mloss)
            pbar.set_description(s)
        # -------------end batch-------------

        scheduler.step()
        handle.remove()

        results, _ = test.test(prune_cfg,
                               data,
                               batch_size=batch_size * 2,
                               img_size=416,
                               model=pruned_model,
                               conf_thres=0.1,
                               iou_thres=0.5,
                               save_json=False,
                               dataloader=test_loader)
        """
        chkpt = {'current_layer':
                 'epoch':
                 'model': 
                 'optimizer': 
                 'aux_in12': 
                 'aux_in37':
                 'aux_in62':
                 'aux_in75':
                 'prune_guide':}
        """
        chkpt = torch.load(progress_chkpt, map_location=device)
        chkpt['current_layer'] = current_layer
        chkpt['epoch'] = epoch
        chkpt['model'] = pruned_model.module.state_dict() if isinstance(
            pruned_model, nn.parallel.DistributedDataParallel
        ) else pruned_model.state_dict()
        chkpt['optimizer'] = None if epoch == epochs - 1 else optimizer.state_dict()
        chkpt['aux_in{}'.format(aux_in_layer)] = aux_model.state_dict()

        torch.save(chkpt, progress_chkpt)

        torch.save(chkpt, last)

        if epoch == epochs - 1:
            torch.save(chkpt,
                       '../weights/DCP/backup{}.pt'.format(current_layer))

        del chkpt

        with open(progress_result, 'a') as f:
            f.write(('%10s' * 2 + '%10.3g' * 8) %
                    ('FiTune ' + current_layer, '%g/%g' %
                     (epoch, epochs - 1), *mloss, *results[:4]) + '\n')
    # -------------end train-------------
    torch.cuda.empty_cache()
    return current_layer
Example #26
def main(target_path, output_path, weights_path, model_cfg, conf_thres,
         nms_thres, xy_loss, wh_loss, no_object_loss, object_loss,
         vanilla_anchor):
    """
        Testing the performance of the network model in following aspects:
        detection duration, precision (positive predictive value) and recall (sensitivity).
    Args:
        target_path: testset file location with images and ground truth txt files.
        output_path: output file location where each image result will be saved.
        weights_path: the path to the tested model weight file.
        model_cfg: the path to the tested model cfg file.
    Returns:
        Saves each test image in the output file location with the bb detection, precision and recall results.
        Prints each test image detetion duration. 
    """
    # Initializing the model
    cuda = torch.cuda.is_available()
    device = torch.device('cuda:0' if cuda else 'cpu')
    random.seed(0)
    torch.manual_seed(0)
    if cuda:
        torch.cuda.manual_seed(0)
        torch.cuda.manual_seed_all(0)
        torch.backends.cudnn.benchmark = True
        torch.cuda.empty_cache()
    model = Darknet(config_path=model_cfg,
                    xy_loss=xy_loss,
                    wh_loss=wh_loss,
                    no_object_loss=no_object_loss,
                    object_loss=object_loss,
                    vanilla_anchor=vanilla_anchor)

    # Load weights
    model.load_weights(weights_path, model.get_start_weight_dim())
    model.to(device, non_blocking=True)

    # Get the images from the folder
    images = glob.glob(f'{target_path}/*.png')
    precisions = []
    recalls = []
    # Looping over all images in the testset
    for idx, fname in enumerate(images):

        img_name = Path(fname).stem
        gt_path = Path(f"{target_path}/{img_name}.txt")

        # Prepare output image with BB with precision and recall
        img_with_boxes = Image.open(fname)
        draw = ImageDraw.Draw(img_with_boxes)  # get a drawing context
        w, h = img_with_boxes.size

        gt_boxes = extract_gt(gt_path, w, h)
        t1 = timer()
        pred_boxes = cones_detection(target_path=fname,
                                     output_path=output_path,
                                     model=model,
                                     device=device,
                                     conf_thres=conf_thres,
                                     nms_thres=nms_thres)
        t2 = timer()
        print(f"Image {idx+1} detection took {round((t2-t1)*1000)} [ms]")
        tp, fp, fn = get_single_image_results(gt_boxes, pred_boxes)
        # Guard against division by zero when an image has no predictions or no GT
        precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
        precisions.append(precision)
        recalls.append(recall)

        # Draw predicted BB on the image
        for box in pred_boxes:
            x0 = box[0]
            y0 = box[1]
            x1 = box[2]
            y1 = box[3]
            draw.rectangle((x0, y0, x1, y1), outline="red")
        # # Draw ground truth BB on the image (for debugging)
        # for box in gt_boxes:
        #     x0 = box[0]
        #     y0 = box[1]
        #     x1 = box[2]
        #     y1 = box[3]
        #     draw.rectangle((x0, y0, x1, y1), outline="green")

        # draw text, full opacity
        fnt = ImageFont.truetype(font="arial.ttf", size=40)  # get a font; assumes arial.ttf is available on the system
        draw.text((10, 10),
                  f"Precision: {precision:.3f}, Recall: {recall:.3f}",
                  fill=(0, 255, 0, 255),
                  font=fnt)

        # img_with_boxes.save(os.path.join(output_path,target_path.split('/')[-1]))
        img_with_boxes.save(os.path.join(output_path, os.path.basename(fname)))

    precision_score = np.mean(precisions)
    recall_score = np.mean(recalls)
    print(
        f"Model precision score: {precision_score}, recall score: {recall_score}"
    )
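The helper get_single_image_results is not shown above; below is a minimal sketch of how such a per-image TP/FP/FN count could work via greedy IoU matching. The [x1, y1, x2, y2] box format, the greedy strategy, and the 0.5 IoU threshold are assumptions, not taken from the original code.

def get_single_image_results(gt_boxes, pred_boxes, iou_thres=0.5):
    """Hypothetical sketch: count TP/FP/FN by greedy IoU matching."""
    def iou(a, b):
        # Intersection over union of two [x1, y1, x2, y2] boxes
        ix1, iy1 = max(a[0], b[0]), max(a[1], b[1])
        ix2, iy2 = min(a[2], b[2]), min(a[3], b[3])
        inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
        area_a = (a[2] - a[0]) * (a[3] - a[1])
        area_b = (b[2] - b[0]) * (b[3] - b[1])
        return inter / (area_a + area_b - inter + 1e-9)

    matched = set()
    tp = 0
    for pred in pred_boxes:
        # Match each prediction to its best unmatched ground-truth box
        best_iou, best_idx = 0.0, None
        for idx, gt in enumerate(gt_boxes):
            if idx in matched:
                continue
            cur = iou(pred, gt)
            if cur > best_iou:
                best_iou, best_idx = cur, idx
        if best_idx is not None and best_iou >= iou_thres:
            matched.add(best_idx)
            tp += 1
    fp = len(pred_boxes) - tp  # unmatched predictions
    fn = len(gt_boxes) - tp    # unmatched ground-truth boxes
    return tp, fp, fn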
Example #27
    parser = argparse.ArgumentParser()
    parser.add_argument("--n_cpu",
                        type=int,
                        default=0,
                        help="number of cpu threads to use during batch generation")
    parser.add_argument("--img_size",
                        type=int,
                        default=416,
                        help="size of each image dimension")
    parser.add_argument("--checkpoint_model",
                        type=str,
                        help="path to checkpoint model")
    opt = parser.parse_args()
    print(opt)

    os.makedirs("output", exist_ok=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = Darknet(opt.model_def, img_size=opt.img_size).to(device)

    if opt.weights_path.endswith(".weights"):
        """Load darknet weights"""
        model.load_darknet_weights(opt.weights_path)
    else:
        """Load checkpoint weights"""
        model.load_state_dict(torch.load(opt.weights_path))

    model.eval()

    dataloader = DataLoader(
        ImageFolder(opt.image_folder, img_size=opt.img_size),
        batch_size=opt.batch_size,
        shuffle=False,
        num_workers=opt.n_cpu,
    )
Example #28
def train():
	
	writer = SummaryWriter(os.path.join('fcn_runs', opt.exp_name))
	best_test_loss = np.inf
	
	batch_size = opt.batch_size
	num_workers = opt.num_workers
	device = torch.device('cuda:{}'.format(opt.device))
	epochs = opt.epochs

	base_model = Darknet(opt.cfg)
	# Load weights
	weights = 'weights/yolov3.pt'
	attempt_download(weights)
	if weights.endswith('.pt'):  # pytorch format
		base_model.load_state_dict(torch.load(weights, map_location=device)['model'])
	else:  # darknet format
		load_darknet_weights(base_model, weights)

	model = Net(base_model)
	model.to(device)

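	# Freeze the YOLO backbone so that only the layers added by Net are trained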
	for module in model.base_model.module_list:
		for parameter in module.parameters():
			parameter.requires_grad = False

	# optimizer = optim.Adam(model.parameters(), lr=1e-3)
	optimizer = optim.SGD(model.parameters(), lr=1e-2, momentum=0.9, nesterov=True)
	scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.995, last_epoch=-1)
	criterion = FocalLoss(weights=None, gamma=2)
	# criterion = nn.BCEWithLogitsLoss()
	
	train_path = 'coco/train2017.txt'
	test_path = 'coco/val2017.txt'
	trainset = LoadImagesAndLabels(train_path, img_size, batch_size, augment=True, hyp=hyp, rect=opt.rect, cache_images=False, single_cls=False)
	testset = LoadImagesAndLabels(test_path, imgsz_test, batch_size, hyp=hyp, rect=True, cache_images=True, single_cls=False)

	# trainloader
	nw = num_workers
	trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, num_workers=nw,
											 shuffle=not opt.rect,  # Shuffle=True unless rectangular training is used
											 pin_memory=True, collate_fn=trainset.collate_fn)
	# Testloader
	testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, num_workers=nw, pin_memory=True, collate_fn=trainset.collate_fn)


	for epoch in range(epochs):
		model.train()

		train_loss, train_acc = eval(trainloader, model, criterion, optimizer, device, train=True)
		print('Epoch {} | Train Loss: {}'.format(epoch, train_loss))
		writer.add_scalar('Train Loss', train_loss, epoch)
		
		with torch.no_grad():
			model.eval()
			test_loss, test_acc = eval(testloader, model, criterion, optimizer, device, train=False)
			
			print('Epoch {} | Test Loss: {}'.format(epoch, test_loss))
			writer.add_scalar('Test Loss', test_loss, epoch)

			if test_loss < best_test_loss:
				# Save the best checkpoint (os.path.join with one argument was a no-op)
				torch.save(model.state_dict(), '{}_test_weights.pt'.format(opt.exp_name))
				best_test_loss = test_loss

		for param_group in optimizer.param_groups:
			print('learning rate: {}'.format(param_group['lr']))
		scheduler.step()
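For context, a hypothetical driver that supplies the opt fields train() reads (exp_name, cfg, batch_size, num_workers, device, epochs, rect); every default below is an assumption, and hyp, img_size and imgsz_test are expected to be defined elsewhere in the module:

if __name__ == '__main__':
	import argparse
	parser = argparse.ArgumentParser()
	parser.add_argument('--exp_name', type=str, default='fcn_exp')    # assumed name
	parser.add_argument('--cfg', type=str, default='cfg/yolov3.cfg')  # assumed path
	parser.add_argument('--batch_size', type=int, default=16)
	parser.add_argument('--num_workers', type=int, default=4)
	parser.add_argument('--device', type=int, default=0)              # GPU index
	parser.add_argument('--epochs', type=int, default=50)
	parser.add_argument('--rect', action='store_true')
	opt = parser.parse_args()
	train()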
Example #29
    os.makedirs("output", exist_ok=True)
    os.makedirs("checkpoints", exist_ok=True)

    # Get data configuration
    data_config = parse_data_config(opt.data_config)
    if platform == "linux" or platform == "linux2":
        train_path = data_config["train_Linux"]
        valid_path = data_config["valid_Linux"]
    else:
        train_path = data_config["train"]
        valid_path = data_config["valid"]

    class_names = load_classes(data_config["names"])
    # Initiate model
    model = Darknet(opt.model_def).to(device)
    model.apply(weights_init_normal)

    # If specified we start from checkpoint
    if opt.pretrained_weights:
        if opt.pretrained_weights.endswith(".pth"):
            model.load_state_dict(torch.load(opt.pretrained_weights))
        else:
            model.load_darknet_weights(opt.pretrained_weights)

    # Get dataloader
    dataset = ListDataset(train_path,
                          augment=False,
                          multiscale=opt.multiscale_training)
    dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=opt.batch_size,  # assumed continuation of the truncated call
        shuffle=True,
        num_workers=opt.n_cpu,
        pin_memory=True,
        collate_fn=dataset.collate_fn,
    )
Example #30
def main():
    img_size = 512  # must be a multiple of 32: [416, 512, 608]
    cfg = "/home/mist/yolov3_spp/cfg/yolov3-spp.cfg"  # change to your generated .cfg file
    weights = "/home/mist/yolov3_spp/weights/yolov3spp-29.pt"  # change to your trained weight file
    json_path = "/home/mist/yolov3_spp/data/pascal_voc_classes.json"  # JSON label file
    img_path = "test.jpg"
    assert os.path.exists(cfg), "cfg file {} does not exist.".format(cfg)
    assert os.path.exists(weights), "weights file {} does not exist.".format(
        weights)
    assert os.path.exists(json_path), "json file {} does not exist.".format(
        json_path)
    assert os.path.exists(img_path), "image file {} does not exist.".format(
        img_path)

    with open(json_path, 'r') as json_file:
        class_dict = json.load(json_file)
    category_index = {v: k for k, v in class_dict.items()}

    input_size = (img_size, img_size)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    model = Darknet(cfg, img_size)
    model.load_state_dict(torch.load(weights, map_location=device)["model"])
    model.to(device)

    model.eval()
    with torch.no_grad():
        # Warm-up: one dummy forward pass to initialize the model and cudnn
        img = torch.zeros((1, 3, img_size, img_size), device=device)
        model(img)

        img_o = cv2.imread(img_path)  # BGR
        assert img_o is not None, "Image Not Found " + img_path

        img = img_utils.letterbox(img_o,
                                  new_shape=input_size,
                                  auto=True,
                                  color=(0, 0, 0))[0]
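        # letterbox keeps the aspect ratio and pads the borders, so predictions
        # must be mapped back to the original image with scale_coords below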
        # Convert
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR -> RGB, HWC -> CHW
        img = np.ascontiguousarray(img)

        img = torch.from_numpy(img).to(device).float()
        img /= 255.0  # scale (0, 255) to (0, 1)
        img = img.unsqueeze(0)  # add batch dimension

        t1 = torch_utils.time_synchronized()
        pred = model(img)[0]  # only get inference result
        t2 = torch_utils.time_synchronized()
        print("inference time: {:.4f}s".format(t2 - t1))

        pred = utils.non_max_suppression(pred,
                                         conf_thres=0.1,
                                         iou_thres=0.6,
                                         multi_label=True)[0]
        t3 = time.time()
        print("nms time: {:.4f}s".format(t3 - t2))

        if pred is None:
            print("No target detected.")
            exit(0)

        # process detections
        pred[:, :4] = utils.scale_coords(img.shape[2:], pred[:, :4],
                                         img_o.shape).round()
        print(pred.shape)

        bboxes = pred[:, :4].detach().cpu().numpy()
        scores = pred[:, 4].detach().cpu().numpy()
        classes = pred[:, 5].detach().cpu().numpy().astype(int) + 1  # np.int was removed in newer NumPy

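        # draw_box is assumed to return a PIL.Image, hence the .save() call below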
        img_o = draw_box(img_o[:, :, ::-1], bboxes, classes, scores,
                         category_index)
        plt.imshow(img_o)
        plt.show()

        img_o.save("test_result.jpg")