Example #1
def main(args=None):
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--model_path', help='Path to model', type=str)

    parser = parser.parse_args(args)

    dataset_val = CocoDataset(parser.coco_path, set_name='val2017',
                              transform=transforms.Compose([Normalizer(), Resizer()]))
    dataset_val.image_ids = dataset_val.image_ids[:50]  # TEST: evaluate on only the first 50 images

    # Create the model
    retinanet = model.resnet50(num_classes=dataset_val.num_classes(), pretrained=True)

    use_gpu = True

    if use_gpu:
        if torch.cuda.is_available():
            retinanet = retinanet.cuda()

    if torch.cuda.is_available():
        retinanet.load_state_dict(torch.load(parser.model_path))
        retinanet = torch.nn.DataParallel(retinanet).cuda()
    else:
        retinanet.load_state_dict(torch.load(parser.model_path))
        retinanet = torch.nn.DataParallel(retinanet)

    retinanet.training = False
    retinanet.eval()
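    # freeze_bn() keeps all BatchNorm layers in eval mode so their running statistics stay fixed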
    retinanet.module.freeze_bn()

    coco_eval.evaluate_coco(dataset_val, retinanet)
Example #2
def main(args=None):
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--model_path', help='Path to model', type=str)

    parser = parser.parse_args(args)

    model_path = './model_final.pt'
    test_path = './test'
    dataset_test = AIZOODataset(test_path, transforms=transforms.Compose([Normalizer(), Resizer()]))

    # Create the model
    retinanet = model.resnet50(num_classes=3, pretrained=False)

    use_gpu = True

    if use_gpu:
        if torch.cuda.is_available():
            retinanet = retinanet.cuda()

    if torch.cuda.is_available():
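        # here the checkpoint is loaded as a complete serialized module, replacing the resnet50 constructed above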
        retinanet = torch.load(model_path)
        #retinanet.load_state_dict(checkpoint.module)
        retinanet = torch.nn.DataParallel(retinanet).cuda()
    else:
        retinanet.load_state_dict(torch.load(model_path))
        retinanet = torch.nn.DataParallel(retinanet)

    retinanet.training = False
    retinanet.eval()
    #retinanet.freeze_bn()

    coco_eval.evaluate_coco(dataset_test, retinanet)
Example #3
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--model_path', help='Path to the model file.')
    parser.add_argument('--configfile', help='Path to the config file.')
    parser.add_argument('--model_out_path',
                        help='Path to the output model file')
    parser = parser.parse_args(args)
    configs = configparser.ConfigParser()
    configs.read(parser.configfile)

    try:

        depth = int(configs['TRAINING']['depth'])
        num_classes = int(configs['TRAINING']['num_classes'])
        try:
            ratios = json.loads(configs['MODEL']['ratios'])
            scales = json.loads(configs['MODEL']['scales'])
        except Exception as e:
            print(e)
            print('USING DEFAULT RATIOS AND SCALES')
            ratios = None
            scales = None
    except Exception as e:
        print(e)
        print(
            'CONFIG FILE IS INVALID. PLEASE REFER TO THE EXAMPLE CONFIG FILE AT config.txt'
        )
        sys.exit()

    # Create the model
    if depth == 18:
        retinanet = model.resnet18(num_classes=num_classes,
                                   pretrained=False,
                                   ratios=ratios,
                                   scales=scales)
    elif depth == 50:
        retinanet = model.resnet50(num_classes=num_classes,
                                   pretrained=True,
                                   ratios=ratios,
                                   scales=scales)
    else:
        print(f"DEPTH FROM : {parser.configfile} INACCURATE. MUST BE 18 or 50")
        sys.exit(0)

    if torch.cuda.is_available():
        retinanet = retinanet.cuda()
        retinanet.load_state_dict(torch.load(parser.model_path))

    else:
        retinanet.load_state_dict(
            torch.load(parser.model_path, map_location=torch.device('cpu')))

    torch.save(retinanet.state_dict(),
               parser.model_out_path,
               _use_new_zipfile_serialization=False)
Example #4
    def Model(self, model_name="resnet18", gpu_devices=[0]):
        '''
        User function: Set Model parameters

            Available Models
                resnet18
                resnet34
                resnet50
                resnet101
                resnet152

        Args:
            model_name (str): Select model from available models
            gpu_devices (list): List of GPU Device IDs to be used in training

        Returns:
            None
        '''

        num_classes = self.system_dict["local"]["dataset_train"].num_classes()
        if model_name == "resnet18":
            retinanet = model.resnet18(num_classes=num_classes,
                                       pretrained=True)
        elif model_name == "resnet34":
            retinanet = model.resnet34(num_classes=num_classes,
                                       pretrained=True)
        elif model_name == "resnet50":
            retinanet = model.resnet50(num_classes=num_classes,
                                       pretrained=True)
        elif model_name == "resnet101":
            retinanet = model.resnet101(num_classes=num_classes,
                                        pretrained=True)
        elif model_name == "resnet152":
            retinanet = model.resnet152(num_classes=num_classes,
                                        pretrained=True)

        if self.system_dict["params"]["use_gpu"]:
            self.system_dict["params"]["gpu_devices"] = gpu_devices
            if len(self.system_dict["params"]["gpu_devices"]) == 1:
                os.environ["CUDA_VISIBLE_DEVICES"] = str(
                    self.system_dict["params"]["gpu_devices"][0])
            else:
                os.environ["CUDA_VISIBLE_DEVICES"] = ','.join([
                    str(id) for id in self.system_dict["params"]["gpu_devices"]
                ])
            self.system_dict["local"][
                "device"] = 'cuda' if torch.cuda.is_available() else 'cpu'
            retinanet = retinanet.to(self.system_dict["local"]["device"])
            retinanet = torch.nn.DataParallel(retinanet).to(
                self.system_dict["local"]["device"])

        retinanet.training = True
        retinanet.train()
        retinanet.module.freeze_bn()

        self.system_dict["local"]["model"] = retinanet
Example #5
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--csv_annotations_path',
                        help='Path to CSV annotations')
    parser.add_argument('--model_path', help='Path to model', type=str)
    parser.add_argument('--images_path',
                        help='Path to images directory',
                        type=str)
    parser.add_argument('--class_list_path',
                        help='Path to classlist csv',
                        type=str)
    parser.add_argument('--iou_threshold',
                        help='IOU threshold used for evaluation',
                        type=str,
                        default='0.5')
    parser = parser.parse_args(args)

    #dataset_val = CocoDataset(parser.coco_path, set_name='val2017',transform=transforms.Compose([Normalizer(), Resizer()]))
    dataset_val = CSVDataset(parser.csv_annotations_path,
                             parser.class_list_path,
                             transform=transforms.Compose(
                                 [Normalizer(), Resizer()]))
    # Create the model
    #retinanet = model.resnet50(num_classes=dataset_val.num_classes(), pretrained=True)
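    # anchor ratios/scales default to None (standard anchors) and may be overridden by values from config2.yaml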
    config = dict({"scales": None, "ratios": None})
    config = load_config("config2.yaml", config)
    retinanet = model.resnet50(num_classes=dataset_val.num_classes(),
                               pretrained=False,
                               ratios=config["ratios"],
                               scales=config["scales"])

    retinanet, _, _ = load_ckpt(parser.model_path, retinanet)

    use_gpu = True

    if use_gpu:
        print("Using GPU for validation process")
        if torch.cuda.is_available():
            retinanet = torch.nn.DataParallel(retinanet.cuda())
    else:
        retinanet = torch.nn.DataParallel(retinanet)

    retinanet.training = False
    retinanet.eval()
    retinanet.module.freeze_bn()

    print(
        csv_eval.evaluate(dataset_val,
                          retinanet,
                          score_threshold=0.4,
                          iou_threshold=float(parser.iou_threshold)))
Example #6
def export(
    checkpoint: str,
    output_path,
    num_classes: Optional[int] = 1,
    model_arch: Optional[str] = "resnet-50",
    input_size: Optional[Tuple[int, int]] = (512, 512),
    batch_size: Optional[int] = 1,
    verbose: Optional[bool] = False,
):

    assert output_path.endswith(
        ".onnx"), "`output_path` must be path to the output `onnx` file"
    if model_arch == "resnet-18":
        net = model.resnet18(num_classes)
    elif model_arch == "resnet-34":
        net = model.resnet34(num_classes)
    elif model_arch == "resnet-50":
        net = model.resnet50(num_classes)
    elif model_arch == "resnet-101":
        net = model.resnet101(num_classes)
    elif model_arch == "resnet-152":
        net = model.resnet152(num_classes)
    else:
        raise NotImplementedError

    device = torch.device(
        "cuda:0") if torch.cuda.is_available() else torch.device("cpu")
    logger.info(f"using device: {device}")
    net = net.to(device)
    state_dict = torch.load(checkpoint, map_location=device)
    state_dict = remove_module(state_dict)
    net.load_state_dict(state_dict)
    logger.info(f"successfully loaded saved checkpoint.")

    dummy_input = torch.randn(batch_size, 3, input_size[0], input_size[1])
    net.eval()
    net.export = True
    dummy_input = dummy_input.to(device)

    logger.info(f"exporting to {output_path}...")
    torch.onnx.export(
        net,
        dummy_input,
        output_path,
        opset_version=11,
        verbose=verbose,
        input_names=["input"],
        output_names=["anchors", "classification", "regression"],
    )
    logger.info("export complete")
Example #7
def load_model(model_path, configfile, no_nms=False):
    configs = configparser.ConfigParser()
    configs.read(configfile)

    try:
        depth = int(configs['TRAINING']['depth'])
        input_shape = json.loads(configs['MODEL']['input_shape'])
        num_classes = int(configs['TRAINING']['num_classes'])
        try:
            ratios = json.loads(configs['MODEL']['ratios'])
            scales = json.loads(configs['MODEL']['scales'])
        except Exception as e:
            print(e)
            print('USING DEFAULT RATIOS AND SCALES')
            ratios = None
            scales = None
    except Exception as e:
        print(e)
        print('CONFIG FILE IS INVALID. PLEASE REFER TO THE EXAMPLE CONFIG FILE AT config.txt')
        sys.exit()

    # Create the model
    if depth == 18:
        retinanet = model.resnet18(num_classes=num_classes, pretrained=False, ratios=ratios,
                                   scales=scales, no_nms=no_nms)
    elif depth == 50:
        retinanet = model.resnet50(num_classes=num_classes, pretrained=True, ratios=ratios,
                                   scales=scales, no_nms=no_nms)
    else:
        print(f"DEPTH FROM : {configfile} INACCURATE. MUST BE 18 or 50")
        sys.exit(0)

    if torch.cuda.is_available():
        retinanet = retinanet.cuda()
        retinanet.load_state_dict(torch.load(model_path))

    else:
        retinanet.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))

    retinanet.training = False
    retinanet.eval()

    return retinanet
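
A minimal usage sketch for load_model above, hedged: the preprocessing mirrors Example #9, the eval-mode forward pass is assumed to return scores, labels and boxes as in that example, and the file names are placeholders.

def run_inference(image_path, model_path='model_final.pt', configfile='config.txt'):
    import cv2
    import numpy as np
    import torch

    retinanet = load_model(model_path, configfile)

    # BGR -> RGB, scale to [0, 1], then apply the ImageNet normalization used during training
    img = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
    img = (img - np.array([[[0.485, 0.456, 0.406]]])) / np.array([[[0.229, 0.224, 0.225]]])

    # resize so height and width are multiples of 32 and divide evenly by the network strides
    rows, cols, _ = img.shape
    img = cv2.resize(img, (cols + (-cols) % 32, rows + (-rows) % 32))

    batch = torch.from_numpy(img).permute(2, 0, 1).float().unsqueeze(0)
    if torch.cuda.is_available():
        batch = batch.cuda()

    with torch.no_grad():
        scores, labels, boxes = retinanet(batch)
    return scores, labels, boxes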
Example #8
    def Model(self, model_name="resnet18", gpu_devices=[0]):

        num_classes = self.system_dict["local"]["dataset_train"].num_classes()
        if model_name == "resnet18":
            retinanet = model.resnet18(num_classes=num_classes,
                                       pretrained=True)
        elif model_name == "resnet34":
            retinanet = model.resnet34(num_classes=num_classes,
                                       pretrained=True)
        elif model_name == "resnet50":
            retinanet = model.resnet50(num_classes=num_classes,
                                       pretrained=True)
        elif model_name == "resnet101":
            retinanet = model.resnet101(num_classes=num_classes,
                                        pretrained=True)
        elif model_name == "resnet152":
            retinanet = model.resnet152(num_classes=num_classes,
                                        pretrained=True)

        if self.system_dict["params"]["use_gpu"]:
            self.system_dict["params"]["gpu_devices"] = gpu_devices
            if len(self.system_dict["params"]["gpu_devices"]) == 1:
                os.environ["CUDA_VISIBLE_DEVICES"] = str(
                    self.system_dict["params"]["gpu_devices"][0])
            else:
                os.environ["CUDA_VISIBLE_DEVICES"] = ','.join([
                    str(id) for id in self.system_dict["params"]["gpu_devices"]
                ])
            self.system_dict["local"][
                "device"] = 'cuda' if torch.cuda.is_available() else 'cpu'
            retinanet = retinanet.to(self.system_dict["local"]["device"])
            retinanet = torch.nn.DataParallel(retinanet).to(
                self.system_dict["local"]["device"])

        retinanet.training = True
        retinanet.train()
        retinanet.module.freeze_bn()

        self.system_dict["local"]["model"] = retinanet
Example #9
def main(args=None):
    sys.argv.append('--coco_path')
    sys.argv.append(
        '/home/jht/github/deep-high-resolution-net.pytorch/data/coco')
    sys.argv.append('--model_path')
    sys.argv.append('coco_resnet_50_map_0_335_state_dict.pt')

    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--model_path', help='Path to model', type=str)
    parser = parser.parse_args(args)

    retinanet = model.resnet50(num_classes=80, pretrained=True)
    use_gpu = True

    if use_gpu:
        if torch.cuda.is_available():
            retinanet = retinanet.cuda()

    if torch.cuda.is_available():
        retinanet.load_state_dict(torch.load(parser.model_path))
        retinanet = torch.nn.DataParallel(retinanet).cuda()
    else:
        retinanet.load_state_dict(torch.load(parser.model_path))
        retinanet = torch.nn.DataParallel(retinanet)

    retinanet.training = False
    retinanet.eval()
    retinanet.module.freeze_bn()

    def draw_caption(image, box, caption):

        b = np.array(box).astype(int)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN,
                    1, (0, 0, 0), 2)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN,
                    1, (255, 255, 255), 1)

    # initialize the capture and read the first frame; rval indicates whether a frame was grabbed, frame is the captured image
    vc = cv2.VideoCapture('/home/jht/16_2.MP4')
    rval, frame = vc.read()

    # get the video FPS
    fps = vc.get(cv2.CAP_PROP_FPS)
    # get the total number of frames
    frame_all = vc.get(cv2.CAP_PROP_FRAME_COUNT)
    print("[INFO] video FPS: {}".format(fps))
    print("[INFO] total number of frames: {}".format(frame_all))
    print("[INFO] video duration: {}s".format(frame_all / fps))

    mean = np.array([[[0.485, 0.456, 0.406]]])
    std = np.array([[[0.229, 0.224, 0.225]]])
    while rval:

        with torch.no_grad():
            st = time.time()
            rval, img_bgr = vc.read()
            if not rval:
                break
            img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
            img_rgb = img_rgb.astype(np.float32) / 255.0
            frame = (img_rgb - mean) / std
            rows, cols, cns = frame.shape
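            # resize so height and width are multiples of 32 and divide evenly by the network's downsampling strides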
            pad_w = (32 - rows % 32) % 32
            pad_h = (32 - cols % 32) % 32
            rows = rows + pad_w
            cols = cols + pad_h
            new_image = cv2.resize(frame, (cols, rows))
            img = torch.from_numpy(new_image)

            if torch.cuda.is_available():
                scores, labels, boxes = retinanet(
                    img.permute(2, 0, 1).cuda().float().unsqueeze(dim=0))
            else:
                scores, labels, boxes = retinanet(
                    img.permute(2, 0, 1).float().unsqueeze(dim=0))
            print('Elapsed time: {}'.format(time.time() - st))
            idxs = np.where(scores.cpu() > 0.7)

            tensor = img * std + mean
            img = tensor.mul(255).clamp(0, 255).byte().cpu().numpy()
            img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

            for j in range(idxs[0].shape[0]):
                bbox = boxes[idxs[0][j], :]
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                print(int(labels[idxs[0][j]]))
                if int(labels[idxs[0][j]]) == 0:
                    draw_caption(img, (x1, y1, x2, y2), "person")
                    cv2.rectangle(img, (x1, y1), (x2, y2),
                                  color=(0, 0, 255),
                                  thickness=2)

            cv2.imshow('img', img)
            cv2.waitKey(0)
Example #10
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.',
                        default='csv')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)',
        default='data/train_retinanet.csv')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)',
                        default='data/class_retinanet.csv')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)',
        default='data/val_retinanet.csv')

    parser.add_argument('--model_path',
                        default='coco_resnet_50_map_0_335_state_dict.pt',
                        help='Path to file containing pretrained retinanet')

    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--epochs_detection',
                        help='Number of epochs for detection',
                        type=int,
                        default=50)
    parser.add_argument('--epochs_classification',
                        help='Number of epochs for classification',
                        type=int,
                        default=50)

    parser = parser.parse_args(args)

    # Create the data loaders
    if parser.dataset == 'coco':

        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO,')

        dataset_train = CocoDataset(parser.coco_path,
                                    set_name='train2017',
                                    transform=transforms.Compose(
                                        [Normalizer(),
                                         Augmenter(),
                                         Resizer()]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))

    elif parser.dataset == 'csv':

        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on csv.')

        if parser.csv_classes is None:
            raise ValueError(
                'Must provide --csv_classes when training on csv.')

        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(),
                                        Augmenter(),
                                        Resizer()]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(),
                                          Resizer()]))

    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=1,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=3,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=1,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=3,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True

    if parser.model_path is not None:
        print('loading ', parser.model_path)
        if 'coco' in parser.model_path:
            retinanet.load_state_dict(torch.load(parser.model_path),
                                      strict=False)
        else:
            retinanet = torch.load(parser.model_path)
        print('Pretrained model loaded!')

    if use_gpu:
        if torch.cuda.is_available():
            retinanet = retinanet.cuda()

    if torch.cuda.is_available():
        retinanet = torch.nn.DataParallel(retinanet).cuda()
    else:
        retinanet = torch.nn.DataParallel(retinanet)

    # Here we train the detection part
    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=4,
                                                     verbose=True)
    loss_hist = collections.deque(maxlen=500)
    loss_style_classif = nn.CrossEntropyLoss()

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))
    mAP_list = []
    mAPbest = 0
    for epoch_num in range(parser.epochs_detection):

        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()

                if torch.cuda.is_available():
                    [classification_loss, regression_loss], style = retinanet(
                        [data['img'].cuda().float(), data['annot']])
                else:
                    [classification_loss, regression_loss
                     ], style = retinanet([data['img'].float(), data['annot']])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                if torch.cuda.is_available():
                    style_loss = loss_style_classif(
                        style,
                        torch.tensor(data['style']).cuda())
                else:
                    style_loss = loss_style_classif(
                        style, torch.tensor(data['style']))
                loss = classification_loss + regression_loss + style_loss

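                # a loss of zero typically means the batch had no usable annotations; skip it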
                if bool(loss == 0):
                    continue

                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))

                print(
                    'Epoch: {} | Iteration: {} | Classification loss: {:1.4f} | Regression loss: {:1.4f} | Style loss: {:1.4f} | Running loss: {:1.4f}'
                    .format(epoch_num, iter_num, float(classification_loss),
                            float(regression_loss), float(style_loss),
                            np.mean(loss_hist)))

                del classification_loss
                del regression_loss
                del style_loss
            except Exception as e:
                print(e)
                continue

        if parser.dataset == 'coco':
            print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, retinanet)

        elif parser.dataset == 'csv' and parser.csv_val is not None:
            print('Evaluating dataset')
            mAPclasses, mAP, accu = csv_eval.evaluate(dataset_val, retinanet)
            mAP_list.append(mAP)
            print('mAP_list', mAP_list)
        if mAP > mAPbest:
            print('Saving best checkpoint')
            torch.save(retinanet, 'model_best.pt')
            mAPbest = mAP

        scheduler.step(np.mean(epoch_loss))
        torch.save(retinanet.module,
                   '{}_retinanet_{}.pt'.format(parser.dataset, epoch_num))

    retinanet.eval()
    torch.save(retinanet, 'model_final.pt')

    # Here we aggregate all the features so we don't have to apply the RetinaNet backbone again during style training.
    retinanet.load_state_dict(torch.load('model_best.pt').state_dict())
    List_feature = []
    List_target = []
    retinanet.training = False
    retinanet.eval()
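    # with style_inference enabled, the forward pass also returns the pooled feature vector collected below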
    retinanet.module.style_inference = True

    retinanet.module.freeze_bn()

    epoch_loss = []
    with torch.no_grad():
        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()

                if torch.cuda.is_available():
                    _, _, feature_vec = retinanet(data['img'].cuda().float())
                else:
                    _, _, feature_vec = retinanet(data['img'].float())
                List_feature.append(torch.squeeze(feature_vec).cpu())
                List_target.append(data['style'][0])
            except Exception as e:
                print(e)
                continue
    print('END of preparation of the data for classification of style')
    # Here the style training begins. The model has to be switched to style_train mode. The same loader is reused because both tasks were originally meant to be trained at the same time.

    batch_size_classification = 64
    dataloader_train_style = torch.utils.data.DataLoader(
        StyleDataset(List_feature, List_target),
        batch_size=batch_size_classification)

    retinanet.load_state_dict(torch.load('model_best.pt').state_dict())

    # Here we train the style classification head

    retinanet.module.style_inference = False
    retinanet.module.style_train(True)
    retinanet.training = True
    retinanet.train()
    optimizer = optim.Adam(
        retinanet.module.styleClassificationModel.parameters(),
        lr=5e-3,
        weight_decay=1e-3)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     mode='max',
                                                     patience=4,
                                                     verbose=True)
    loss_hist = collections.deque(maxlen=500)
    loss_style_classif = nn.CrossEntropyLoss()
    retinanet.train()
    retinanet.module.freeze_bn()
    criterion = nn.CrossEntropyLoss()
    accu_list = []
    accubest = 0
    for epoch_num in range(parser.epochs_classification):

        retinanet.train()
        retinanet.module.freeze_bn()
        epoch_loss = []
        total = 0
        correct = 0
        for iter_num, data in enumerate(dataloader_train_style):
            try:
                optimizer.zero_grad()
                inputs, targets = data
                if torch.cuda.is_available():
                    inputs, targets = inputs.cuda(), targets.cuda()

                outputs = retinanet.module.styleClassificationModel(
                    inputs, 0, 0, 0, True)
                loss = criterion(outputs, targets)
                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))
                total += targets.size(0)
                _, predicted = torch.max(outputs.data, 1)
                correct += predicted.eq(targets.data).cpu().sum()

                print(
                    '| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f Acc@1: %.3f%%'
                    %
                    (epoch_num, parser.epochs_classification, iter_num + 1,
                     (len(dataloader_train_style) // batch_size_classification)
                     + 1, loss.item(), 100. * correct / total))

            except Exception as e:
                print(e)
                continue

        if parser.dataset == 'coco':
            print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, retinanet)

        elif parser.dataset == 'csv' and parser.csv_val is not None:
            print('Evaluating dataset')
            mAPclasses, mAP, accu = csv_eval.evaluate(dataset_val, retinanet)
            accu_list.append(accu)
            print('mAP_list', mAP_list, 'accu_list', accu_list)
        if accu > accubest:
            print('Saving best checkpoint')
            torch.save(retinanet.module, 'model_best_classif.pt')
            accubest = accu

        scheduler.step(accu)
        torch.save(retinanet.module,
                   '{}_retinanet_{}.pt'.format(parser.dataset, epoch_num))

    retinanet.eval()
    torch.save(retinanet.module, 'model_final.pt')
Example #11
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )
    parser.add_argument('--model_save_path',
                        help='Path to save model',
                        type=str)

    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=100)

    parser = parser.parse_args(args)

    # Create the data loaders
    if parser.dataset == 'coco':

        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO,')

        dataset_train = CocoDataset(parser.coco_path,
                                    set_name='train2017',
                                    transform=transforms.Compose(
                                        [Normalizer(),
                                         Augmenter(),
                                         Resizer()]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))

    elif parser.dataset == 'csv':

        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on csv.')

        if parser.csv_classes is None:
            raise ValueError(
                'Must provide --csv_classes when training on csv.')

        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(),
                                        Augmenter(),
                                        Resizer()]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(),
                                          Resizer()]))
    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=8,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=3,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=1,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=3,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True

    if use_gpu:
        if torch.cuda.is_available():
            retinanet = retinanet.cuda()

    if torch.cuda.is_available():
        retinanet = torch.nn.DataParallel(retinanet).cuda()
    else:
        retinanet = torch.nn.DataParallel(retinanet)

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    # add draw tensorboard code
    writer = SummaryWriter(log_dir='./logs/416*416/', flush_secs=60)
    # if Cuda:
    #     graph_inputs = torch.from_numpy(np.random.rand(1, 3, input_shape[0], input_shape[1])).type(
    #         torch.FloatTensor).cuda()
    # else:
    #     graph_inputs = torch.from_numpy(np.random.rand(1, 3, input_shape[0], input_shape[1])).type(torch.FloatTensor)
    # writer.add_graph(model, (graph_inputs,))

    # add gap save model count variable
    n = 0

    for epoch_num in range(parser.epochs):
        n += 1

        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []

        ### begin calculate train loss
        for iter_num, data in enumerate(dataloader_train):
            # try:
            optimizer.zero_grad()

            if torch.cuda.is_available():
                classification_loss, regression_loss = retinanet(
                    [data['img'].cuda().float(), data['annot']])
            else:
                classification_loss, regression_loss = retinanet(
                    [data['img'].float(), data['annot']])

            classification_loss = classification_loss.mean()
            regression_loss = regression_loss.mean()

            loss = classification_loss + regression_loss

            if bool(loss == 0):
                continue

            loss.backward()

            torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

            optimizer.step()

            loss_hist.append(float(loss))

            epoch_loss.append(float(loss))

            print(
                'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                .format(epoch_num, iter_num, float(classification_loss),
                        float(regression_loss), np.mean(loss_hist)))

            del classification_loss
            del regression_loss
            # except Exception as e:
            #     print(e)
            #     continue

        ### begin calculate valid loss
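        # validation pass: losses are only logged here; no backward() or optimizer.step() is performed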
        for iter_num, data in enumerate(dataloader_val):
            # try:
            optimizer.zero_grad()

            if torch.cuda.is_available():
                classification_loss, regression_loss = retinanet(
                    [data['img'].cuda().float(), data['annot']])
            else:
                classification_loss, regression_loss = retinanet(
                    [data['img'].float(), data['annot']])

            classification_loss = classification_loss.mean()
            regression_loss = regression_loss.mean()

            loss = classification_loss + regression_loss

            if bool(loss == 0):
                continue

            loss_hist.append(float(loss))

            print(
                'Epoch: {} | Iteration: {} | Valid-Classification loss: {:1.5f} | Valid-Regression loss: {:1.5f} | Running Valid loss: {:1.5f}'
                .format(epoch_num, iter_num, float(classification_loss),
                        float(regression_loss), np.mean(loss_hist)))

            del classification_loss
            del regression_loss

        if parser.dataset == 'coco':

            print('Evaluating dataset')

            coco_eval.evaluate_coco(dataset_val, retinanet)

        elif parser.dataset == 'csv' and parser.csv_val is not None:

            print('Evaluating dataset')

            mAP = csv_eval.evaluate(dataset_val, retinanet)
            print('Epoch: {} | mAP: {:.3f}'.format(epoch_num, float(mAP)))

        scheduler.step(np.mean(epoch_loss))

        if n % 10 == 0:
            torch.save(
                retinanet.module, parser.model_save_path +
                '/' + '{}_retinanet_{}_{:.3f}.pt'.format(
                    parser.dataset, epoch_num, mAP))

    retinanet.eval()

    torch.save(retinanet, parser.model_save_path + '/' + 'model_final.pt')
Example #12
    def __init__(self,
                 detection_model_path,
                 score_model_path,
                 private_key,
                 device_name='cpu',
                 backend='pytorch',
                 det_vino_device=DETECTION_VINO_DEVICE,
                 score_vino_device=SCORE_VINO_DEVICE):
        self.detection_model_path = detection_model_path
        self.score_model_path = score_model_path
        self.private_key = private_key
        self.device_name = device_name
        self.backend = backend

        root_core_path, _ = os.path.split(os.path.abspath(__file__))
        self.font = ImageFont.truetype(
            os.path.join(root_core_path, 'resource/arial.ttf'), 24)

        # TODO : normalization
        self.detect_trans = detection_aug.Compose([
            detection_aug.Pad(),
            detection_aug.Resize(IMAGE_SIZE, IMAGE_SIZE),
            detection_aug.AutoLevel(min_level_rate=1, max_level_rate=1),
            detection_aug.AutoContrast(),
            detection_aug.Contrast(1.25),
            detection_aug.ToTensor()
        ])
        self.score_trans = transforms.Compose([
            score_aug.AutoLevel(),
            score_aug.AutoContrast(),
            score_aug.Contrast(contrast=1.2),
            score_aug.Pad(),  # pad to square
            transforms.ToPILImage(),
            transforms.Resize(IMAGE_SIZE if self.backend ==
                              'pytorch' else VINO_CROP_SIZE),
            transforms.ToTensor()
        ])

        if self.backend == 'pytorch':
            self.device = torch.device(self.device_name)

            # TODO : decrypt model with private_key

            self.detection_net = detection_model.resnet50(
                num_classes=NUM_CLASSES)
            self.score_net = score_model.resnet50(num_classes=1)

            self.detection_net.load_state_dict(
                torch.load(self.detection_model_path))
            self.score_net.load_state_dict(torch.load(self.score_model_path))

            self.detection_net.to(self.device)
            self.score_net.to(self.device)
        elif self.backend == 'openvino':
            # IR expects BGR, but our transform pipeline exports RGB
            # remember to convert model with --reverse_input_channels parameter

            # our normalization is implemented in transform
            # so do NOT specify --scale_values, --mean_values

            # after ToTensor(), we got (n, c, h, w) tensor so .numpy() should be ok
            from openvino.inference_engine import IECore

            self.ie = IECore()

            self.detection_model_bin = os.path.splitext(
                self.detection_model_path)[0] + '.bin'
            self.score_model_bin = os.path.splitext(
                self.score_model_path)[0] + '.bin'

            self.detection_net = self.ie.read_network(
                self.detection_model_path, self.detection_model_bin)
            self.score_net = self.ie.read_network(self.score_model_path,
                                                  self.score_model_bin)

            self.detection_input_layer = next(iter(self.detection_net.inputs))
            self.detection_output_layers = sorted(
                iter(self.detection_net.outputs))
            self.score_input_layer = next(iter(self.score_net.inputs))
            self.score_output_layer = next(iter(self.score_net.outputs))

            self.detection_exec_model = self.ie.load_network(
                self.detection_net, det_vino_device)
            self.score_exec_models = []
            if score_vino_device == 'MULTI':
                for dev in self.ie.available_devices:
                    if 'MYRIAD' in dev:
                        self.score_exec_models.append(
                            self.ie.load_network(self.score_net, dev))
                print('det device: {}, score MYRIAD device(s): {}'.format(
                    det_vino_device, len(self.score_exec_models)))
            else:
                self.score_exec_model = self.ie.load_network(
                    self.score_net, score_vino_device)
                print('det device: {}, score device: {}'.format(
                    det_vino_device, score_vino_device))

            self.detection_post_processor = DetectionPostProcessor(
                NMS_THRESHOLD, SCORE_THRESHOLD)
        else:
            print('unknown backend {}'.format(self.backend))
Example #13
def main(args=None):
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset', default='csv', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_train', default='dataset/pascal_train.csv', help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes', default='dataset/classes.csv', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', default='dataset/pascal_val.csv', help='Path to file containing validation annotations (optional, see readme)')

    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=100)
    parser.add_argument('--weights_folder', help='path to save weight', type=str, required=True)


    parser = parser.parse_args(args)
    if not os.path.exists(parser.weights_folder):
        os.makedirs(parser.weights_folder)

    # Create the data loaders
    if parser.dataset == 'coco':

        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO,')

        dataset_train = CocoDataset(parser.coco_path, set_name='train2017',
                                    transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
        dataset_val = CocoDataset(parser.coco_path, set_name='val2017',
                                  transform=transforms.Compose([Normalizer(), Resizer()]))

    elif parser.dataset == 'csv':

        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on csv.')

        if parser.csv_classes is None:
            raise ValueError('Must provide --csv_classes when training on csv.')

        dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes,
                                   transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
                                     transform=transforms.Compose([Normalizer(), Resizer()]))

    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train, batch_size=5, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=4, collate_fn=collater, batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=8, drop_last=False)
        dataloader_val = DataLoader(dataset_val, num_workers=4, collate_fn=collater, batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True

    if use_gpu:
        if torch.cuda.is_available():
            retinanet = retinanet.cuda()

    if torch.cuda.is_available():
        retinanet = torch.nn.DataParallel(retinanet).cuda()
    else:
        retinanet = torch.nn.DataParallel(retinanet)

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))
    # import ipdb; ipdb.set_trace()
    for epoch_num in range(parser.epochs):

        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []
        total_loss = 0
        total_regression_loss = 0
        total_classification_loss = 0
        with tqdm(dataloader_train, unit="batch") as tepoch:
            for data in tepoch:
            # for iter_num, data in tepoch:#enumerate(dataloader_train):
                tepoch.set_description(f"Epoch {epoch_num}")
                try:
                    optimizer.zero_grad()

                    if torch.cuda.is_available():
                        classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot']])
                    else:
                        classification_loss, regression_loss = retinanet([data['img'].float(), data['annot']])
                        
                    classification_loss = classification_loss.mean()
                    regression_loss = regression_loss.mean()

                    loss = classification_loss + regression_loss

                    total_loss = total_loss + loss
                    total_regression_loss = total_regression_loss + regression_loss
                    total_classification_loss = total_classification_loss + classification_loss

                    if bool(loss == 0):
                        continue

                    loss.backward()

                    torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                    optimizer.step()

                    loss_hist.append(float(loss))

                    epoch_loss.append(float(loss))

                    # print(
                        # 'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(
                        #     epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist)))
                    tepoch.set_postfix(cls_loss="{:1.5f}".format(classification_loss), reg_loss="{:1.5f}".format(regression_loss))
                    time.sleep(0.1)
                    del classification_loss
                    del regression_loss
                except Exception as e:
                    print(e)
                    continue
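        # tb is assumed to be a torch.utils.tensorboard SummaryWriter created elsewhere in the script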
        tb.add_scalar('Training loss', total_loss, epoch_num)
        tb.add_scalar('Training regression loss', total_regression_loss, epoch_num)
        tb.add_scalar('Training classification loss', total_classification_loss, epoch_num)
        if parser.dataset == 'coco':

            print('Evaluating dataset')

            coco_eval.evaluate_coco(dataset_val, retinanet)

        elif parser.dataset == 'csv' and parser.csv_val is not None:

            
            print('Evaluating dataset')

            mAP = csv_eval.evaluate(dataset_val, retinanet)

        scheduler.step(np.mean(epoch_loss))

        torch.save(retinanet.module, '{}/{}_retinanet_{}.pt'.format(parser.weights_folder,parser.dataset, epoch_num))


    retinanet.eval()

    torch.save(retinanet, '{}/model_final.pt'.format(parser.weights_folder))
Example #14
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.')
    # parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--dataset_root',
        default='/root/data/VOCdevkit/',
        help=
        'Dataset root directory path [/root/data/VOCdevkit/, /root/data/coco/, /root/data/FLIR_ADAS]'
    )
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )
    parser.add_argument(
        '--resume',
        default=None,
        type=str,
        help='Checkpoint state_dict file to resume training from')
    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--batch_size',
                        default=16,
                        type=int,
                        help='Batch size for training')
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=100)
    parser.add_argument('--lr',
                        '--learning_rate',
                        default=1e-4,
                        type=float,
                        help='initial learning rate')
    parser.add_argument('--weight_decay',
                        default=5e-4,
                        type=float,
                        help='Weight decay')
    parser.add_argument('-j',
                        '--workers',
                        default=4,
                        type=int,
                        metavar='N',
                        help='number of data loading workers (default: 4)')
    parser.add_argument("--log",
                        default=False,
                        action="store_true",
                        help="Write log file.")

    parser = parser.parse_args(args)

    network_name = 'RetinaNet-Res{}'.format(parser.depth)
    # print('network_name:', network_name)
    net_logger = logging.getLogger('Network Logger')
    formatter = logging.Formatter(LOGGING_FORMAT)
    streamhandler = logging.StreamHandler()
    streamhandler.setFormatter(formatter)
    net_logger.addHandler(streamhandler)
    if parser.log:
        net_logger.setLevel(logging.INFO)
        # logging.basicConfig(level=logging.DEBUG, format=LOGGING_FORMAT,
        #                     filename=os.path.join('log', '{}.log'.format(network_name)), filemode='a')
        filehandler = logging.FileHandler(os.path.join(
            'log', '{}.log'.format(network_name)),
                                          mode='a')
        filehandler.setFormatter(formatter)
        net_logger.addHandler(filehandler)

    net_logger.info('Network Name: {:>20}'.format(network_name))

    # Create the data loaders
    if parser.dataset == 'coco':
        if parser.dataset_root is None:
            raise ValueError(
                'Must provide --dataset_root when training on COCO,')
        dataset_train = CocoDataset(parser.dataset_root,
                                    set_name='train2017',
                                    transform=transforms.Compose(
                                        [Normalizer(),
                                         Augmenter(),
                                         Resizer()]))
        dataset_val = CocoDataset(parser.dataset_root,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))
    elif parser.dataset == 'FLIR':
        if parser.dataset_root is None:
            raise ValueError(
                'Must provide --dataset_root when training on FLIR.')
        _scale = 1.2
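        # FLIR ADAS thermal frames are 640x512, so the Resizer bounds below start from that base size.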
        dataset_train = FLIRDataset(parser.dataset_root,
                                    set_name='train',
                                    transform=transforms.Compose([
                                        Normalizer(),
                                        Augmenter(),
                                        Resizer(min_side=int(512 * _scale),
                                                max_side=int(640 * _scale),
                                                logger=net_logger)
                                    ]))
        dataset_val = FLIRDataset(parser.dataset_root,
                                  set_name='val',
                                  transform=transforms.Compose([
                                      Normalizer(),
                                      Resizer(min_side=int(512 * _scale),
                                              max_side=int(640 * _scale))
                                  ]))
    elif parser.dataset == 'csv':
        if parser.csv_train is None:
            raise ValueError(
                'Must provide --csv_train when training on a CSV dataset.')
        if parser.csv_classes is None:
            raise ValueError(
                'Must provide --csv_classes when training on a CSV dataset.')
        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(),
                                        Augmenter(),
                                        Resizer()]))
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(),
                                          Resizer()]))

    else:
        raise ValueError(
            'Dataset type not understood (must be FLIR, COCO or csv), exiting.'
        )

    # Original RetinaNet code
    # sampler = AspectRatioBasedSampler(dataset_train, batch_size=2, drop_last=False)
    # dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler)
    # if dataset_val is not None:
    #     sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
    #     dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val)

    dataloader_train = DataLoader(dataset_train,
                                  batch_size=parser.batch_size,
                                  num_workers=parser.workers,
                                  shuffle=True,
                                  collate_fn=collater,
                                  pin_memory=True)
    dataloader_val = None
    if dataset_val is not None:
        dataloader_val = DataLoader(dataset_val,
                                    batch_size=1,
                                    num_workers=parser.workers,
                                    shuffle=False,
                                    collate_fn=collater,
                                    pin_memory=True)

    build_param = {'logger': net_logger}
    if parser.resume is not None:
        net_logger.info('Loading Checkpoint : {}'.format(parser.resume))
        retinanet = torch.load(parser.resume)
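        # Parse the starting epoch from the checkpoint filename, assuming the
        # '<dataset>_<network>_<epoch>.pt' naming used when saving below.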
        s_b = parser.resume.rindex('_')
        s_e = parser.resume.rindex('.')
        start_epoch = int(parser.resume[s_b + 1:s_e]) + 1
        net_logger.info('Continuing from epoch {}'.format(start_epoch))
    else:
        # Create the model
        if parser.depth == 18:
            retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                       pretrained=True,
                                       **build_param)
        elif parser.depth == 34:
            retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                       pretrained=True,
                                       **build_param)
        elif parser.depth == 50:
            retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                                       pretrained=True,
                                       **build_param)
        elif parser.depth == 101:
            retinanet = model.resnet101(
                num_classes=dataset_train.num_classes(),
                pretrained=True,
                **build_param)
        elif parser.depth == 152:
            retinanet = model.resnet152(
                num_classes=dataset_train.num_classes(),
                pretrained=True,
                **build_param)
        else:
            raise ValueError(
                'Unsupported model depth, must be one of 18, 34, 50, 101, 152')
        start_epoch = 0

    use_gpu = True

    if use_gpu:
        if torch.cuda.is_available():
            retinanet = retinanet.cuda()

    if torch.cuda.is_available():
        retinanet = torch.nn.DataParallel(retinanet).cuda()
    else:
        retinanet = torch.nn.DataParallel(retinanet)

    retinanet.training = True

    net_logger.info('Weight Decay  : {}'.format(parser.weight_decay))
    net_logger.info('Learning Rate : {}'.format(parser.lr))

    # optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    optimizer = optim.Adam(retinanet.parameters(),
                           lr=parser.lr,
                           weight_decay=parser.weight_decay)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
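    # Keep BatchNorm layers frozen (fixed running statistics), as is usual when
    # fine-tuning RetinaNet with small per-GPU batch sizes.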
    retinanet.module.freeze_bn()

    # print('Num training images: {}'.format(len(dataset_train)))
    net_logger.info('Num Training Images: {}'.format(len(dataset_train)))

    for epoch_num in range(start_epoch, parser.epochs):
        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []
        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                # print(data['img'][0,:,:,:].shape)
                # print(data['annot'])
                if torch.cuda.is_available():
                    classification_loss, regression_loss = retinanet(
                        [data['img'].cuda().float(), data['annot']])
                else:
                    classification_loss, regression_loss = retinanet(
                        [data['img'].float(), data['annot']])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss

                if bool(loss == 0):
                    continue

                loss.backward()
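                # Clip the gradient norm at 0.1 to keep training stable.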
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()

                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))

                if (iter_num % 10 == 0):
                    _log = 'Epoch: {} | Iter: {} | Class loss: {:1.5f} | BBox loss: {:1.5f} | Running loss: {:1.5f}'.format(
                        epoch_num, iter_num, float(classification_loss),
                        float(regression_loss), np.mean(loss_hist))
                    net_logger.info(_log)

                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        # Run validation after every epoch.
        if dataset_val is not None and (epoch_num + 1) % 1 == 0:
            test(dataset_val, retinanet, epoch_num, parser, net_logger)

        # if parser.dataset == 'coco':

        #     print('Evaluating dataset')

        #     coco_eval.evaluate_coco(dataset_val, retinanet)

        # elif parser.dataset == 'csv' and parser.csv_val is not None:

        #     print('Evaluating dataset')

        #     mAP = csv_eval.evaluate(dataset_val, retinanet)

        scheduler.step(np.mean(epoch_loss))
        print('Learning Rate:', optimizer.param_groups[0]['lr'])
        torch.save(
            retinanet.module,
            os.path.join(
                'saved', '{}_{}_{}.pt'.format(parser.dataset, network_name,
                                              epoch_num)))

    retinanet.eval()

    torch.save(retinanet, 'model_final.pt')
Beispiel #15
0
def main(args=None):
    parser = argparse.ArgumentParser(
        description="Simple training script for training a RetinaNet network."
    )

    parser.add_argument("--dataset", help="Dataset type, must be one of csv or coco.")
    parser.add_argument("--model", default=None, help="Path to trained model")
    parser.add_argument("--coco_path", help="Path to COCO directory")
    parser.add_argument(
        "--csv_train", help="Path to file containing training annotations (see readme)"
    )
    parser.add_argument(
        "--csv_classes", help="Path to file containing class list (see readme)"
    )
    parser.add_argument(
        "--csv_val",
        help="Path to file containing validation annotations (optional, see readme)",
    )

    parser.add_argument(
        "--depth",
        help="Resnet depth, must be one of 18, 34, 50, 101, 152",
        type=int,
        default=50,
    )
    parser.add_argument("--epochs", help="Number of epochs", type=int, default=100)
    parser.add_argument(
        "--result_dir",
        default="results",
        help="Path to store training results",
        type=str,
    )
    parser.add_argument(
        "--batch_num", default=8, help="Number of samples in a batch", type=int
    )

    parser = parser.parse_args(args)

    print(parser)

    # parameters
    BATCH_SIZE = parser.batch_num
    IMAGE_MIN_SIDE = 1440
    IMAGE_MAX_SIDE = 2560

    # Create the data loaders
    if parser.dataset == "coco":

        if parser.coco_path is None:
            raise ValueError("Must provide --coco_path when training on COCO.")
        # TODO: parameterize arguments for Resizer, and other transform functions
        # resizer: min_side=608, max_side=1024
        dataset_train = CocoDataset(
            parser.coco_path,
            # set_name="train2017",
            set_name="train_images_full",
            transform=transforms.Compose(
                [Normalizer(), Augmenter(), Resizer(passthrough=True),]
            ),
        )
        dataset_val = CocoDataset(
            parser.coco_path,
            # set_name="val2017",
            set_name="val_images_full",
            transform=transforms.Compose([Normalizer(), Resizer(passthrough=True),]),
        )

    elif parser.dataset == "csv":

        if parser.csv_train is None:
            raise ValueError("Must provide --csv_train when training on a CSV dataset.")

        if parser.csv_classes is None:
            raise ValueError("Must provide --csv_classes when training on a CSV dataset.")

        dataset_train = CSVDataset(
            train_file=parser.csv_train,
            class_list=parser.csv_classes,
            transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]),
        )

        if parser.csv_val is None:
            dataset_val = None
            print("No validation annotations provided.")
        else:
            dataset_val = CSVDataset(
                train_file=parser.csv_val,
                class_list=parser.csv_classes,
                transform=transforms.Compose([Normalizer(), Resizer()]),
            )

    else:
        raise ValueError("Dataset type not understood (must be csv or coco), exiting.")

    sampler = AspectRatioBasedSampler(
        dataset_train, batch_size=BATCH_SIZE, drop_last=False
    )
    dataloader_train = DataLoader(
        dataset_train, num_workers=16, collate_fn=collater, batch_sampler=sampler
    )

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(
            dataset_val, batch_size=BATCH_SIZE, drop_last=False
        )
        dataloader_val = DataLoader(
            dataset_val, num_workers=16, collate_fn=collater, batch_sampler=sampler_val
        )

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(
            num_classes=dataset_train.num_classes(), pretrained=True
        )
    elif parser.depth == 34:
        retinanet = model.resnet34(
            num_classes=dataset_train.num_classes(), pretrained=True
        )
    elif parser.depth == 50:
        retinanet = model.resnet50(
            num_classes=dataset_train.num_classes(), pretrained=True
        )
    elif parser.depth == 101:
        retinanet = model.resnet101(
            num_classes=dataset_train.num_classes(), pretrained=True
        )
    elif parser.depth == 152:
        retinanet = model.resnet152(
            num_classes=dataset_train.num_classes(), pretrained=True
        )
    else:
        raise ValueError("Unsupported model depth, must be one of 18, 34, 50, 101, 152")

    if parser.model:
        retinanet = torch.load(parser.model)

    use_gpu = True

    if use_gpu:
        if torch.cuda.is_available():
            retinanet = retinanet.cuda()

    if torch.cuda.is_available():
        retinanet = torch.nn.DataParallel(retinanet).cuda()
    else:
        retinanet = torch.nn.DataParallel(retinanet)

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-4)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, patience=3, verbose=True
    )

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print("Num training images: {}".format(len(dataset_train)))

    for epoch_num in range(parser.epochs):

        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []
        p_bar = tqdm(dataloader_train)
        for iter_num, data in enumerate(p_bar):
            try:
                optimizer.zero_grad()

                if torch.cuda.is_available():
                    classification_loss, regression_loss = retinanet(
                        [data["img"].cuda().float(), data["annot"]]
                    )
                else:
                    classification_loss, regression_loss = retinanet(
                        [data["img"].float(), data["annot"]]
                    )

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                loss = classification_loss + regression_loss

                if bool(loss == 0):
                    continue

                loss.backward()

                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                optimizer.step()

                loss_hist.append(float(loss))

                epoch_loss.append(float(loss))

                mean_loss = np.mean(loss_hist)
                p_bar.set_description(
                    f"Epoch: {epoch_num} | Iteration: {iter_num} | "
                    f"Class loss: {float(classification_loss.item()):.5f} | "
                    f"Regr loss: {float(regression_loss.item()):.5f} | "
                    f"Running loss: {mean_loss:.5f}"
                )

                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        if parser.dataset == "coco":

            print("Evaluating dataset")

            coco_eval.evaluate_coco(
                dataset_val, retinanet, result_dir=parser.result_dir
            )

        elif parser.dataset == "csv" and parser.csv_val is not None:

            print("Evaluating dataset")

            mAP = csv_eval.evaluate(dataset_val, retinanet)

        scheduler.step(np.mean(epoch_loss))

        torch.save(
            retinanet.module,
            f"{parser.result_dir}/{parser.dataset}_retinanet_{epoch_num}.pt",
        )

    retinanet.eval()

    torch.save(retinanet, "model_final.pt")
Beispiel #16
0
def main(args=None):
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.', default='show')
    parser.add_argument('--coco_path', help='Path to COCO directory', default='/mnt/marathon')
    parser.add_argument('--image_size', help='image size', type=int, nargs=2, default=IMAGE_SIZE)
    parser.add_argument('--limit', help='limit', type=int, nargs=2, default=(0, 0))
    parser.add_argument('--batch_size', help='batch size', type=int, default=BATCH_SIZE)
    parser.add_argument('--num_works', help='number of data loading workers', type=int, default=NUM_WORKERS)
    parser.add_argument('--num_classes', help='num classes', type=int, default=3)
    parser.add_argument('--merge_val', help='merge the validation set into the training set (0/1)', type=int, default=MERGE_VAL)
    parser.add_argument('--do_aug', help='apply data augmentation (0/1)', type=int, default=DO_AUG)
    parser.add_argument('--lr_choice', default=LR_CHOICE, choices=['lr_scheduler', 'lr_map', 'lr_fn'], type=str)
    parser.add_argument('--lr', help='lr', type=float, default=LR)
    parser.add_argument("--lr_map", dest="lr_map", action=StoreDictKeyPair, default=LR_MAP)
    parser.add_argument("--lr_fn", dest="lr_fn", action=StoreDictKeyPair, default=LR_FN)
    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=DEPTH)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=EPOCHS)
    parser = parser.parse_args(args)

    print('dataset:', parser.dataset)
    print('depth:', parser.depth)
    print('epochs:', parser.epochs)
    print('image_size:', parser.image_size)
    print('batch_size:', parser.batch_size)
    print('num_works:', parser.num_works)
    print('merge_val:', parser.merge_val)
    print('do_aug:', parser.do_aug)
    print('lr_choice:', parser.lr_choice)
    print('lr:', parser.lr)
    print('lr_map:', parser.lr_map)
    print('lr_fn:', parser.lr_fn)
    print('num_classes:', parser.num_classes)
    print('limit:', parser.limit)

    # Create the data loaders
    # dataset_train, _ = torch.utils.data.random_split(dataset_train, [NUM_COCO_DATASET_TRAIN, len(dataset_train) - NUM_COCO_DATASET_TRAIN])
    # dataset_val, _ = torch.utils.data.random_split(dataset_val, [NUM_COCO_DATASET_VAL, len(dataset_val) - NUM_COCO_DATASET_VAL])

    transform_train = None
    transform_vail = None
    collate_fn = None
    if parser.do_aug:
        transform_train = get_augumentation('train', parser.image_size[0], parser.image_size[1])
        transform_vail = get_augumentation('test', parser.image_size[0], parser.image_size[1])
        collate_fn = detection_collate
    else:
        transform_train = transforms.Compose([
            # Normalizer(),
            # Augmenter(),
            Resizer(*parser.image_size)])
        transform_vail = transforms.Compose([
            # Normalizer(), 
            Resizer(*parser.image_size)])
        collate_fn = collater

    if parser.dataset == 'h5':
        dataset_train = H5CoCoDataset('{}/train_small.hdf5'.format(parser.coco_path), 'train_small')
        dataset_val = H5CoCoDataset('{}/test.hdf5'.format(parser.coco_path), 'test')
    else:
        dataset_train = CocoDataset(parser.coco_path, set_name='train_small', do_aug=parser.do_aug,
            transform=transform_train, limit_len=parser.limit[0])
        dataset_val = CocoDataset(parser.coco_path, set_name='test', do_aug=parser.do_aug,
            transform=transform_vail, limit_len=parser.limit[1])

    # Optionally merge the validation set into the training set
    if parser.merge_val:
        dataset_train += dataset_val

    print('training images: {}'.format(len(dataset_train)))
    print('val images: {}'.format(len(dataset_val)))
    
    steps_per_epoch = len(dataset_train) // parser.batch_size
    print('steps_per_epoch:', steps_per_epoch)

    sampler = AspectRatioBasedSampler(dataset_train, batch_size=parser.batch_size, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=parser.num_works,
        collate_fn=collate_fn, batch_sampler=sampler)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=parser.num_classes, pretrained=PRETRAINED)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=parser.num_classes, pretrained=PRETRAINED)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=parser.num_classes, pretrained=PRETRAINED)
    elif parser.depth == 101250:
        retinanet = model.resnet101with50weight(num_classes=parser.num_classes, pretrained=PRETRAINED)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=parser.num_classes, pretrained=PRETRAINED)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=parser.num_classes, pretrained=PRETRAINED)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    retinanet = retinanet.cuda()
    retinanet = torch.nn.DataParallel(retinanet).cuda()
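    # Note: this example assumes a CUDA device is available (there is no CPU fallback).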
    retinanet.training = True

    if parser.lr_choice == 'lr_map':
        lr_now = lr_change_map(1, 0, parser.lr_map)
    elif parser.lr_choice == 'lr_fn':
        lr_now = float(parser.lr_fn['LR_START'])
    elif parser.lr_choice == 'lr_scheduler':
        lr_now = parser.lr

    # optimizer = optim.Adam(retinanet.parameters(), lr=lr_now)
    optimizer = optim.AdamW(retinanet.parameters(), lr=lr_now)
    # optimizer = optim.SGD(retinanet.parameters(), lr=lr_now, momentum=0.9, weight_decay=5e-4)
    # optimizer = optim.SGD(retinanet.parameters(), lr=lr_now)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=PATIENCE, factor=FACTOR, verbose=True)
    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    iteration_loss_path = 'iteration_loss.csv'
    if os.path.isfile(iteration_loss_path):
        os.remove(iteration_loss_path)
    
    epoch_loss_path = 'epoch_loss.csv'
    if os.path.isfile(epoch_loss_path):
        os.remove(epoch_loss_path)
    
    eval_train_path = 'eval_train_result.csv'
    if os.path.isfile(eval_train_path):
        os.remove(eval_train_path)

    eval_val_path = 'eval_val_result.csv'
    if os.path.isfile(eval_val_path):
        os.remove(eval_val_path)

    # When running inside a Kaggle kernel, write the CSV logs to the writable /kaggle/working directory.
    USE_KAGGLE = True if os.environ.get('KAGGLE_KERNEL_RUN_TYPE', False) else False
    if USE_KAGGLE:
        iteration_loss_path = '/kaggle/working/' + iteration_loss_path
        epoch_loss_path = '/kaggle/working/' + epoch_loss_path
        eval_val_path = '/kaggle/working/' + eval_val_path
        eval_train_path = '/kaggle/working/' + eval_train_path

    with open(epoch_loss_path, 'a+') as epoch_loss_file, \
         open(iteration_loss_path, 'a+') as iteration_loss_file, \
         open(eval_train_path, 'a+') as eval_train_file, \
         open(eval_val_path, 'a+') as eval_val_file:

        epoch_loss_file.write('epoch_num,mean_epoch_loss\n')
        iteration_loss_file.write('epoch_num,iteration,classification_loss,regression_loss,iteration_loss\n')
        eval_train_file.write('epoch_num,map50\n')
        eval_val_file.write('epoch_num,map50\n')

        for epoch_num in range(parser.epochs):
            retinanet.train()
            retinanet.module.freeze_bn()

            epoch_loss = []
            for iter_num, data in enumerate(dataloader_train):
                optimizer.zero_grad()
                classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot']])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss

                if bool(loss == 0):
                    continue

                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))

                iteration_loss = np.mean(loss_hist)
                print('\rEpoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(
                      epoch_num+1, iter_num+1, float(classification_loss), float(regression_loss), iteration_loss), end=' ' * 50)

                iteration_loss_file.write('{},{},{:1.5f},{:1.5f},{:1.5f}\n'.format(epoch_num+1,
                    epoch_num * steps_per_epoch + (iter_num+1), float(classification_loss), float(regression_loss),
                    iteration_loss))
                iteration_loss_file.flush()

                del classification_loss
                del regression_loss

            mean_epoch_loss = np.mean(epoch_loss)
            epoch_loss_file.write('{},{:1.5f}\n'.format(epoch_num+1, mean_epoch_loss))
            epoch_loss_file.flush()

            if parser.lr_choice == 'lr_map':
                lr_now = lr_change_map(epoch_num+1, lr_now, parser.lr_map)
                adjust_learning_rate(optimizer, lr_now)
            elif parser.lr_choice == 'lr_fn':
                lr_now = lrfn(epoch_num+1, parser.lr_fn)
                adjust_learning_rate(optimizer, lr_now)
            elif parser.lr_choice == 'lr_scheduler':
                scheduler.step(mean_epoch_loss)

            # if parser.dataset != 'show':
            #     print('Evaluating dataset_train')
            #     coco_eval.evaluate_coco(dataset_train, retinanet, parser.dataset, parser.do_aug, eval_train_file, epoch_num)

            print('Evaluating dataset_val')
            coco_eval.evaluate_coco(dataset_val, retinanet, parser.dataset, parser.do_aug, eval_val_file, epoch_num)
    return parser
Beispiel #17
0
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--HW2_path', help='Path to HW2 directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )

    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=100)

    parser = parser.parse_args(args)

    # Create the data loaders
    if parser.dataset == 'coco':

        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO.')

        dataset_train = CocoDataset(parser.coco_path,
                                    set_name='train2017',
                                    transform=transforms.Compose(
                                        [Normalizer(),
                                         Augmenter(),
                                         Resizer()]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))

    elif parser.dataset == 'HW2':

        if parser.HW2_path is None:
            raise ValueError('Must provide --HW2_path when training on HW2.')

        dataset_train = HW2Dataset(parser.HW2_path,
                                   transform=transforms.Compose(
                                       [Normalizer(),
                                        Augmenter(),
                                        Resizer()]))
        #dataset_val = HW2Dataset(parser.HW2_path,
        #                          transform=transforms.Compose([Normalizer(), Resizer()]))

    elif parser.dataset == 'csv':

        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on a CSV dataset.')

        if parser.csv_classes is None:
            raise ValueError(
                'Must provide --csv_classes when training on a CSV dataset.')

        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(),
                                        Augmenter(),
                                        Resizer()]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(),
                                          Resizer()]))

    else:
        raise ValueError(
            'Dataset type not understood (must be csv, coco or HW2), exiting.')

    # sampler = AspectRatioBasedSampler(dataset_train, batch_size=2, drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  batch_size=8,
                                  num_workers=3,
                                  collate_fn=collater)

    # if dataset_val is not None:
    #     sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
    #     dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
        #retinanet.load_state_dict(torch.load('coco_resnet_50_map_0_335_state_dict.pt'))
        #retinanet_state = retinanet.state_dict()
        #loaded = torch.load('coco_resnet_50_map_0_335_state_dict.pt')
        #pretrained = {k:v for k, v in loaded.items() if k in retinanet_state}
        #retinanet_state.update(pretrained)
        #retinanet.load_state_dict(retinanet_state)
        retinanet = torch.load('saved_models_3/HW2_retinanet_0.pt')

    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True

    if use_gpu:
        if torch.cuda.is_available():
            retinanet = retinanet.cuda()

    if torch.cuda.is_available():
        retinanet = torch.nn.DataParallel(retinanet).cuda()
    else:
        retinanet = torch.nn.DataParallel(retinanet)

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-4)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    pre_epoch = 0  # starting epoch; set to a later value when resuming from a checkpoint
    for epoch_num in range(pre_epoch, parser.epochs):

        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                if torch.cuda.is_available():
                    classification_loss, regression_loss = retinanet(
                        [data['img'].cuda().float(), data['annot']])
                else:
                    classification_loss, regression_loss = retinanet(
                        [data['img'].float(), data['annot']])
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                loss = classification_loss + regression_loss

                if bool(loss == 0):
                    continue

                loss.backward()

                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                optimizer.step()

                loss_hist.append(float(loss))

                epoch_loss.append(float(loss))

                print(
                    'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                    .format(epoch_num, iter_num, float(classification_loss),
                            float(regression_loss), np.mean(loss_hist)))

                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        # if parser.dataset == 'coco':

        #     print('Evaluating dataset')

        #     coco_eval.evaluate_coco(dataset_val, retinanet)

        # elif parser.dataset == 'csv' and parser.csv_val is not None:

        #     print('Evaluating dataset')

        #     mAP = csv_eval.evaluate(dataset_val, retinanet)

        scheduler.step(np.mean(epoch_loss))

        torch.save(
            retinanet.module,
            'saved_models_3/{}_retinanet_{}.pt'.format(parser.dataset,
                                                       epoch_num))

    # retinanet.eval()

    torch.save(retinanet, 'saved_models_3/model_final.pt')
def detect_image(video_path, model_path):

    # with open(class_list, 'r') as f:
    #     classes = load_classes(csv.reader(f, delimiter=','))
    classes = {
        0: u'__background__',
        1: u'person',
        2: u'bicycle',
        3: u'car',
        4: u'motorcycle',
        5: u'airplane',
        6: u'bus',
        7: u'train',
        8: u'truck',
        9: u'boat',
        10: u'traffic light',
        11: u'fire hydrant',
        12: u'stop sign',
        13: u'parking meter',
        14: u'bench',
        15: u'bird',
        16: u'cat',
        17: u'dog',
        18: u'horse',
        19: u'sheep',
        20: u'cow',
        21: u'elephant',
        22: u'bear',
        23: u'zebra',
        24: u'giraffe',
        25: u'backpack',
        26: u'umbrella',
        27: u'handbag',
        28: u'tie',
        29: u'suitcase',
        30: u'frisbee',
        31: u'skis',
        32: u'snowboard',
        33: u'sports ball',
        34: u'kite',
        35: u'baseball bat',
        36: u'baseball glove',
        37: u'skateboard',
        38: u'surfboard',
        39: u'tennis racket',
        40: u'bottle',
        41: u'wine glass',
        42: u'cup',
        43: u'fork',
        44: u'knife',
        45: u'spoon',
        46: u'bowl',
        47: u'banana',
        48: u'apple',
        49: u'sandwich',
        50: u'orange',
        51: u'broccoli',
        52: u'carrot',
        53: u'hot dog',
        54: u'pizza',
        55: u'donut',
        56: u'cake',
        57: u'chair',
        58: u'couch',
        59: u'potted plant',
        60: u'bed',
        61: u'dining table',
        62: u'toilet',
        63: u'tv',
        64: u'laptop',
        65: u'mouse',
        66: u'remote',
        67: u'keyboard',
        68: u'cell phone',
        69: u'microwave',
        70: u'oven',
        71: u'toaster',
        72: u'sink',
        73: u'refrigerator',
        74: u'book',
        75: u'clock',
        76: u'vase',
        77: u'scissors',
        78: u'teddy bear',
        79: u'hair drier',
        80: u'toothbrush'
    }

    vidcap = cv2.VideoCapture(video_path)
    success, image = vidcap.read()
    count = 0

    retinanet = resnet50(num_classes=80, )
    retinanet.load_state_dict(
        torch.load(model_path, map_location=torch.device('cpu')))
    model = retinanet

    labels = {}
    for key, value in classes.items():
        labels[key] = value

    if torch.cuda.is_available():
        model = model.cuda()

    model.training = False
    model.eval()
    rows, cols, cns = image.shape
    size = (cols, rows)

    out = cv2.VideoWriter('output3.avi', cv2.VideoWriter_fourcc(*'DIVX'), 15,
                          size)
    while success:
        success, image = vidcap.read()

        if (not success) or (image is None):
            continue
        image_orig = image.copy()

        rows, cols, cns = image.shape

        smallest_side = min(rows, cols)

        # rescale the image so the smallest side is min_side
        min_side = 608
        max_side = 1024
        scale = min_side / smallest_side

        # check if the largest side is now greater than max_side, which can happen
        # when images have a large aspect ratio
        largest_side = max(rows, cols)

        if largest_side * scale > max_side:
            scale = max_side / largest_side

        # resize the image with the computed scale
        image = cv2.resize(image,
                           (int(round(cols * scale)), int(round(
                               (rows * scale)))))
        rows, cols, cns = image.shape

        # Pad height and width up to the next multiple of 32 so the backbone's
        # strided downsampling divides the input evenly.
        pad_w = 32 - rows % 32
        pad_h = 32 - cols % 32

        new_image = np.zeros(
            (rows + pad_w, cols + pad_h, cns)).astype(np.float32)
        new_image[:rows, :cols, :] = image.astype(np.float32)
        image = new_image.astype(np.float32)
        image /= 255
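        # Normalise with the standard ImageNet mean/std, matching the Normalizer transform used in training.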
        image -= [0.485, 0.456, 0.406]
        image /= [0.229, 0.224, 0.225]
        image = np.expand_dims(image, 0)
        image = np.transpose(image, (0, 3, 1, 2))

        with torch.no_grad():

            image = torch.from_numpy(image)
            if torch.cuda.is_available():
                image = image.cuda()

            st = time.time()
            print(image.shape, image_orig.shape, scale)
            scores, classification, transformed_anchors = model(image.float())
            print('Elapsed time: {}'.format(time.time() - st))
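            # Keep only detections with a confidence score above 0.5.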
            idxs = np.where(scores.cpu() > 0.5)

            for j in range(idxs[0].shape[0]):
                bbox = transformed_anchors[idxs[0][j], :]

                x1 = int(bbox[0] / scale)
                y1 = int(bbox[1] / scale)
                x2 = int(bbox[2] / scale)
                y2 = int(bbox[3] / scale)
                label_name = labels[int(classification[idxs[0][j]])]
                #print(int(classification[idxs[0][j]]))
                label_name = str(int(classification[idxs[0][j]]))
                print(bbox, classification.shape)
                score = scores[idxs[0][j]]
                caption = '{} {:.3f}'.format(label_name, score)
                # draw_caption(img, (x1, y1, x2, y2), label_name)
                draw_caption(image_orig, (x1, y1, x2, y2), caption)
                cv2.rectangle(image_orig, (x1, y1), (x2, y2),
                              color=(0, 0, 255),
                              thickness=2)

            out.write(image_orig)
    out.release()
Beispiel #19
0
def main(args=None):
	parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')

	parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
	parser.add_argument('--coco_path', help='Path to COCO directory')
	parser.add_argument('--csv_train', help='Path to file containing training annotations (see readme)')
	parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
	parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')

	parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50)
	parser.add_argument('--config', help='Config file path that contains scale and ratio values', type=str)
	parser.add_argument('--epochs', help='Number of epochs', type=int, default=50)
	parser.add_argument('--init-lr', help='Initial learning rate for training process', type=float, default=1e-3)
	parser.add_argument('--batch-size', help='Number of input images per step', type=int, default=1)
	parser.add_argument('--num-workers', help='Number of worker used in dataloader', type=int, default=1)

	# For resuming training from saved checkpoint
	parser.add_argument('--resume', help='Whether to resume training from checkpoint', action='store_true')
	parser.add_argument('--saved-ckpt', help='Resume training from this checkpoint', type=str)

	parser.add_argument('--multi-gpus', help='Allow using multiple GPUs for training', action='store_true')
	parser.add_argument('--snapshots', help='Location to save training snapshots', type=str, default="snapshots")

	parser.add_argument('--log-dir', help='Location to save training logs', type=str, default="logs")
	parser.add_argument('--expr-augs', help='Allow use of experimental augmentation methods', action='store_true')
	parser.add_argument('--aug-methods', help='(Experimental) augmentation methods to use, separated by commas', type=str, default="rotate,hflip,brightness,contrast")
	parser.add_argument('--aug-prob', help='Probability of applying (experiment) augmentation in range [0.,1.]', type=float, default=0.5)

	parser = parser.parse_args(args)

	train_transforms = [Normalizer(), Resizer(), Augmenter()]

	# Define transform methods
	if parser.expr_augs:
		aug_map = get_aug_map(p=parser.aug_prob)
		aug_methods = parser.aug_methods.split(",")
		for aug in aug_methods:
			if aug in aug_map.keys():
				train_transforms.append(aug_map[aug])
			else:
				print(f"{aug} is not available.")

	# Create the data loaders
	if parser.dataset == 'coco':

		if parser.coco_path is None:
			raise ValueError('Must provide --coco_path when training on COCO.')

		dataset_train = CocoDataset(parser.coco_path, set_name='train2017',
									transform=transforms.Compose(train_transforms))
		dataset_val = CocoDataset(parser.coco_path, set_name='val2017',
								  transform=transforms.Compose([Normalizer(), Resizer()]))

	elif parser.dataset == 'csv':

		if parser.csv_train is None:
			raise ValueError('Must provide --csv_train when training on a CSV dataset.')

		if parser.csv_classes is None:
			raise ValueError('Must provide --csv_classes when training on a CSV dataset.')

		dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes,
								   transform=transforms.Compose(train_transforms))

		if parser.csv_val is None:
			dataset_val = None
			print('No validation annotations provided.')
		else:
			dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
									 transform=transforms.Compose([Normalizer(), Resizer()]))

	else:
		raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

	sampler = AspectRatioBasedSampler(dataset_train, batch_size=parser.batch_size, drop_last=False)
	dataloader_train = DataLoader(dataset_train, num_workers=parser.num_workers, collate_fn=collater, batch_sampler=sampler)

	if dataset_val is not None:
		sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=parser.batch_size, drop_last=False)
		dataloader_val = DataLoader(dataset_val, num_workers=parser.num_workers, collate_fn=collater, batch_sampler=sampler_val)

	config = dict({"scales": None,
					"ratios": None})
	
	if parser.config:
		config = load_config(parser.config, config)

	if parser.depth == 18:
		retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True, ratios=config["ratios"], scales=config["scales"])
	elif parser.depth == 34:
		retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True, ratios=config["ratios"], scales=config["scales"])
	elif parser.depth == 50:
		retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True, ratios=config["ratios"], scales=config["scales"])
	elif parser.depth == 101:
		retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True, ratios=config["ratios"], scales=config["scales"])
	elif parser.depth == 152:
		retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True, ratios=config["ratios"], scales=config["scales"])
	else:
		raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

	optimizer = optim.Adam(retinanet.parameters(), lr=parser.init_lr)

	if parser.resume:
		if not parser.saved_ckpt:
			print("No saved checkpoint provided for resuming training. Exiting now...")
			return 
		if not os.path.exists(parser.saved_ckpt):
			print("Invalid saved checkpoint path. Exiting now...")
			return

		# Restore last state
		retinanet, optimizer, start_epoch = load_ckpt(parser.saved_ckpt, retinanet, optimizer)
		if parser.epochs <= start_epoch:
			print("Number of epochs must be higher than number of trained epochs of saved checkpoint.")
			return

	use_gpu = True

	if use_gpu and torch.cuda.is_available():
		print("Using GPU for training process")
		if parser.multi_gpus:
			print("Using multi-gpus for training process")
			retinanet = torch.nn.DataParallel(retinanet.cuda(), device_ids=[0,1])
		else:
			retinanet = torch.nn.DataParallel(retinanet.cuda())
	else:
		retinanet = torch.nn.DataParallel(retinanet)

	retinanet.training = True

	scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=1, verbose=True)

	loss_hist = collections.deque(maxlen=500)

	retinanet.train()
	retinanet.module.freeze_bn()

	print('Num training images: {}'.format(len(dataset_train)))

	# Tensorboard writer
	writer = SummaryWriter(parser.log_dir)

	# Save snapshots dir
	if not os.path.exists(parser.snapshots):
		os.makedirs(parser.snapshots)

	best_mAP = 0
	start_epoch = 0 if not parser.resume else start_epoch 

	for epoch_num in range(start_epoch, parser.epochs):

		retinanet.train()
		retinanet.module.freeze_bn()

		epoch_loss = []
		epoch_csf_loss = []
		epoch_reg_loss = []

		for iter_num, data in enumerate(dataloader_train):
			try:
				optimizer.zero_grad()

				if torch.cuda.is_available():
					with torch.cuda.device(0):
						classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot']])
				else:
					classification_loss, regression_loss = retinanet([data['img'].float(), data['annot']])
					
				classification_loss = classification_loss.mean()
				regression_loss = regression_loss.mean()

				loss = classification_loss + regression_loss
				epoch_csf_loss.append(float(classification_loss))
				epoch_reg_loss.append(float(regression_loss))

				if bool(loss == 0):
					continue

				loss.backward()

				torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

				optimizer.step()

				loss_hist.append(float(loss))

				epoch_loss.append(float(loss))

				print(
					'\rEpoch: {}/{} | Iteration: {}/{} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(
						(epoch_num + 1), parser.epochs, (iter_num + 1), len(dataloader_train), float(classification_loss), float(regression_loss), np.mean(loss_hist)), end='')

				del classification_loss
				del regression_loss
			except Exception as e:
				print(e)
				continue

		# writer.add_scalar("Loss/train", loss, epoch_num)

		_epoch_loss = np.mean(epoch_loss)
		_epoch_csf_loss = np.mean(epoch_csf_loss)
		_epoch_reg_loss = np.mean(epoch_reg_loss)

		if parser.dataset == 'coco':

			print('Evaluating dataset')

			coco_eval.evaluate_coco(dataset_val, retinanet)

			scheduler.step(_epoch_loss)

		elif parser.dataset == 'csv' and parser.csv_val is not None:

			print('\nEvaluating dataset')

			APs = csv_eval.evaluate(dataset_val, retinanet)
			mAP = round(mean(APs[ap][0] for ap in APs.keys()), 5)
			print("mAP: %f" %mAP)
			writer.add_scalar("validate/mAP", mAP, epoch_num)
			
			# Handle lr_scheduler with mAP value
			scheduler.step(mAP)


		lr = get_lr(optimizer)
		writer.add_scalar("train/classification-loss", _epoch_csf_loss, epoch_num)
		writer.add_scalar("train/regression-loss", _epoch_reg_loss, epoch_num)
		writer.add_scalar("train/loss", _epoch_loss, epoch_num)
		writer.add_scalar("train/learning-rate", lr, epoch_num)

		# Save model file, optimizer and epoch number

		checkpoint = {
		    'epoch': epoch_num,
		    'state_dict': retinanet.state_dict(),
		    'optimizer': optimizer.state_dict(),
		}

		# torch.save(retinanet.module, os.path.join(parser.snapshots, '{}_retinanet_{}.pt'.format(parser.dataset, epoch_num)))
		
		# Check whether this epoch's model achieves the highest mAP so far
		# (mAP is only computed when validating on a csv dataset)
		is_best = False
		if parser.dataset == 'csv' and parser.csv_val is not None and best_mAP < mAP:
			best_mAP = mAP
			is_best = True

		save_ckpt(checkpoint, is_best, parser.snapshots, '{}_retinanet_{}.pt'.format(parser.dataset, epoch_num + 1))

		print('\n')

	retinanet.eval()

	torch.save(retinanet, 'model_final.pt')

	writer.flush()
Beispiel #20
0
def main(args=None):
    parser = argparse.ArgumentParser(
        description=
        'Simple paps training script for training a RetinaNet network.')
    parser.add_argument('--batch_size',
                        help='Batch size',
                        type=int,
                        default=0)
    parser.add_argument('--test_data',
                        help='test data file',
                        default='data/test.npy')
    parser.add_argument('--model_dir',
                        help='pretrained model dir',
                        default='trained_models/resnet50_640/model.pt')
    parser.add_argument('--threshold',
                        help='Score threshold for keeping detections',
                        type=float,
                        default=0.1)

    parser = parser.parse_args(args)

    GPU_NUM = 0  # index of the GPU to use
    device = torch.device(
        f'cuda:{GPU_NUM}' if torch.cuda.is_available() else 'cpu')
    torch.cuda.set_device(device)  # change allocation of current GPU
    print('device', device)

    retinanet = model.resnet50(num_classes=2, device=device)
    retinanet = torch.nn.DataParallel(retinanet,
                                      device_ids=[GPU_NUM],
                                      output_device=GPU_NUM).to(device)
    retinanet.load_state_dict(torch.load(parser.model_dir))
    #     retinanet.to(device)

    dataset_val = PapsDataset('data/',
                              set_name='val_2class',
                              transform=val_transforms)

    val_data_loader = DataLoader(dataset_val,
                                 batch_size=1,
                                 shuffle=False,
                                 num_workers=4,
                                 collate_fn=collate_fn)

    retinanet.eval()
    start_time = time.time()
    threshold = parser.threshold
    results = []
    GT_results = []
    image_ids = []
    cnt = 0

    for index, data in enumerate(tqdm(val_data_loader)):
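        # Only evaluate the first 100 images for a quick check.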
        if cnt > 100:
            break
        cnt += 1
        with torch.no_grad():
            images, tbox, tlabel, targets = data
            batch_size = len(images)
            #         print(tbox)
            #         print(len(tbox[0]))

            c, h, w = images[0].shape
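            # Stack the list of image tensors into a single batch on the target device.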
            images = torch.cat(images).view(-1, c, h, w).to(device)

            outputs = retinanet(images)
            scores, labels, boxes = outputs

            scores = scores.cpu()
            labels = labels.cpu()
            boxes = boxes.cpu()

            if boxes.shape[0] > 0:
                # change to (x, y, w, h) (MS COCO standard)
                boxes[:, 2] -= boxes[:, 0]
                boxes[:, 3] -= boxes[:, 1]
                #             print(boxes)

                # compute predicted labels and scores
                #for box, score, label in zip(boxes[0], scores[0], labels[0]):
                for box_id in range(boxes.shape[0]):
                    score = float(scores[box_id])
                    label = int(labels[box_id])
                    box = boxes[box_id, :]

                    # scores are sorted, so we can break
                    if score < threshold:
                        break

                    # append detection for each positively labeled class
                    image_result = {
                        'image_id': dataset_val.image_ids[index],
                        'category_id': dataset_val.label_to_coco_label(label),
                        'score': float(score),
                        'bbox': box.tolist(),
                    }

                    # append detection to results
                    results.append(image_result)

            if len(tbox[0]) > 0:

                # compute predicted labels and scores
                #for box, score, label in zip(boxes[0], scores[0], labels[0]):
                for box_id in range(len(tbox[0])):
                    score = float(0.99)
                    label = (tlabel[0][box_id])
                    box = list(tbox[0][box_id])
                    box[2] -= box[0]
                    box[3] -= box[1]

                    # append detection for each positively labeled class
                    image_result = {
                        'image_id': dataset_val.image_ids[index],
                        'category_id': dataset_val.label_to_coco_label(label),
                        'score': float(score),
                        'bbox': list(box),
                    }

                    # append detection to results
                    GT_results.append(image_result)

            # append image to list of processed images
            image_ids.append(dataset_val.image_ids[index])

            # print progress
            print('{}/{}'.format(index, len(dataset_val)), end='\r')

    if not len(results):
        print('No object detected')
    print('GT_results', len(GT_results))
    print('pred_results', len(results))

    # write output
    json.dump(results,
              open(
                  'trained_models/eval/{}_bbox_results.json'.format(
                      dataset_val.set_name), 'w'),
              indent=4)
    # write GT
    json.dump(GT_results,
              open(
                  'trained_models/eval/{}_GTbbox_results.json'.format(
                      dataset_val.set_name), 'w'),
              indent=4)

    print('validation time :', time.time() - start_time)

    # load results in COCO evaluation tool
    coco_true = dataset_val.coco
    coco_pred = coco_true.loadRes(
        'trained_models/eval/{}_bbox_results.json'.format(
            dataset_val.set_name))
    coco_gt = coco_true.loadRes(
        'trained_models/eval/{}_GTbbox_results.json'.format(
            dataset_val.set_name))

    # run COCO evaluation
    # coco_eval = COCOeval(coco_true, coco_pred, 'bbox')
    coco_eval = COCOeval(coco_gt, coco_pred, 'bbox')
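    # Restrict evaluation to the images that were actually processed above.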
    coco_eval.params.imgIds = image_ids
    # coco_eval.params.catIds = [0]
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()
Beispiel #21
0
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.',
                        default='coco')

    parser.add_argument('--coco_path',
                        help='Path to COCO directory',
                        default='cocodataset')

    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )

    parser.add_argument('--model_path',
                        help='Path to model (.pt) file.',
                        type=str,
                        default='coco_resnet_50_map_0_335_state_dict.pt')

    parser = parser.parse_args(args)

    if parser.dataset == 'coco':
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        dataset_val = CSVDataset(train_file=parser.csv_val,
                                 class_list=parser.csv_classes,
                                 transform=transforms.Compose(
                                     [Normalizer(), Resizer()]))
    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    sampler_val = AspectRatioBasedSampler(dataset_val,
                                          batch_size=1,
                                          drop_last=False)
    dataloader_val = DataLoader(dataset_val,
                                num_workers=1,
                                collate_fn=collater,
                                batch_sampler=sampler_val)

    # Create the model and load the trained weights
    retinanet = model.resnet50(num_classes=dataset_val.num_classes(),
                               pretrained=True)
    retinanet.load_state_dict(torch.load(parser.model_path))

    use_gpu = True

    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet.eval()

    unnormalize = UnNormalizer()

    def draw_caption(image, box, caption):

        b = np.array(box).astype(int)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN,
                    1, (0, 0, 0), 2)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN,
                    1, (255, 255, 255), 1)

    for idx, data in enumerate(dataloader_val):

        with torch.no_grad():
            st = time.time()

            scores, classification, transformed_anchors = retinanet(
                data['img'].cuda().float())
            print('Elapsed time: {}'.format(time.time() - st))
            idxs = np.where(scores.cpu() > 0.5)
            img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()

            img[img < 0] = 0
            img[img > 255] = 255

            img = np.transpose(img, (1, 2, 0))

            img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)

            for j in range(idxs[0].shape[0]):
                bbox = transformed_anchors[idxs[0][j], :]
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                label_name = dataset_val.labels[int(
                    classification[idxs[0][j]])]
                draw_caption(img, (x1, y1, x2, y2), label_name)

                cv2.rectangle(img, (x1, y1), (x2, y2),
                              color=(0, 0, 255),
                              thickness=2)
                print(label_name)

            cv2.imshow('img', img)
            cv2.waitKey(0)
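The loop above undoes the Normalizer transform with UnNormalizer, clamps to [0, 255] and converts to HWC before drawing. A hedged standalone equivalent of that un-normalization step, assuming the usual ImageNet mean and std:

import numpy as np

IMAGENET_MEAN = np.array([0.485, 0.456, 0.406])
IMAGENET_STD = np.array([0.229, 0.224, 0.225])

def to_displayable(img_chw):
    # img_chw: normalized float array of shape (3, H, W)
    img = img_chw * IMAGENET_STD[:, None, None] + IMAGENET_MEAN[:, None, None]
    img = np.clip(img * 255.0, 0, 255).astype(np.uint8)
    return np.transpose(img, (1, 2, 0))  # HWC, as expected by OpenCV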
Beispiel #22
0
import torch
from retinanet import model
from icecream import ic

retinanet = model.resnet50(num_classes=81, pretrained=False).cuda()
retinanet.eval()
x = torch.rand([10, 3, 128, 128]).cuda()

retinanet(x)
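The snippet above is a bare forward-pass smoke test on random data. Since no gradients are needed, a hedged variant wraps the call in torch.no_grad() and also runs without a GPU, assuming (as the example does) that the model accepts a raw image batch in eval mode:

import torch
from retinanet import model

retinanet = model.resnet50(num_classes=81, pretrained=False)
retinanet.eval()

with torch.no_grad():
    x = torch.rand([2, 3, 128, 128])  # small random batch for a quick check
    out = retinanet(x)

print(type(out))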
Beispiel #23
0
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )

    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=100)
    parser.add_argument('--model', help='Path to model (.pt) file.')

    parser.add_argument('--finetune',
                        help='whether to load a trained RetinaNet model',
                        type=bool,
                        default=False)
    parser.add_argument('--gpu', help='train on GPU', type=bool, default=False)
    parser.add_argument('--batch_size', help='Batch size', type=int, default=2)

    parser = parser.parse_args(args)

    # Create the data loaders
    if parser.dataset == 'coco':

        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO,')

        dataset_train = CocoDataset(parser.coco_path,
                                    set_name='train2017',
                                    transform=transforms.Compose(
                                        [Normalizer(),
                                         Augmenter(),
                                         Resizer()]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))

    elif parser.dataset == 'csv':

        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV.')

        if parser.csv_classes is None:
            raise ValueError(
                'Must provide --csv_classes when training on CSV.')

        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(),
                                        Augmenter(),
                                        Resizer()]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(),
                                          Resizer()]))

    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    #sampler = AspectRatioBasedSampler(dataset_train, batch_size=2, drop_last=False)
    sampler = AspectRatioBasedSampler(dataset_train,
                                      parser.batch_size,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=3,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=1,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=3,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model
    '''
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')
    '''

    use_gpu = parser.gpu

    #import pdb
    #pdb.set_trace()

    # Load the COCO-pretrained model (80 classes)
    retinanet = model.resnet50(num_classes=80, pretrained=True)
    retinanet.load_state_dict(torch.load(parser.model))
    for param in retinanet.parameters():
        param.requires_grad = False

    retinanet.regressionModel = model.RegressionModel(256)
    retinanet.classificationModel = model.ClassificationModel(
        256, num_classes=dataset_train.num_classes())

    prior = 0.01
    retinanet.classificationModel.output.weight.data.fill_(0)
    retinanet.classificationModel.output.bias.data.fill_(-math.log(
        (1.0 - prior) / prior))

    retinanet.regressionModel.output.weight.data.fill_(0)
    retinanet.regressionModel.output.bias.data.fill_(0)

    # for m in retinanet.classificationModel.modules():
    #     if isinstance(m, nn.Conv2d):
    #         n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
    #         m.weight.data.normal_(0, math.sqrt(2. / n))
    #     elif isinstance(m, nn.BatchNorm2d):
    #         m.weight.data.fill_(1)
    #         m.bias.data.zero_()

    # for m in retinanet.regressionModel.modules():
    #     if isinstance(m, nn.Conv2d):
    #         n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
    #         m.weight.data.normal_(0, math.sqrt(2. / n))
    #     elif isinstance(m, nn.BatchNorm2d):
    #         m.weight.data.fill_(1)
    #         m.bias.data.zero_()

    if use_gpu:
        if torch.cuda.is_available():
            retinanet = retinanet.cuda()

    if use_gpu and torch.cuda.is_available():
        #retinanet.load_state_dict(torch.load(parser.model))
        retinanet = torch.nn.DataParallel(retinanet).cuda()
    else:
        #retinanet.load_state_dict(torch.load(parser.model))
        retinanet = torch.nn.DataParallel(retinanet)

    retinanet.training = True

    optimizer = optim.Adam(
        [{
            'params': retinanet.module.regressionModel.parameters()
        }, {
            'params': retinanet.module.classificationModel.parameters()
        }], 1e-6)

    #optimizer = optim.Adam(retinanet.parameters(), lr=1e-6)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(parser.epochs):

        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            try:
                #import pdb
                #pdb.set_trace()

                optimizer.zero_grad()

                if use_gpu and torch.cuda.is_available():
                    classification_loss, regression_loss = retinanet(
                        [data['img'].cuda().float(), data['annot'].cuda()])
                else:
                    classification_loss, regression_loss = retinanet(
                        [data['img'].float(), data['annot']])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                loss = classification_loss + regression_loss

                if bool(loss == 0):
                    continue

                loss.backward()

                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                optimizer.step()

                loss_hist.append(float(loss))

                epoch_loss.append(float(loss))

                print(
                    'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                    .format(epoch_num, iter_num, float(classification_loss),
                            float(regression_loss), np.mean(loss_hist)))

                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        if parser.dataset == 'coco':

            print('Evaluating dataset')

            coco_eval.evaluate_coco(dataset_val, retinanet)

        elif parser.dataset == 'csv' and parser.csv_val is not None:

            print('Evaluating dataset')

            mAP = csv_eval.evaluate(dataset_val, retinanet)

        scheduler.step(np.mean(epoch_loss))

        if epoch_num % 5 == 0:
            torch.save(
                retinanet.module,
                '{}_freezinetune_{}.pt'.format(parser.dataset, epoch_num))

    retinanet.eval()
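Beispiel #23 freezes the pretrained backbone and rebuilds only the regression and classification heads before fine-tuning. A small hedged helper to confirm that only the new heads remain trainable; the name retinanet is assumed to refer to the model built as above:

def count_parameters(module):
    # count trainable vs. total parameters of an nn.Module
    trainable = sum(p.numel() for p in module.parameters() if p.requires_grad)
    total = sum(p.numel() for p in module.parameters())
    return trainable, total

# usage:
# trainable, total = count_parameters(retinanet)
# print('trainable: {} / total: {}'.format(trainable, total))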
Beispiel #24
0
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )
    parser.add_argument('--exp_name',
                        help='Path to folder for saving the model and log',
                        type=str)
    parser.add_argument('--output_folder',
                        help='Path to folder for saving all the experiments',
                        type=str)

    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=100)  # 100
    parser.add_argument('--batch_size', help='Batch size', type=int, default=2)
    parser.add_argument('--lr',
                        help='Learning rate',
                        type=float,
                        default=1e-5)
    parser.add_argument('--caption',
                        help='Any thing in particular about the experiment',
                        type=str)
    parser.add_argument('--server',
                        help='server name',
                        type=str,
                        default='ultron')
    parser.add_argument('--detector',
                        help='detection algo',
                        type=str,
                        default='RetinaNet')
    parser.add_argument('--arch', help='model architecture', type=str)
    parser.add_argument('--pretrain', default=False, action='store_true')
    parser.add_argument('--freeze_batchnorm',
                        default=False,
                        action='store_true')

    parser = parser.parse_args(args)

    output_folder_path = os.path.join(parser.output_folder, parser.exp_name)
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    PARAMS = {
        'dataset': parser.dataset,
        'exp_name': parser.exp_name,
        'depth': parser.depth,
        'epochs': parser.epochs,
        'batch_size': parser.batch_size,
        'lr': parser.lr,
        'caption': parser.caption,
        'server': parser.server,
        'arch': parser.arch,
        'pretrain': parser.pretrain,
        'freeze_batchnorm': parser.freeze_batchnorm
    }

    exp = neptune.create_experiment(
        name=parser.exp_name,
        params=PARAMS,
        tags=[parser.arch, parser.detector, parser.dataset, parser.server])

    # Create the data loaders
    if parser.dataset == 'coco':

        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO,')

        dataset_train = CocoDataset(parser.coco_path,
                                    set_name='train2017',
                                    transform=transforms.Compose(
                                        [Normalizer(),
                                         Augmenter(),
                                         Resizer()]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))

    elif parser.dataset == 'csv':

        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV.')

        if parser.csv_classes is None:
            raise ValueError(
                'Must provide --csv_classes when training on CSV.')

        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(),
                                        Augmenter(),
                                        Resizer()]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(),
                                          Resizer()]))

    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=parser.batch_size,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=3,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=1,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=3,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18 and parser.arch == 'Resnet':
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                   pretrained=parser.pretrain)
    elif parser.depth == 10 and parser.arch == 'Resnet':
        retinanet = model.resnet10(num_classes=dataset_train.num_classes(),
                                   pretrained=parser.pretrain)
    elif parser.depth == 18 and parser.arch == 'BiRealNet18':
        checkpoint_path = None
        if parser.pretrain:
            checkpoint_path = '/media/Rozhok/Bi-Real-net/pytorch_implementation/BiReal18_34/models/imagenet_baseline/checkpoint.pth.tar'
        retinanet = birealnet18(checkpoint_path,
                                num_classes=dataset_train.num_classes())
    elif parser.depth == 34 and parser.arch == 'Resnet':
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                   pretrained=parser.pretrain)
    elif parser.depth == 50 and parser.arch == 'Resnet':
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                                   pretrained=parser.pretrain)
    elif parser.depth == 101 and parser.arch == 'Resnet':
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(),
                                    pretrained=parser.pretrain)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(),
                                    pretrained=parser.pretrain)
    elif parser.arch == 'ofa':
        print("Model is ResNet50D.")
        bn_momentum = 0.1
        bn_eps = 1e-5
        retinanet = ResNet50D(
            n_classes=dataset_train.num_classes(),
            bn_param=(bn_momentum, bn_eps),
            dropout_rate=0,
            width_mult=1.0,
            depth_param=3,
            expand_ratio=0.35,
        )

    else:
        raise ValueError(
            'Unsupported model: depth must be one of 10, 18, 34, 50, 101, 152 '
            'and arch one of Resnet, BiRealNet18, ofa')

    print(retinanet)

    use_gpu = True

    if use_gpu:
        if torch.cuda.is_available():
            retinanet = retinanet.cuda()

    if torch.cuda.is_available():
        retinanet = torch.nn.DataParallel(retinanet).cuda()
    else:
        retinanet = torch.nn.DataParallel(retinanet)

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=parser.lr)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    if parser.freeze_batchnorm:
        retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(parser.epochs):

        exp.log_metric('Current lr', float(optimizer.param_groups[0]['lr']))
        exp.log_metric('Current epoch', int(epoch_num))

        retinanet.train()
        if parser.freeze_batchnorm:
            retinanet.module.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):

            try:
                optimizer.zero_grad()

                if torch.cuda.is_available():
                    classification_loss, regression_loss = retinanet(
                        [data['img'].cuda().float(), data['annot']])
                else:
                    classification_loss, regression_loss = retinanet(
                        [data['img'].float(), data['annot']])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                loss = classification_loss + regression_loss

                if bool(loss == 0):
                    continue

                loss.backward()

                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                optimizer.step()

                loss_hist.append(float(loss))

                epoch_loss.append(float(loss))

                print(
                    'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                    .format(epoch_num, iter_num, float(classification_loss),
                            float(regression_loss), np.mean(loss_hist)))

                exp.log_metric('Training: Classification loss',
                               float(classification_loss))
                exp.log_metric('Training: Regression loss',
                               float(regression_loss))
                exp.log_metric('Training: Total loss', float(loss))

                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        if parser.dataset == 'coco':

            print('Evaluating dataset')

            coco_eval.evaluate_coco(dataset_val,
                                    retinanet,
                                    output_folder_path,
                                    exp=exp)

        elif parser.dataset == 'csv' and parser.csv_val is not None:

            print('Evaluating dataset')

            mAP = csv_eval.evaluate(dataset_val, retinanet)

        scheduler.step(np.mean(epoch_loss))

        torch.save(
            retinanet.module,
            os.path.join(
                output_folder_path,
                '{}_retinanet_{}.pt'.format(parser.dataset, epoch_num)))

    retinanet.eval()

    torch.save(retinanet, os.path.join(output_folder_path, 'model_final.pt'))
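Beispiel #24 pickles whole modules (torch.save(retinanet.module, ...) per epoch and torch.save(retinanet, ...) at the end) rather than state_dicts. A hedged sketch of reloading such a checkpoint for inference; the path is a placeholder:

import torch

retinanet = torch.load('output_folder/exp_name/model_final.pt',
                       map_location='cpu')  # a pickled nn.Module, not a state_dict
retinanet.eval()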
Beispiel #25
0
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--model_path', help='Path to model', type=str)

    parser = parser.parse_args(args)

    dataset_val = CocoDataset(parser.coco_path,
                              set_name='val2017',
                              transform=transforms.Compose(
                                  [Normalizer(), Resizer()]))

    # Create the model
    retinanet = model.resnet50(num_classes=dataset_val.num_classes(),
                               pretrained=True)

    use_gpu = True

    if use_gpu:
        if torch.cuda.is_available():
            retinanet = retinanet.cuda()

    if torch.cuda.is_available():
        retinanet.load_state_dict(torch.load(parser.model_path))
        retinanet = torch.nn.DataParallel(retinanet).cuda()
    else:
        retinanet.load_state_dict(torch.load(parser.model_path))
        retinanet = torch.nn.DataParallel(retinanet)

    retinanet.training = False
    retinanet.eval()
    retinanet.module.freeze_bn()

    def draw_caption(image, box, caption):

        b = np.array(box).astype(int)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN,
                    1, (0, 0, 0), 2)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN,
                    1, (255, 255, 255), 1)

    for idx, data in enumerate(dataset_val):

        with torch.no_grad():
            st = time.time()
            # run network

            if torch.cuda.is_available():
                scores, labels, boxes = retinanet(data['img'].permute(
                    2, 0, 1).cuda().float().unsqueeze(dim=0))
            else:
                scores, labels, boxes = retinanet(data['img'].permute(
                    2, 0, 1).float().unsqueeze(dim=0))
            print('Elapsed time: {}'.format(time.time() - st))
            idxs = np.where(scores.cpu() > 0.5)

            # undo the Normalizer transform (multiply by ImageNet std, add mean)
            tensor = data['img'] * np.array([[[0.229, 0.224, 0.225]]]) + \
                np.array([[[0.485, 0.456, 0.406]]])
            img = tensor.mul(255).clamp(0, 255).byte().cpu().numpy()

            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            for j in range(idxs[0].shape[0]):
                bbox = boxes[idxs[0][j], :]
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                label_name = dataset_val.labels[int(labels[idxs[0][j]])]
                draw_caption(img, (x1, y1, x2, y2), label_name)

                cv2.rectangle(img, (x1, y1), (x2, y2),
                              color=(0, 0, 255),
                              thickness=2)
                print(label_name)

            cv2.imshow('img', img)
            cv2.waitKey(0)
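cv2.imshow in the loop above needs a display and fails on a headless server. A hedged alternative is to write the annotated frames to disk instead; the output directory is an assumption:

import os

import cv2
import numpy as np

def save_visualization(img, idx, out_dir='vis'):
    # write one annotated frame to disk instead of displaying it
    os.makedirs(out_dir, exist_ok=True)
    cv2.imwrite(os.path.join(out_dir, 'img_{:05d}.jpg'.format(idx)), img)

# example: save_visualization(np.zeros((480, 640, 3), dtype=np.uint8), 0)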
Beispiel #26
0
# -*- coding: utf-8 -*-
"""
Created on Fri Aug 21 12:30:54 2020

@author: worklab
"""

from retinanet.model import resnet50
import time
import torch



device = torch.device("cuda:0")

detector = resnet50(13, pretrained=True)
detector = detector.to(device)
detector.eval()
detector.freeze_bn()

transfer_times = []
detect_times = []
batch_sizes = [1,2,3,5,7,10,12,16,20,24,30,40,50,60,75,90,100]

for b in batch_sizes:
    transfer_time = 0
    detect_time = 0
    for i in range(0,1000):
        data = torch.randn([b,3,960,540])
        #data = torch.randn([b,3,2000,1000])
        start = time.time()
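Beispiel #26 is cut off in the middle of its timing loop. A hedged guess at how such a transfer/inference benchmark is usually completed; the iteration count, the synchronization calls, and the averaging below are assumptions, not the original code:

import time

import torch
from retinanet.model import resnet50

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
detector = resnet50(13, pretrained=False).to(device)
detector.eval()

batch_sizes = [1, 2, 4, 8]
n_iters = 20

with torch.no_grad():
    for b in batch_sizes:
        transfer_time, detect_time = 0.0, 0.0
        for _ in range(n_iters):
            data = torch.randn([b, 3, 960, 540])

            start = time.time()
            data = data.to(device)
            if device.type == 'cuda':
                torch.cuda.synchronize()
            transfer_time += time.time() - start

            start = time.time()
            detector(data)
            if device.type == 'cuda':
                torch.cuda.synchronize()
            detect_time += time.time() - start

        print('batch {}: transfer {:.4f}s, detect {:.4f}s (avg per batch)'.format(
            b, transfer_time / n_iters, detect_time / n_iters))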
Beispiel #27
0
def main(args=None):
    parser = argparse.ArgumentParser(
        description=
        'RegiGraph Pytorch Implementation Training Script. - Ahmed Nassar (ETHZ, IRISA).'
    )
    parser.add_argument("--batch_size",
                        type=int,
                        default=4,
                        help="The number of images per batch")
    parser.add_argument("--lr", type=float, default=1e-4)
    parser.add_argument(
        '--dataset_root',
        default='../datasets',
        help=
        'Dataset root directory path [../datasets/VOC, ../datasets/mapillary]')
    parser.add_argument('--dataset',
                        default='Pasadena',
                        choices=['Pasadena', 'Pasadena_Aerial', 'mapillary'],
                        type=str,
                        help='Pasadena, Pasadena_Aerial or mapillary')
    parser.add_argument("--overfit", type=int, default="0")
    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument("--num_epochs", type=int, default=100)
    parser.add_argument("--log_path", type=str, default="tensorboard/")
    parser.add_argument("--saved_path", type=str, default="trained_models")
    parser.add_argument("--test_interval",
                        type=int,
                        default=1,
                        help="Number of epoches between testing phases")
    parser.add_argument(
        "--es_min_delta",
        type=float,
        default=0.0,
        help=
        "Early stopping's parameter: minimum change loss to qualify as an improvement"
    )
    parser.add_argument(
        "--es_patience",
        type=int,
        default=0,
        help=
        "Early stopping's parameter: number of epochs with no improvement after which training will be stopped. Set to 0 to disable this technique."
    )
    parser.add_argument("--cluster", type=int, default=0)

    opt = parser.parse_args(args)
    if torch.cuda.is_available():
        num_gpus = torch.cuda.device_count()
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)

    if (opt.dataset == 'Pasadena' or opt.dataset == 'mapillary'
            or opt.dataset == 'Pasadena_Aerial'):
        train_dataset = VOCDetection(root=opt.dataset_root,
                                     overfit=opt.overfit,
                                     image_sets="trainval",
                                     transform=transforms.Compose([
                                         Normalizer(),
                                         Augmenter(),
                                         Resizer()
                                     ]),
                                     dataset_name=opt.dataset)
        valid_dataset = VOCDetection(root=opt.dataset_root,
                                     overfit=opt.overfit,
                                     image_sets="val",
                                     transform=transforms.Compose(
                                         [Normalizer(),
                                          Resizer()]),
                                     dataset_name=opt.dataset)

    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    # sampler = AspectRatioBasedSampler(train_dataset, batch_size=2, drop_last=False)

    training_params = {
        "batch_size": opt.batch_size,
        "shuffle": False,
        "drop_last": True,
        "collate_fn": collater,
        "num_workers": 4
    }

    training_generator = DataLoader(train_dataset, **training_params)

    if valid_dataset is not None:
        test_params = {
            "batch_size": opt.batch_size,
            "shuffle": False,
            "drop_last": False,
            "collate_fn": collater,
            "num_workers": 4
        }
        # sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
        test_generator = DataLoader(valid_dataset, **test_params)

    # Create the model
    if opt.depth == 18:
        retinanet = model.resnet18(num_classes=train_dataset.num_classes(),
                                   pretrained=True)
    elif opt.depth == 34:
        retinanet = model.resnet34(num_classes=train_dataset.num_classes(),
                                   pretrained=True)
    elif opt.depth == 50:
        retinanet = model.resnet50(num_classes=train_dataset.num_classes(),
                                   pretrained=True)
    elif opt.depth == 101:
        retinanet = model.resnet101(num_classes=train_dataset.num_classes(),
                                    pretrained=True)
    elif opt.depth == 152:
        retinanet = model.resnet152(num_classes=train_dataset.num_classes(),
                                    pretrained=True)
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True

    if use_gpu:
        if torch.cuda.is_available():
            retinanet = retinanet.cuda()

    if torch.cuda.is_available():
        retinanet = torch.nn.DataParallel(retinanet).cuda()
    else:
        retinanet = torch.nn.DataParallel(retinanet)

    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)

    if not os.path.isdir(opt.saved_path):
        os.makedirs(opt.saved_path)

    retinanet.training = True
    writer = SummaryWriter(opt.log_path + "regigraph_bs_" +
                           str(opt.batch_size) + "_dataset_" + opt.dataset +
                           "_backbone_" + str(opt.depth))
    optimizer = optim.Adam(retinanet.parameters(), lr=opt.lr)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    loss_hist = collections.deque(maxlen=500)
    best_loss = 1e5
    best_epoch = 0

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(train_dataset)))

    num_iter_per_epoch = len(training_generator)

    for epoch in range(opt.num_epochs):

        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []

        progress_bar = tqdm(training_generator)

        for iter, data in enumerate(progress_bar):
            try:
                optimizer.zero_grad()

                if torch.cuda.is_available():
                    classification_loss, regression_loss, graph_loss = retinanet(
                        [
                            data['img'].cuda().float(), data['annot'],
                            data['geo'], data['batch_map']
                        ])
                else:
                    classification_loss, regression_loss, graph_loss = retinanet(
                        [
                            data['img'].float(), data['annot'], data['geo'],
                            data['batch_map']
                        ])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                graph_loss = graph_loss.mean()

                loss = classification_loss + regression_loss + graph_loss

                if bool(loss == 0):
                    continue

                loss.backward()

                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                optimizer.step()

                loss_hist.append(float(loss))

                epoch_loss.append(float(loss))
                total_loss = np.mean(epoch_loss)

                if opt.cluster == 0:
                    progress_bar.set_description(
                        'Epoch: {}/{}. Iteration: {}/{}. Cls loss: {:.5f}. Reg loss: {:.5f}. Graph loss: {:.5f}. Batch loss: {:.5f} Total loss: {:.5f}'
                        .format(epoch + 1, opt.num_epochs, iter + 1,
                                num_iter_per_epoch, classification_loss,
                                regression_loss, graph_loss, float(loss),
                                total_loss))
                    writer.add_scalar('Train/Total_loss', total_loss,
                                      epoch * num_iter_per_epoch + iter)
                    writer.add_scalar('Train/Regression_loss', regression_loss,
                                      epoch * num_iter_per_epoch + iter)
                    writer.add_scalar('Train/Classification_loss (focal loss)',
                                      classification_loss,
                                      epoch * num_iter_per_epoch + iter)
                    writer.add_scalar('Train/Graph_loss', graph_loss,
                                      epoch * num_iter_per_epoch + iter)

                del classification_loss
                del regression_loss
                del graph_loss
            except Exception as e:
                print(e)
                continue

        scheduler.step(np.mean(epoch_loss))

        if epoch % opt.test_interval == 0:
            retinanet.eval()
            loss_regression_ls = []
            loss_classification_ls = []
            loss_graph_ls = []
            for iter, data in enumerate(test_generator):
                with torch.no_grad():
                    if torch.cuda.is_available():
                        classification_loss, regression_loss, graph_loss = retinanet(
                            [
                                data['img'].cuda().float(), data['annot'],
                                data['geo'], data['batch_map']
                            ])
                    else:
                        classification_loss, regression_loss, graph_loss = retinanet(
                            [
                                data['img'].float(), data['annot'],
                                data['geo'], data['batch_map']
                            ])

                    classification_loss = classification_loss.mean()
                    regression_loss = regression_loss.mean()
                    graph_loss = graph_loss.mean()
                    loss_classification_ls.append(float(classification_loss))
                    loss_regression_ls.append(float(regression_loss))
                    loss_graph_ls.append(float(graph_loss))
                    # print(len(loss_classification_ls),len(loss_regression_ls),len(loss_graph_ls))

            cls_loss = np.mean(loss_classification_ls)
            reg_loss = np.mean(loss_regression_ls)
            gph_loss = np.mean(loss_graph_ls)
            loss = cls_loss + reg_loss + gph_loss

            print(
                '- Val Epoch: {}/{}. Classification loss: {:1.5f}. Regression loss: {:1.5f}. Graph loss: {:1.5f}. Total loss: {:1.5f}'
                .format(epoch + 1, opt.num_epochs, cls_loss, reg_loss,
                        gph_loss, loss))
            writer.add_scalar('Test/Total_loss', loss, epoch)
            writer.add_scalar('Test/Regression_loss', reg_loss, epoch)
            writer.add_scalar('Test/Graph_loss (graph loss)', gph_loss, epoch)
            writer.add_scalar('Test/Classification_loss (focal loss)', cls_loss,
                              epoch)

            if loss + opt.es_min_delta < best_loss:
                best_loss = loss
                best_epoch = epoch
                # mAP = csv_eval.evaluate(valid_dataset, retinanet)
                # print(mAP)
                torch.save(
                    retinanet.module,
                    os.path.join(
                        opt.saved_path,
                        "regigraph_bs_" + str(opt.batch_size) + "_dataset_" +
                        opt.dataset + "_epoch_" + str(epoch + 1) +
                        "_backbone_" + str(opt.depth) + ".pth"))

            # Early stopping
            if epoch - best_epoch > opt.es_patience > 0:
                print(
                    "Stop training at epoch {}. The lowest loss achieved is {}"
                    .format(epoch, loss))
                break
    writer.close()
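The validation block above keeps the best checkpoint and stops once no improvement has been seen for es_patience epochs. The same early-stopping pattern, extracted as a hedged standalone sketch with illustrative loss values:

best_loss, best_epoch = 1e5, 0
es_min_delta, es_patience = 0.0, 3

for epoch, val_loss in enumerate([3.2, 2.9, 2.8, 2.85, 2.9, 2.95, 3.0]):
    if val_loss + es_min_delta < best_loss:
        best_loss, best_epoch = val_loss, epoch
        # a checkpoint would be saved here
    if epoch - best_epoch > es_patience > 0:
        print('Stop training at epoch {}. The lowest loss achieved is {}'.format(
            epoch, best_loss))
        break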
Beispiel #28
0
    def initialize(self, opt):
        BaseModel.initialize(self, opt)
        # self.opt = opt
        self.isTrain = opt.isTrain

        # define tensors
        self.input_A = self.Tensor(opt.batchSize, opt.input_nc, opt.fineSize,
                                   opt.fineSize)
        self.input_B = self.Tensor(opt.batchSize, opt.output_nc, opt.fineSize,
                                   opt.fineSize)

        transform_list = [
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ]

        self.transform = transforms.Compose(transform_list)
        self.det = model.resnet50(num_classes=2, pretrained=True).cuda()
        # load/define networks
        self.netG = networks.define_G(opt.input_nc, opt.output_nc, opt.ngf,
                                      opt.which_model_netG, opt.norm,
                                      not opt.no_dropout, self.gpu_ids)
        #self.netG = networks.define_G(3, 3,opt.ngf, "PATN", "instance", not True, "normal",
        #0,n_downsampling=2)
        if self.isTrain:
            use_sigmoid = opt.no_lsgan
            self.netD_image = networks.define_image_D(
                opt.input_nc + opt.output_nc, opt.ndf, opt.which_model_netD,
                opt.n_layers_D, opt.norm, use_sigmoid, self.gpu_ids)
            use_sigmoid = not opt.no_lsgan
            self.det.training = True
            self.det.train()
            self.netD_person = networks.define_person_D_AC(
                opt.input_nc, opt.ndf, opt, use_sigmoid, self.gpu_ids)

        if not self.isTrain or opt.continue_train:
            #print(opt.which_epoch)
            self.load_network(self.netG, 'G', opt.which_epoch)
            if self.isTrain:
                self.load_network(self.netD_image, 'D_image', opt.which_epoch)
                self.load_network(self.netD_person, 'D_person',
                                  opt.which_epoch)

        if self.isTrain:
            self.fake_AB_pool = ImagePool(opt.pool_size)
            self.old_lr = opt.lr
            # define loss functions
            #print('haha'+ str(opt.no_lsgan))
            # self.criterionGAN = networks.GANLoss(use_lsgan=not opt.no_lsgan, tensor=self.Tensor)
            self.criterionGAN_image = networks.GANLoss(
                use_lsgan=not opt.no_lsgan, tensor=self.Tensor)
            self.criterionGAN_person = networks.GANLoss(use_lsgan=opt.no_lsgan,
                                                        tensor=self.Tensor)
            self.criterionL1 = torch.nn.L1Loss()

            # initialize optimizers
            self.optimizer_G = torch.optim.Adam(self.netG.parameters(),
                                                lr=opt.lr,
                                                betas=(opt.beta1, 0.999))
            self.optimizer_D_image = torch.optim.Adam(
                self.netD_image.parameters(),
                lr=opt.lr,
                betas=(opt.beta1, 0.999))
            self.optimizer_D_person = torch.optim.Adam(
                self.netD_person.parameters(),
                lr=opt.lr,
                betas=(opt.beta1, 0.999))
            self.optimizer_det = torch.optim.Adam(self.det.parameters(),
                                                  lr=opt.lr,
                                                  betas=(opt.beta1, 0.999))
        print('---------- Networks initialized -------------')
        networks.print_network(self.netG)
        if self.isTrain:
            networks.print_network(self.netD_image)
            networks.print_network(self.netD_person)
        print('-----------------------------------------------')
Beispiel #29
0
                                 transform=transforms.Compose([Normalizer(), Resizer()]))


# In[ ]:


sampler = AspectRatioBasedSampler(dataset_train, batch_size=8, drop_last=False)
dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler)

if dataset_val is not None:
    sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
    dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val)

# Create the model

retinanet = model.resnet50(num_classes=dataset_train.num_classes())
use_gpu = True

# Initialising the checkpoint 
num_classes = 8
PATH_TO_WEIGHTS = "../pretrained_weights.pt"
retinanet = retinanet_model.resnet50(80)
checkpoint = torch.load(PATH_TO_WEIGHTS)
retinanet.load_state_dict(checkpoint)
retinanet.classificationModel.fc = nn.Linear(720, num_classes)

if use_gpu:
    retinanet = retinanet.cuda()
print("Model retinanet : ",retinanet)

Beispiel #30
0
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.',
                        default='coco')
    parser.add_argument(
        '--coco_path',
        help='Path to COCO directory',
        default=
        '/media/zhuzhu/ec114170-f406-444f-bee7-a3dc0a86cfa2/dataset/coco')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )

    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=100)

    parser.add_argument('--use-gpu',
                        help='set this flag to train on the CPU (GPU is used by default)',
                        action='store_false',
                        default=True)
    parser.add_argument('--device-ids', help='GPU device ids', default=[0])

    args = parser.parse_args(args)

    # ------------------------------ Create the data loaders -----------------------------
    if args.dataset == 'coco':

        if args.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO,')

        dataset_train = CocoDataset(args.coco_path,
                                    set_name='train2017',
                                    transform=transforms.Compose(
                                        [Normalizer(),
                                         Augmenter(),
                                         Resizer()]))
        dataset_val = CocoDataset(args.coco_path,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))

    sampler_train = AspectRatioBasedSampler(dataset_train,
                                            batch_size=2,
                                            drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=3,
                                  collate_fn=collater,
                                  batch_sampler=sampler_train)
    sampler_val = AspectRatioBasedSampler(dataset_val,
                                          batch_size=1,
                                          drop_last=False)
    dataloader_val = DataLoader(dataset_val,
                                num_workers=3,
                                collate_fn=collater,
                                batch_sampler=sampler_val)

    # Create the model
    if args.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif args.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif args.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                                   pretrained=False)
    elif args.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    elif args.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    if args.use_gpu:
        retinanet = nn.DataParallel(retinanet,
                                    device_ids=args.device_ids).cuda()

    # retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    loss_hist = collections.deque(maxlen=500)

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(args.epochs):

        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()

                classification_loss, regression_loss = retinanet(
                    [data['img'].cuda().float(), data['annot']])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                loss = classification_loss + regression_loss

                if bool(loss == 0):
                    continue

                loss.backward()

                nn.utils.clip_grad_norm_(retinanet.parameters(),
                                         0.1)  # clip gradients to a max norm of 0.1

                optimizer.step()

                loss_hist.append(float(loss))

                epoch_loss.append(float(loss))

                print(
                    'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                    .format(epoch_num, iter_num, float(classification_loss),
                            float(regression_loss), np.mean(loss_hist)))

                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        if args.dataset == 'coco':

            print('Evaluating dataset')

            coco_eval.evaluate_coco(dataset_val, retinanet)

        scheduler.step(np.mean(epoch_loss))

        torch.save(retinanet.module,
                   '{}_retinanet_{}.pt'.format(args.dataset, epoch_num))

    retinanet.eval()

    torch.save(retinanet, 'model_final.pt')
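Since main() accepts an optional argument list that is handed to parse_args, the script above can also be driven programmatically. A hedged usage example; the COCO path is a placeholder:

if __name__ == '__main__':
    main(['--dataset', 'coco',
          '--coco_path', '/path/to/coco',
          '--depth', '50',
          '--epochs', '10'])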