def __init__(
        self,
        learning_rate: float = 0.0001,
        num_classes: int = 91,
        backbone: Optional[Union[str, torch.nn.Module]] = None,
        fpn: bool = True,
        pretrained: bool = False,
        pretrained_backbone: bool = True,
        trainable_backbone_layers: int = 3,
        **kwargs: Any,
    ):
        """
        Args:
            learning_rate: the learning rate
            num_classes: number of detection classes (including background)
            backbone: Pretrained backbone CNN architecture or torch.nn.Module instance.
            fpn: If True, creates a Feature Pyramid Network on top of Resnet based CNNs.
            pretrained: if true, returns a model pre-trained on COCO train2017
            pretrained_backbone: if true, returns a model with backbone pre-trained on Imagenet
            trainable_backbone_layers: number of trainable resnet layers starting from final block
        """
        # torchvision is an optional dependency here; fail fast with a clear error.
        if not _TORCHVISION_AVAILABLE:  # pragma: no cover
            raise ModuleNotFoundError(
                "You want to use `torchvision` which is not installed yet.")

        super().__init__()

        self.learning_rate = learning_rate
        self.num_classes = num_classes
        self.backbone = backbone
        if backbone is None:
            # Default path: standard torchvision Faster R-CNN with a
            # ResNet-50 FPN backbone (optionally COCO pre-trained).
            self.model = fasterrcnn_resnet50_fpn(
                pretrained=pretrained,
                pretrained_backbone=pretrained_backbone,
                trainable_backbone_layers=trainable_backbone_layers,
            )

            # Swap the stock (91-class COCO) box head for one sized to
            # the requested number of classes.
            in_features = self.model.roi_heads.box_predictor.cls_score.in_features
            self.model.roi_heads.box_predictor = FastRCNNPredictor(
                in_features, self.num_classes)

        else:
            if isinstance(self.backbone, torch.nn.Module):
                # Caller supplied a ready-made backbone module; use it as-is.
                backbone_model = self.backbone
                if pretrained_backbone:
                    import warnings

                    warnings.warn(
                        "You would need to load the pretrained state_dict yourself if you are "
                        "providing backbone of type torch.nn.Module / pl.LightningModule."
                    )
            else:
                # Backbone given by name: build it (optionally topped with an FPN).
                backbone_model = create_fasterrcnn_backbone(
                    self.backbone,
                    fpn,
                    pretrained_backbone,
                    trainable_backbone_layers,
                    **kwargs,
                )
            self.model = torchvision_FasterRCNN(backbone_model,
                                                num_classes=num_classes,
                                                **kwargs)
class Model:
    """Faster R-CNN based single-character detector.

    Loads a fine-tuned torchvision Faster R-CNN (ResNet-50 FPN) from
    'model-use.pth', detects characters in an image, and orders the
    detections into reading order: rows found by KMeans clustering on the
    boxes' vertical centres, then left-to-right within each row.

    NOTE(review): all attributes below are class-level (shared across
    instances), and building the network plus loading the checkpoint runs
    as a side effect of importing this module.
    """
    # Class names indexed by model label id; 'X' detections are discarded in getAns.
    CLASS_NAMES = ['__background__', 'A', 'B', 'C', 'D', 'X']
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn()
    num_classes = 6
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.to(device)
    # NOTE(review): no map_location — loading a GPU-saved checkpoint on a
    # CPU-only machine will fail; confirm where 'model-use.pth' was saved.
    model.load_state_dict(torch.load('model-use.pth'))
    model.eval()
    clf = KMeans(n_clusters=8)  # 8-cluster KMeans, used to group detections into rows

    # Run the detector on one image file and keep detections above threshold.
    def prediction(self, img, threshold):
        """Return (boxes, classes, scores) for detections scoring >= threshold.

        Boxes are [x1, y1, x2, y2] ints, scores are integer percentages.
        NOTE(review): raises IndexError when no detection reaches the
        threshold (the filtering comprehension below comes back empty).
        """
        # img = Image.open(img)
        img = cv2.imread(img) # read the image and convert it to white glyphs on a black background
        img[img > 180] = 255
        img = 255 - img
        img[img > 100] = 255
        img = Image.fromarray(img.astype('uint8')).convert('RGB')
        transform = transforms.Compose([transforms.ToTensor()])
        img = transform(img)
        img = img.to(self.device)
        pred = self.model([img]) # Pass the image to the model
        pred_class = [self.CLASS_NAMES[i] for i in list(pred[0]['labels'].to("cpu").numpy())]
        pred_boxes = [[int(i[0]), int(i[1]), int(i[2]), int(i[3])] for i in list(pred[0]['boxes'].to("cpu").detach().numpy())]
        pred_score = list(pred[0]['scores'].to("cpu").detach().numpy())
        # Index of the last score >= threshold; assumes scores arrive sorted
        # by confidence (descending) — TODO confirm against torchvision output.
        pred_t = [pred_score.index(x) for x in pred_score if x >= threshold][-1] # keep only detections above the threshold
        pred_boxes = pred_boxes[:pred_t+1]
        pred_class = pred_class[:pred_t+1]
        pred_score = pred_score[:pred_t+1]
        for i in range(len(pred_score)):
            pred_score[i] = int(pred_score[i] * 100) # convert confidence to an integer percentage
        return pred_boxes, pred_class, pred_score

    # Sort the predicted targets into reading order and return the result.
    def getAns(self, img, threshold):
        """Detect letters in `img` and return Letter objects sorted
        row-by-row, left-to-right; 'X' detections are excluded."""

        pred_boxes, pred_class, pred_score =  self.prediction(img, threshold)
        letters = []
        for i in range(len(pred_boxes)):
            if pred_class[i] != 'X':
                # NOTE(review): Letter is defined elsewhere; x.boxesn below is
                # presumably [x1, y1, x2, y2] — confirm against its definition.
                letter = Letter(pred_boxes[i], pred_class[i], pred_score[i])
                letters.append(letter)

        # Coarse sort dominated by the vertical coordinate (row order).
        letters.sort(key=lambda x: x.boxesn[0] + 2000 * x.boxesn[1])
        letters_y = np.array([(x.boxesn[1] + x.boxesn[3]) / 2 for x in letters]).reshape(-1, 1)
        # Cluster the boxes' vertical centres to split detections into rows,
        # then sort each row internally by the horizontal coordinate.
        self.clf.fit(letters_y)
        row_labels = self.clf.predict(letters_y)
        last_row_label = -1
        begin = 0
        sorted_letters = [] # holds the letters in final sorted order
        # Sentinel label (never produced by an 8-cluster fit) so the final
        # row is flushed by the change-detection loop below.
        row_labels = np.append(row_labels, 99)
        for i, row_label in enumerate(row_labels):
            if row_label != last_row_label: # a new row starts here
                temp = letters[begin:i]
                temp.sort(key=lambda x: x.boxesn[0])
                sorted_letters = sorted_letters + temp
                begin = i
            last_row_label = row_label
        return sorted_letters
        

        
# Example #3
# 0
def train(folder):
    """Fine-tune a COCO-pretrained Faster R-CNN (ResNet-50 FPN) on the
    BSTL traffic-light dataset rooted at ``folder``.

    Trains for 100 epochs with SGD and saves a checkpoint under /tmp every
    10 epochs (including epoch 0).
    """
    # import torchvision
    # from torchvision.models.detection import FasterRCNN
    # from torchvision.models.detection.rpn import AnchorGenerator
    from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
        pretrained=True)
    num_classes = 4  # (Red, Yellow, Green, Unknown)

    # get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features

    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    # # # # # #   # # # # # #   # # # # # #   # # # # # #   # # # # # #
    # Different ties but the above works best
    # backbone = torchvision.models.mobilenet_v2(pretrained=True).features
    # FasterRCNN needs to know the number of
    # output channels in a backbone. For mobilenet_v2, it's 1280
    # so we need to add it here
    # backbone.out_channels = 1280

    # let's make the RPN generate 5 x 3 anchors per spatial
    # location, with 5 different sizes and 3 different aspect
    # ratios. We have a Tuple[Tuple[int]] because each feature
    # map could potentially have different sizes and
    # aspect ratios
    # anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
    #                                   aspect_ratios=((0.5, 1.0, 2.0),))

    # let's define what are the feature maps that we will
    # use to perform the region of interest cropping, as well as
    # the size of the crop after rescaling.
    # if your backbone returns a Tensor, featmap_names is expected to
    # be [0]. More generally, the backbone should return an
    # OrderedDict[Tensor], and in featmap_names you can choose which
    # feature maps to use.
    # roi_pooler = torchvision.ops.MultiScaleRoIAlign(
    # featmap_names=['0', '1', '2', '3'],
    #                                                 output_size=7,
    #                                                 sampling_ratio=2)

    # put the pieces together inside a FasterRCNN model
    # model = FasterRCNN(backbone,
    #                   num_classes=4,
    #                   rpn_anchor_generator=anchor_generator,
    #                   box_roi_pool=roi_pooler)
    # replace the pre-trained head with a new one
    # # # # # #   # # # # # #   # # # # # #   # # # # # #   # # # # # #

    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')

    # use our dataset and defined transformations
    dataset = BSTLDataset(root=folder)
    # dataset_test = BSTLDataset(train=False)
    # Hold the last 50 samples out (unused here; the evaluation loader is
    # commented out below).
    indices = torch.randperm(len(dataset)).tolist()
    dataset = torch.utils.data.Subset(dataset, indices[:-50])
    # dataset_test = torch.utils.data.Subset(dataset_test, indices[-50:])

    # define training and validation data loaders
    data_loader = torch.utils.data.DataLoader(dataset,
                                              batch_size=10,
                                              shuffle=True,
                                              num_workers=8,
                                              collate_fn=collate_fn)

    # data_loader_test = torch.utils.data.DataLoader(
    #     dataset_test, batch_size=1, shuffle=False, num_workers=1,
    #     collate_fn=collate_fn)

    # move model to the right device
    model.to(device)

    # construct an optimizer
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params,
                                lr=0.005,
                                momentum=0.9,
                                weight_decay=0.0005)

    # and a learning rate scheduler
    # lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
    #                                                 step_size=3,
    #                                                 gamma=0.1)
    # NOTE(review): None is forwarded to run_one_epoch — confirm it
    # tolerates a missing scheduler.
    lr_scheduler = None

    # let's train it for 100 epochs
    num_epochs = 100
    loop = tqdm(total=(num_epochs), position=0)
    loss_hist = Averager()

    for epoch in range(num_epochs):
        loss_hist.reset()
        # train for one epoch, printing every 10 iterations
        tloss = run_one_epoch(data_loader, optimizer, model, lr_scheduler,
                              device, loss_hist)
        # eloss = evaluate(model, data_loader_test, device=device)
        # Evaluation is disabled; report a placeholder test loss.
        eloss = 0.0
        loop.set_description(
            'epoch:{}, train loss:{:.4f}, test loss:{:.4f}'.format(
                epoch, tloss, eloss))
        loop.update(1)
        if epoch % 10 == 0:
            torch.save(model.state_dict(), '/tmp/tlight_' + str(epoch) + '.pt')
# Example #4
# 0
def pretrained_model(model_name, model_dict=pretrained_model_dict, num_classes=2):
    """Return a detection model from ``model_dict`` with its box-predictor
    head replaced to output ``num_classes`` classes.

    Args:
        model_name: key into ``model_dict`` selecting the base model.
        model_dict: mapping of model names to pre-built torchvision-style
            detection models (defaults to the module-level
            ``pretrained_model_dict``).
        num_classes: number of output classes, including background.

    Returns:
        The selected model with a freshly initialised FastRCNNPredictor head.
    """
    model = model_dict[model_name]
    # Input feature size of the existing classification head.
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model
    # FIX: removed a large block of unreachable statements that followed this
    # return (data loaders, a second model build, and a dangling bodiless
    # `for epoch ...:` loop) — pasted in from another script, it made the
    # surrounding file a syntax error.
def main(args):
    """Evaluate a pretrained Faster R-CNN checkpoint on the dataset(s)
    listed in a YAML config file.

    Steps: parse the config, build a ResNet-50 or ResNet-101 FPN detector
    (chosen from the pretrained model's name), load the checkpoint (local
    path or URL), then run ``evaluate`` on every configured dataset.
    """
    utils.init_distributed_mode(args)
    print(args)

    # Opening YAML cfg config file
    with open(args.cfg_file, 'r') as stream:
        try:
            cfg_file = yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            # NOTE(review): the parse error is printed but execution
            # continues, so cfg_file would be undefined below — confirm
            # whether this should abort instead.
            print(exc)

    # Retrieving cfg
    test_cfg = cfg_file['test']
    model_cfg = cfg_file['model']
    data_cfg = cfg_file['dataset']

    # Setting device
    device = torch.device(model_cfg['device'])

    # Retrieving pretrained model
    available_pretrained_models = test_cfg['pretrained_models']
    pretrained_model_name = args.load_model
    assert pretrained_model_name in available_pretrained_models.keys(), \
        "Pretrained model {} not available".format(pretrained_model_name)
    checkpoint_path = available_pretrained_models[pretrained_model_name]

    # Creating model: the backbone depth is inferred from the model name.
    print("Creating model")
    if "50" in pretrained_model_name:
        model = fasterrcnn_resnet50_fpn(
            pretrained=False,
            pretrained_backbone=False,
            box_detections_per_img=model_cfg["max_dets_per_image"],
            box_score_thresh=model_cfg["det_thresh"],
            box_nms_thresh=model_cfg["nms"],
            model_dir=model_cfg["cache_folder"],
        )
    else:
        model = fasterrcnn_resnet101_fpn(
            pretrained=False,
            pretrained_backbone=False,
            box_detections_per_img=model_cfg["max_dets_per_image"],
            box_score_thresh=model_cfg["det_thresh"],
            box_nms_thresh=model_cfg["nms"],
            model_dir=model_cfg["cache_folder"],
        )

    # Loading weights: non-COCO checkpoints use a 1-class (+background) head.
    if not "coco" in pretrained_model_name:
        num_classes = 1 + 1  # num classes + background
        # Getting number of input features for the classifier
        in_features = model.roi_heads.box_predictor.cls_score.in_features
        # Replacing the pre-trained head with a new one
        model.roi_heads.box_predictor = FastRCNNPredictor(
            in_features, num_classes)

    if checkpoint_path.startswith('http://') or checkpoint_path.startswith(
            'https://'):
        checkpoint = torch.hub.load_state_dict_from_url(
            checkpoint_path,
            map_location='cpu',
            model_dir=model_cfg["cache_folder"])
    else:
        checkpoint = torch.load(checkpoint_path, map_location='cpu')

    # Some checkpoints wrap the weights under a 'model' key; unwrap them.
    if 'model' in checkpoint.keys():
        checkpoint = checkpoint['model']
    model.load_state_dict(checkpoint)

    # Putting model to device and setting eval mode
    model.to(device)
    model.eval()

    # Retrieving phase and some data parameters
    phase = test_cfg['phase']
    assert phase == "test" or phase == "val", "Not valid phase"
    data_root = data_cfg['root']
    datasets_names = data_cfg[phase]

    # Creating dataset(s) and dataloader(s); validation may be subsampled.
    percentage = None
    if phase == "val":
        percentage = test_cfg['percentage_val']
    for dataset_name, dataset_cfg in datasets_names.items():
        # Creating dataset
        dataset = CustomYoloAnnotatedDataset(data_root,
                                             {dataset_name: dataset_cfg},
                                             transforms=get_transform(),
                                             phase=phase,
                                             percentage=percentage)
        dataloader = DataLoader(dataset,
                                batch_size=test_cfg['batch_size'],
                                shuffle=False,
                                num_workers=test_cfg['num_workers'],
                                collate_fn=dataset.standard_collate_fn)

        # Evaluate
        evaluate(test_cfg,
                 model,
                 dataloader,
                 dataset_name,
                 split=dataset_cfg.rsplit(".", 1)[1],
                 args=args)

    print('DONE!')
# Example #7
# 0
def main(args):
    """Continual-learning experiment on the split Penn-Fudan benchmark.

    Builds a Faster R-CNN (detection only) or Mask R-CNN (detection +
    segmentation) model depending on ``args.detection_only``, wraps it in an
    ObjectDetectionTemplate strategy with linear LR warm-up, then trains and
    evaluates over the benchmark's stream of experiences.
    """
    # --- CONFIG
    device = torch.device(f"cuda:{args.cuda}" if torch.cuda.is_available()
                          and args.cuda >= 0 else "cpu")
    # ---------

    # --- TRANSFORMATIONS
    train_transform = ToTensor()
    test_transform = ToTensor()
    # ---------

    # --- SCENARIO CREATION
    torch.random.manual_seed(1234)
    n_exps = 5
    benchmark = split_penn_fudan(
        n_experiences=n_exps,
        train_transform=train_transform,
        eval_transform=test_transform,
    )
    # ---------

    # MODEL CREATION

    num_classes = benchmark.n_classes + 1  # N classes + background
    if args.detection_only:
        # Ignore the segmentation task
        # load a model pre-trained on COCO
        model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
            pretrained=True)

        # Replace the classifier with a new one, that has "num_classes" outputs
        # 1) Get number of input features for the classifier
        in_features = model.roi_heads.box_predictor.cls_score.in_features
        # 2) Replace the pre-trained head with a new one
        model.roi_heads.box_predictor = FastRCNNPredictor(
            in_features, num_classes)
    else:
        # Detection + Segmentation
        model = torchvision.models.detection.maskrcnn_resnet50_fpn(
            pretrained=True)

        # Replace the classifier with a new one, that has "num_classes" outputs
        # 1) Get number of input features for the classifier
        in_features = model.roi_heads.box_predictor.cls_score.in_features
        # 2) Replace the pre-trained head with a new one
        model.roi_heads.box_predictor = FastRCNNPredictor(
            in_features, num_classes)

        # now get the number of input features for the mask classifier
        in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
        hidden_layer = 256
        # and replace the mask predictor with a new one
        model.roi_heads.mask_predictor = MaskRCNNPredictor(
            in_features_mask, hidden_layer, num_classes)

    model = model.to(device)

    # Define the optimizer and the scheduler
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params,
                                lr=0.005,
                                momentum=0.9,
                                weight_decay=0.0005)

    # Linear LR warm-up capped at 1000 iterations (or one epoch of the
    # first experience, whichever is shorter).
    train_mb_size = 5
    warmup_factor = 1.0 / 1000
    warmup_iters = min(
        1000,
        len(benchmark.train_stream[0].dataset) // train_mb_size - 1)
    lr_scheduler = torch.optim.lr_scheduler.LinearLR(
        optimizer, start_factor=warmup_factor, total_iters=warmup_iters)

    # CREATE THE STRATEGY INSTANCE (NAIVE)
    cl_strategy = ObjectDetectionTemplate(
        model=model,
        optimizer=optimizer,
        train_mb_size=train_mb_size,
        train_epochs=1,
        eval_mb_size=train_mb_size,
        device=device,
        plugins=[
            # Warm-up is stepped per iteration and applied only to the
            # first epoch of the first experience.
            LRSchedulerPlugin(
                lr_scheduler,
                step_granularity="iteration",
                first_exp_only=True,
                first_epoch_only=True,
            )
        ],
        evaluator=EvaluationPlugin(
            timing_metrics(epoch=True),
            loss_metrics(epoch_running=True),
            make_penn_fudan_metrics(detection_only=args.detection_only),
            loggers=[InteractiveLogger()],
        ),
    )

    # TRAINING LOOP
    print("Starting experiment...")
    for i, experience in enumerate(benchmark.train_stream):
        print("Start of experience: ", experience.current_experience)
        print("Train dataset contains", len(experience.dataset), "instances")

        cl_strategy.train(experience, num_workers=4)
        print("Training completed")

        # Evaluate on the full test stream after every experience.
        cl_strategy.eval(benchmark.test_stream, num_workers=4)
        print("Evaluation completed")
def train(base_dir, n_splits=5, n_epochs=40, batch_size=16,
          train_folds=None, model_name='faster-rcnn-baseline',
          eval_per_n_epochs=10, seed=15501, verbose=True):
    """
    Train frcnn baseline.
    Largely inspired by: https://www.kaggle.com/pestipeti/pytorch-starter-fasterrcnn-train

    train_splits expects a list/tuple of ints. If train_splits,
    only train the specified splits.

    Args:
        base_dir: project root containing ``data/`` and ``artifacts/``.
        n_splits: number of GroupKFold splits over image_id.
        n_epochs: training epochs per fold.
        batch_size: training batch size.
        train_folds: optional list/tuple of fold indices to train; all folds
            by default.
        model_name: artifact sub-directory name for models/predictions/logs.
        eval_per_n_epochs: run validation every this many epochs.
        seed: numpy RNG seed.
        verbose: verbosity flag forwarded to ``log_message``.
    """
    np.random.seed(seed)

    # Resolve the input/output directory layout under base_dir.
    data_dir = os.path.join(base_dir, 'data')
    train_imgs_dir = os.path.join(data_dir, 'train')
    test_imgs_dir = os.path.join(data_dir, 'test')
    models_out_dir = os.path.join(base_dir, 'artifacts',
                                  model_name, 'models')
    os.makedirs(models_out_dir, exist_ok=True)
    preds_out_dir = os.path.join(base_dir, 'artifacts',
                                 model_name, 'predictions')
    os.makedirs(preds_out_dir, exist_ok=True)
    log_file = os.path.join(base_dir, 'artifacts', model_name, 'train.log')
    open(log_file, 'a').close() # create empty file.

    logger = logging.getLogger(model_name)
    logger.addHandler(logging.FileHandler(log_file))
    logger.setLevel(logging.INFO)

    train_df, test_df = get_train_test_df(data_dir)

    # Group folds by image_id so boxes of one image never straddle folds.
    kf = GroupKFold(n_splits)
    split = kf.split(X=train_df[['image_id']],
                     y=train_df[['x', 'y', 'w', 'h']],
                     groups=train_df['image_id'])

    if isinstance(train_folds, (list, tuple)):
        split = [fold for i, fold in enumerate(split) if i in train_folds]
        info = f'Training only on folds {train_folds}.'
        # FIX: previously called log_message(log_message, ...), passing the
        # function object itself instead of the message string.
        log_message(info, logger, verbose)
    else:
        train_folds = range(n_splits)

    if torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    for split_n, (train_idx, val_idx) in zip(train_folds, split):
        info = f'Training fold {split_n} beginning.'
        log_message(info, logger, verbose)

        train = train_df.iloc[train_idx].copy()
        val = train_df.iloc[val_idx].copy()

        train_dataset = WheatDataset(train, train_imgs_dir,
                                     get_train_transform())

        val_dataset = WheatDataset(val, train_imgs_dir,
                                   get_valid_transform())

        test_dataset = WheatDataset(test_df, test_imgs_dir,
                                    get_test_transform(),
                                    train=False)

        # load pretrained faster-rcnn with resnet50 backbone
        model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

        # update pre-trained head (one foreground class + background)
        num_classes = 2
        in_features = model.roi_heads.box_predictor.cls_score.in_features
        model.roi_heads.box_predictor = FastRCNNPredictor(in_features,
                                                          num_classes)

        train_data_loader = DataLoader(train_dataset,
                                       batch_size=batch_size,
                                       shuffle=True,
                                       num_workers=4,
                                       collate_fn=collate_fn)

        val_data_loader = DataLoader(val_dataset,
                                     batch_size=8,
                                     shuffle=False,
                                     num_workers=4,
                                     collate_fn=collate_fn)

        test_data_loader = DataLoader(test_dataset,
                                      batch_size=4,
                                      shuffle=False,
                                      num_workers=4,
                                      drop_last=False,
                                      collate_fn=collate_fn)

        model.to(device)
        params = [p for p in model.parameters() if p.requires_grad]
        optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 3, gamma=0.75)

        loss_hist = LossAverager()

        tstart = time.time()
        for epoch in range(1, n_epochs+1):
            info = f'Training epoch #{epoch}.'
            log_message(info, logger, verbose)

            loss_hist.reset()

            model.train()

            it = 1
            for images, targets, _ in train_data_loader:
                # NOTE(review): these NaN checks only log; `continue` here
                # advances the inner inspection loops, so a NaN batch is
                # still trained on. Confirm whether skipping the batch was
                # intended before changing this.
                for im, targ in zip(images, targets):
                    if torch.isnan(im).any():
                        info = f'ERROR: NaN in input image. Epoch {epoch}, iteration {it}.'
                        log_message(info, logger, verbose, err=True)
                        continue

                    for key, val in targ.items():
                        if torch.isnan(val).any():
                            info = f'ERROR: NaN in target {key}. Epoch {epoch}, iteration {it}.'
                            log_message(info, logger, verbose, err=True)
                            continue

                images = list(image.to(device) for image in images)
                # NOTE(review): .long() truncates float box coordinates —
                # confirm all target tensors are meant to be integer here.
                targets = [{k: v.long().to(device) for k, v in t.items()} for t in targets]

                # In train mode the detection model returns a dict of losses.
                loss_dict = model(images, targets)

                losses = sum(loss for loss in loss_dict.values())
                loss_value = losses.item()

                if not math.isfinite(loss_value):
                    info = f'Loss {loss_value} is not finite. Epoch {epoch}, iteration {it}.'
                    log_message(info, logger, verbose, err=True)
                    optimizer.zero_grad()
                    continue

                loss_hist.send(loss_value)

                optimizer.zero_grad()

                losses.backward()

                # torch.nn.utils.clip_grad_value_(model.parameters(), 2)
                optimizer.step()

                if it % 20 == 0:
                    info = f'Iteration #{it} loss: {loss_value}'
                    log_message(info, logger, verbose)
                it += 1

            lr_scheduler.step()

            tepoch = time.time() - tstart
            info = f'Epoch #{epoch} completed after {tepoch // 60} minutes {round(tepoch % 60)} seconds. Loss: {loss_hist.value}.'
            log_message(info, logger, verbose)

            # FIX: was `if epoch+1 % eval_per_n_epochs == 0`, which parses as
            # `epoch + (1 % eval_per_n_epochs)` and is never 0, so validation
            # never ran. Evaluate every `eval_per_n_epochs` epochs instead.
            if epoch % eval_per_n_epochs == 0:
                # may want to add this to eval.py... somehow?
                thresholds = np.linspace(0.5, 0.75, 6)
                precisions_by_thresh = []

                model.eval()

                for images, targets, _ in val_data_loader:
                    images = list(image.to(device) for image in images)
                    outputs = model(images)

                    for targ, out in zip(targets, outputs):
                        gt = targ['boxes'].cpu().numpy().astype(np.int32)
                        scores = out['scores'].data.cpu().numpy()
                        # predictions ordered by confidence
                        preds = out['boxes'].data.cpu().numpy()[np.argsort(scores)]
                        ap_by_thresh = calculate_image_precision_by_threshold(gt,
                                                                              preds,
                                                                              thresholds=thresholds,
                                                                              form='pascal_voc')
                        precisions_by_thresh.extend(ap_by_thresh)

                mean_precisions_by_thresh = pd.DataFrame(precisions_by_thresh, columns=['thresh', 'ap'])
                mean_precisions_by_thresh = mean_precisions_by_thresh.groupby('thresh')['ap'].mean().reset_index()
                mean_ap = mean_precisions_by_thresh['ap'].mean()

                for thresh, ap in zip(mean_precisions_by_thresh['thresh'], mean_precisions_by_thresh['ap']):
                    info = f'Epoch #{epoch} - AP at IOU threshold {thresh}: {ap}.'
                    log_message(info, logger, verbose)

                info = f'Epoch #{epoch} - Mean AP across all thresholds: {mean_ap}.'
                log_message(info, logger, verbose)

        # save model.
        torch.save(model.state_dict(), os.path.join(models_out_dir,
                                                    f'trained_fold_{split_n}.pth'))

        model.eval()

        # Out-of-fold predictions on the validation split.
        detection_threshold = 0.1
        res = []

        for images, _, image_ids in val_data_loader:
            images = list(image.to(device) for image in images)
            outputs = model(images)

            for output, image_id in zip(outputs, image_ids):
                boxes = output['boxes'].data.cpu().numpy()
                scores = output['scores'].data.cpu().numpy()

                boxes = boxes[scores >= detection_threshold].astype(np.int32)
                scores = scores[scores >= detection_threshold]

                # Convert (x1, y1, x2, y2) corners to (x, y, w, h).
                boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
                boxes[:, 3] = boxes[:, 3] - boxes[:, 1]

                for out in np.hstack([boxes, scores.reshape(-1, 1)]):
                    res.append([image_id] + list(out))

        df_res = pd.DataFrame(res, columns=['image_id', 'x', 'y', 'w', 'h', 'score'])
        df_res.to_csv(os.path.join(preds_out_dir, f'oof_pred_fold_{split_n}.csv'), index=False)

        # Predictions on the test set, same post-processing as above.
        detection_threshold = 0.1
        res = []

        for images, image_ids in test_data_loader:
            images = list(image.to(device) for image in images)
            outputs = model(images)

            for output, image_id in zip(outputs, image_ids):
                boxes = output['boxes'].data.cpu().numpy()
                scores = output['scores'].data.cpu().numpy()

                boxes = boxes[scores >= detection_threshold].astype(np.int32)
                scores = scores[scores >= detection_threshold]

                boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
                boxes[:, 3] = boxes[:, 3] - boxes[:, 1]

                for out in np.hstack([boxes, scores.reshape(-1, 1)]):
                    res.append([image_id] + list(out))

        df_res = pd.DataFrame(res, columns=['image_id', 'x', 'y', 'w', 'h', 'score'])
        df_res.to_csv(os.path.join(preds_out_dir, f'test_pred_fold_{split_n}.csv'), index=False)
        # NOTE(review): orphaned fragment — `test_pascal`, `PATH_PASCAL`,
        # `PASCAL_IMG`, `PASCAL_XML`, `PascalDetectionDataset` and
        # `transform_detection_pascal` are not defined anywhere in view, and
        # `test_loader` is never used. Confirm whether this belongs to a
        # different script and should be removed.
        test_loader = PascalDetectionDataset(test_pascal,
                                             PATH_PASCAL + PASCAL_IMG,
                                             PATH_PASCAL + PASCAL_XML,
                                             transform_detection_pascal)

## Hyperparameters for detection.
num_archi_features = len(archi_features)
num_classes_detection = num_archi_features + 1  # num_archi_features + background
num_styles = len(styles)

## Build the detection model. Both variants are a ResNet-50 FPN Faster R-CNN;
## only the constructor differs, so the head replacement is shared below.
if args.weight == "bbox_level":
    from utils.pytorch_utils import fasterrcnn_resnet50_fpn_custom
    detector = fasterrcnn_resnet50_fpn_custom(True)
else:
    detector = models.detection.fasterrcnn_resnet50_fpn(True)

# Swap the pre-trained box head for one sized to our class count.
in_features = detector.roi_heads.box_predictor.cls_score.in_features
detector.roi_heads.box_predictor = FastRCNNPredictor(
    in_features, num_classes_detection)

# Weighting scheme: exponential when requested, linear otherwise
# (anything unrecognized also falls back to linear, with a warning).
is_exponential = args.exp_weights == 'exponential'
if args.exp_weights not in ('exponential', 'linear'):
    print("Unrecognized type of weighting, defaulted to linear")

detector.cuda()
def train(batch_size, checkpoint_freq, num_epochs):
    """Fine-tune a COCO-pretrained Mask R-CNN on the PennFudan pedestrian set.

    Args:
        batch_size: samples per batch for both the train and test loaders.
        checkpoint_freq: save a checkpoint every ``checkpoint_freq`` epochs
            (was previously ignored and a checkpoint saved every epoch).
        num_epochs: total number of training epochs.
    """
    num_classes = 2  # background + pedestrian

    model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)

    # Replace the box-classification head with one sized to num_classes.
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    # Replace the mask-prediction head likewise.
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask,
                                                       hidden_layer,
                                                       num_classes)
    model = torch.nn.DataParallel(model)
    model.to('cuda')

    dataset = PennFudanDataset('PennFudanPed', get_transform(train=True))
    dataset_test = PennFudanDataset('PennFudanPed', get_transform(train=False))

    # Hold out the last 50 samples of one random permutation for evaluation.
    indices = torch.randperm(len(dataset)).tolist()
    dataset = torch.utils.data.Subset(dataset, indices[:-50])
    dataset_test = torch.utils.data.Subset(dataset_test, indices[-50:])

    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, shuffle=True, num_workers=4,
        collate_fn=utils.collate_fn)

    data_loader_test = torch.utils.data.DataLoader(
        dataset_test, batch_size=batch_size, shuffle=False, num_workers=4,
        collate_fn=utils.collate_fn)

    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005,
                                momentum=0.9, weight_decay=0.0005)

    # Epoch-level schedule: decay the LR by 10x every 3 epochs.
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=3,
                                                   gamma=0.1)

    hook = smd.Hook.create_from_json_file()

    for epoch in range(num_epochs):
        hook.set_mode(modes.TRAIN)
        model.train()
        metric_logger = utils.MetricLogger(delimiter="  ")
        metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
        header = 'Epoch: [{}]'.format(epoch)

        # BUG FIX: the original reassigned `lr_scheduler` to the warmup
        # scheduler, permanently discarding the StepLR above and stepping
        # the warmup scheduler on every iteration of every epoch. Keep the
        # warmup scheduler separate and step it per-iteration only during
        # the first epoch.
        warmup_scheduler = None
        if epoch == 0:
            warmup_factor = 1. / 1000
            warmup_iters = min(1000, len(data_loader) - 1)
            warmup_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)

        for iteration, (images, targets) in enumerate(data_loader):
            images = list(image.to('cuda') for image in images)
            targets = [{k: v.to('cuda') for k, v in t.items()} for t in targets]

            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())

            # Reduced copies are for logging only; gradients flow from `losses`.
            loss_dict_reduced = utils.reduce_dict(loss_dict)
            losses_reduced = sum(loss for loss in loss_dict_reduced.values())
            loss_value = losses_reduced.item()

            optimizer.zero_grad()
            losses.backward()
            optimizer.step()

            if warmup_scheduler is not None:
                warmup_scheduler.step()

            metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
            metric_logger.update(lr=optimizer.param_groups[0]["lr"])

        # BUG FIX: the checkpoint path was the literal 'model_{}.pth'
        # (never formatted), so every epoch overwrote one file; also honor
        # the previously-unused `checkpoint_freq` argument.
        if checkpoint_freq and (epoch + 1) % checkpoint_freq == 0:
            utils.save_on_master({
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            }, 'model_{}.pth'.format(epoch))

        # Advance the epoch-level StepLR schedule.
        lr_scheduler.step()

        hook.set_mode(modes.EVAL)
        evaluate(model, data_loader_test, device='cuda')
    def build_and_set_model(self):
        """Construct the Mask R-CNN model plus its optimizer and LR scheduler.

        Sets ``self.device``, ``self.model_ft``, ``self.optimizer`` and
        ``self.lr_scheduler``.
        """
        num_classes = 2  # background + the single foreground class
        # TODO: check whether CUDA is supported; CPU only for now.
        self.device = torch.device("cpu")

        # COCO-pretrained Mask R-CNN (see the torchvision finetuning tutorial:
        # https://pytorch.org/tutorials/intermediate/torchvision_tutorial.html).
        # The pretrained weights fix num_classes=91, so both heads are swapped
        # out below.
        self.model_ft = torchvision.models.detection.maskrcnn_resnet50_fpn(
            pretrained=True, image_mean=self.img_mean, image_std=self.img_std)

        # Wrap the mask loss with a (mild) focal loss. Earlier experiments
        # tried gamma=2/alpha=0.75, gamma=1/alpha=0.75 etc.; current setting
        # effectively disables the focal re-weighting.
        focal = FocalLoss(gamma=0.5, alpha=0.5, magnifier=1)
        focal_mask_loss = functools.partial(maskrcnn_loss_focal,
                                            focal_loss_func=focal)

        RoIHeads_loss_customized.set_customized_loss(
            self.model_ft.roi_heads, maskrcnn_loss_customized=focal_mask_loss)
        RoIHeads_loss_customized.update_forward_func(self.model_ft.roi_heads)

        # Swap the box-classification head for one sized to our classes.
        box_in = self.model_ft.roi_heads.box_predictor.cls_score.in_features
        self.model_ft.roi_heads.box_predictor = FastRCNNPredictor(
            box_in, num_classes)

        # Swap the mask head likewise.
        mask_in = self.model_ft.roi_heads.mask_predictor.conv5_mask.in_channels
        hidden = 256
        self.model_ft.roi_heads.mask_predictor = MaskRCNNPredictor(
            mask_in, hidden, num_classes)

        self.model_ft.to(self.device)

        trainable = [p for p in self.model_ft.parameters() if p.requires_grad]

        start_learning_rate = 0.001
        try:
            # Monkey-patched flag: when resuming training, use a much
            # smaller learning rate.
            if self._debug_continue_training:
                start_learning_rate = 0.00001
        except Exception:
            pass

        self.optimizer = torch.optim.SGD(trainable,
                                         lr=start_learning_rate,
                                         momentum=0.9,
                                         weight_decay=0.0005)
        # Decay the LR by 10x every 4 epochs (step_size tuned empirically).
        self.lr_scheduler = torch.optim.lr_scheduler.StepLR(
            self.optimizer,
            step_size=4,
            gamma=0.1,
        )
# Example #12
# 0
def detection_fasterrcnn(img_path, finetune=False):
    """Run Faster R-CNN person detection on one image and display the result.

    Args:
        img_path: path to the input image, read with OpenCV (BGR).
        finetune: if True, replace the COCO box head with a fresh 2-class
            head (person + background). NOTE(review): the new head is
            randomly initialized — without loading finetuned weights the
            detections will be meaningless.
    """
    torch.cuda.empty_cache()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Model pre-trained on COCO.
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
        pretrained=True)

    if finetune:
        num_classes = 2  # 1 class (person) + background
        in_features = model.roi_heads.box_predictor.cls_score.in_features
        model.roi_heads.box_predictor = FastRCNNPredictor(
            in_features, num_classes)

    model = model.to(device)
    model.eval()
    print(model)

    # Load the image and convert HWC/BGR uint8 -> CHW/RGB float in [0, 1].
    img = cv2.imread(img_path)
    img_tr = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img_tr = np.transpose(img_tr, (2, 0, 1)).astype(np.float32) / 255.0

    t = torch.from_numpy(img_tr).to(device)
    t = t.unsqueeze(0)

    with torch.no_grad():
        out = model(t)

    boxes = out[0]["boxes"].data.cpu().numpy()
    scores = out[0]["scores"].data.cpu().numpy()
    labels = out[0]["labels"].data.cpu().numpy()

    category = {
        0: 'background',
        1: 'person',
        2: 'traffic light',
        3: 'train',
        4: 'traffic sign',
        5: 'rider',
        6: 'car',
        7: 'bike',
        8: 'motor',
        9: 'truck',
        10: 'bus'
    }

    # BUG FIX: filter boxes, scores AND labels with the same mask. The
    # original filtered only `boxes`, so indexing the unfiltered
    # scores/labels stayed aligned only because torchvision happens to
    # return detections sorted by descending score.
    keep = scores >= 0.5
    boxes = boxes[keep].astype(np.int32)
    scores = scores[keep]
    labels = labels[keep].tolist()

    pnum = 0
    for i, box in enumerate(boxes):
        cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]), (255, 0, 0),
                      thickness=2)
        if labels[i] == 1:
            draw_texts(img,
                       'person ' + str(round(scores[i], 3)),
                       offset_x=box[0],
                       offset_y=box[1])
            pnum += 1
    draw_texts(img,
               'people: ' + str(pnum),
               offset_x=10,
               offset_y=20,
               color=(0, 255, 0))

    cv2.imshow("result", img)
    cv2.waitKey(0)
# Example #13
# 0
def train(opt):
    """Train a Faster R-CNN ResNet-50 FPN detector on Waymo camera data.

    Bug fixes vs. the original:
      * ``images`` / ``targets`` are Python lists — lists have no ``.cuda()``,
        so each contained tensor is moved to the GPU individually.
      * ``scheduler.step()`` is called without arguments: StepLR's ``step``
        does not take a loss (the old call passed the mean epoch loss as the
        deprecated ``epoch`` argument, corrupting the schedule).
      * the ONNX-export path no longer calls ``backbone_net.model.set_swish``
        — an EfficientDet-specific API that does not exist on torchvision's
        Faster R-CNN and raised AttributeError.

    Args:
        opt: parsed CLI options (batch_size, num_epochs, cam_view,
            pretrained_model, freeze_layers, log_path, saved_path,
            test_interval, es_min_delta, es_patience).
    """
    num_gpus = 1
    if torch.cuda.is_available():
        num_gpus = torch.cuda.device_count()
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)

    training_params = {"batch_size": opt.batch_size * num_gpus,
                       "shuffle": True,
                       "drop_last": True,
                       "collate_fn": collate_fn,
                       "num_workers": 12}

    test_params = {"batch_size": opt.batch_size,
                   "shuffle": False,
                   "drop_last": False,
                   "collate_fn": collate_fn,
                   "num_workers": 12}

    training_set = WaymoDataset(
        cameras=[opt.cam_view], scope='training',
        transform=transforms.Compose([Normalizer(), Resizer()]),
        mod='fast_rcnn')
    training_generator = DataLoader(training_set, **training_params)

    test_set = WaymoDataset(
        cameras=[opt.cam_view], scope='validation',
        transform=transforms.Compose([Normalizer(), Resizer()]),
        mod='fast_rcnn')
    test_generator = DataLoader(test_set, **test_params)

    print(f'Using pretrained model? {opt.pretrained_model}')
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=opt.pretrained_model)
    # Re-head the detector for the dataset's class count; the head weights
    # are then overwritten by the checkpoint loaded below.
    num_classes = training_set.num_classes()
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    model.load_state_dict(torch.load('trained_models/fasterrcnn_resnet50_waymo.pth'))

    # Optionally freeze layers when starting from pretrained weights.
    if opt.pretrained_model:
        transfer_learning(model, opt.freeze_layers)

    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.to(device)

    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)

    if not os.path.isdir(opt.saved_path):
        os.makedirs(opt.saved_path)

    use_cuda = torch.cuda.is_available()
    if use_cuda:
        model = model.cuda()
        model = nn.DataParallel(model)

    # Construct an optimizer over the trainable parameters only.
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

    # Decrease the learning rate by 10x every 3 epochs.
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

    best_loss = 1e5
    best_epoch = 0
    model.train()

    num_iter_per_epoch = len(training_generator)
    for epoch in range(opt.num_epochs):
        model.train()
        epoch_loss = []
        progress_bar = tqdm(training_generator)
        for step, data in enumerate(progress_bar):
            optimizer.zero_grad()
            images, targets = data[0], data[1]
            # BUG FIX: move each tensor individually — the inputs are lists.
            images = [image.cuda() if use_cuda else image for image in images]
            targets = [{k: (v.cuda() if use_cuda else v) for k, v in t.items()}
                       for t in targets]

            losses = model(images, targets)
            cls_loss, reg_loss = losses['loss_classifier'], losses['loss_box_reg']

            cls_loss = cls_loss.mean()
            reg_loss = reg_loss.mean()
            loss = cls_loss + reg_loss

            if loss == 0:
                continue
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
            optimizer.step()
            epoch_loss.append(float(loss))
            total_loss = np.mean(epoch_loss)
            if step % 5 == 0:
                print(f'Total loss at iteration {step}: {total_loss}')
            progress_bar.set_description(
                'Epoch: {}/{}. Iteration: {}/{}. Cls loss: {:.5f}. Reg loss: {:.5f}. Batch loss: {:.5f} Total loss: {:.5f}'.format(
                    epoch + 1, opt.num_epochs, step + 1, num_iter_per_epoch, cls_loss, reg_loss, loss,
                    total_loss))
            # Periodic checkpoint every 200 iterations.
            if step % 200 == 0:
                print(f"Saving model at :{opt.saved_path}/fasterrcnn_resnet50_waymo.pth")
                torch.save(model.state_dict(), os.path.join(opt.saved_path, "fasterrcnn_resnet50_waymo.pth"))

        # BUG FIX: StepLR.step() takes no metric argument.
        scheduler.step()

        if epoch % opt.test_interval == 0:
            loss_regression_ls = []
            loss_classification_ls = []
            for data in test_generator:
                with torch.no_grad():
                    images, targets = data[0], data[1]
                    images = [image.cuda() if use_cuda else image for image in images]
                    targets = [{k: (v.cuda() if use_cuda else v) for k, v in t.items()}
                               for t in targets]

                    losses = model(images, targets)
                    cls_loss, reg_loss = losses['loss_classifier'], losses['loss_box_reg']

                    loss_classification_ls.append(float(cls_loss.mean()))
                    loss_regression_ls.append(float(reg_loss.mean()))

            cls_loss = np.mean(loss_classification_ls)
            reg_loss = np.mean(loss_regression_ls)
            loss = cls_loss + reg_loss

            print(
                'Epoch: {}/{}. Classification loss: {:1.5f}. Regression loss: {:1.5f}. Total loss: {:1.5f}'.format(
                    epoch + 1, opt.num_epochs, cls_loss, reg_loss, loss))

            if loss + opt.es_min_delta < best_loss:
                best_loss = loss
                best_epoch = epoch
                torch.save(model.state_dict(), os.path.join(opt.saved_path, "fasterrcnn_resnet50_waymo.pth"))

                # Export the best-so-far model to ONNX alongside the checkpoint.
                dummy_input = torch.rand(opt.batch_size, 3, 512, 512)
                if use_cuda:
                    dummy_input = dummy_input.cuda()
                export_model = model.module if isinstance(model, nn.DataParallel) else model
                torch.onnx.export(export_model, dummy_input,
                                  os.path.join(opt.saved_path, "fasterrcnn_resnet50_waymo.onnx"),
                                  verbose=False)

            # Early stopping.
            if epoch - best_epoch > opt.es_patience > 0:
                print("Stop training at epoch {}. The lowest loss achieved is {}".format(epoch, loss))
                break
# Example #14
# 0
    def __init__(self, config, device):
        """Build the joint ESRGAN-EESN generator/discriminator + Faster R-CNN model.

        Constructs the three sub-networks (generator G, discriminator D and a
        COCO-pretrained Faster R-CNN re-headed for 2 classes), their losses,
        optimizers and LR schedulers from the nested ``config`` dict.

        Args:
            config: configuration dict with 'network_G', 'network_D', 'train',
                'optimizer' and 'lr_scheduler' sections.
            device: torch device the sub-networks are moved to.
        """
        super(ESRGAN_EESN_FRCNN_Model, self).__init__(config, device)
        self.configG = config['network_G']
        self.configD = config['network_D']
        self.configT = config['train']
        self.configO = config['optimizer']['args']
        self.configS = config['lr_scheduler']
        self.config = config
        self.device = device
        # Generator: ESRGAN with EESN edge-enhancement.
        self.netG = model.ESRGAN_EESN(in_nc=self.configG['in_nc'],
                                      out_nc=self.configG['out_nc'],
                                      nf=self.configG['nf'],
                                      nb=self.configG['nb'])
        self.netG = self.netG.to(self.device)
        self.netG = DataParallel(self.netG)

        # Discriminator (VGG-style).
        self.netD = model.Discriminator_VGG_128(in_nc=self.configD['in_nc'],
                                                nf=self.configD['nf'])
        self.netD = self.netD.to(self.device)
        self.netD = DataParallel(self.netD)

        # Faster R-CNN detector, COCO-pretrained, with the box head replaced
        # so it predicts 2 classes.
        self.netFRCNN = torchvision.models.detection.fasterrcnn_resnet50_fpn(
            pretrained=True)
        num_classes = 2  # car and background
        in_features = self.netFRCNN.roi_heads.box_predictor.cls_score.in_features
        self.netFRCNN.roi_heads.box_predictor = FastRCNNPredictor(
            in_features, num_classes)
        self.netFRCNN.to(self.device)

        # All three sub-networks start in training mode.
        self.netG.train()
        self.netD.train()
        self.netFRCNN.train()
        #print(self.configT['pixel_weight'])
        # G CharbonnierLoss for final output SR and GT HR
        self.cri_charbonnier = CharbonnierLoss().to(device)
        # G pixel loss (L1 or L2, weighted by pixel_weight; disabled at 0).
        if self.configT['pixel_weight'] > 0.0:
            l_pix_type = self.configT['pixel_criterion']
            if l_pix_type == 'l1':
                self.cri_pix = nn.L1Loss().to(self.device)
            elif l_pix_type == 'l2':
                self.cri_pix = nn.MSELoss().to(self.device)
            else:
                raise NotImplementedError(
                    'Loss type [{:s}] not recognized.'.format(l_pix_type))
            self.l_pix_w = self.configT['pixel_weight']
        else:
            self.cri_pix = None

        # G feature (perceptual) loss — compares VGG features of SR and HR.
        #print(self.configT['feature_weight']+1)
        if self.configT['feature_weight'] > 0:
            l_fea_type = self.configT['feature_criterion']
            if l_fea_type == 'l1':
                self.cri_fea = nn.L1Loss().to(self.device)
            elif l_fea_type == 'l2':
                self.cri_fea = nn.MSELoss().to(self.device)
            else:
                raise NotImplementedError(
                    'Loss type [{:s}] not recognized.'.format(l_fea_type))
            self.l_fea_w = self.configT['feature_weight']
        else:
            self.cri_fea = None
        if self.cri_fea:  # load VGG perceptual loss
            self.netF = model.VGGFeatureExtractor(feature_layer=34,
                                                  use_input_norm=True,
                                                  device=self.device)
            self.netF = self.netF.to(self.device)
            self.netF = DataParallel(self.netF)
            self.netF.eval()

        # GD gan loss (real label 1.0, fake label 0.0).
        self.cri_gan = GANLoss(self.configT['gan_type'], 1.0,
                               0.0).to(self.device)
        self.l_gan_w = self.configT['gan_weight']
        # D_update_ratio and D_init_iters (fall back to 1 / 0 when unset).
        self.D_update_ratio = self.configT['D_update_ratio'] if self.configT[
            'D_update_ratio'] else 1
        self.D_init_iters = self.configT['D_init_iters'] if self.configT[
            'D_init_iters'] else 0

        # optimizers
        # G
        wd_G = self.configO['weight_decay_G'] if self.configO[
            'weight_decay_G'] else 0
        optim_params = []
        for k, v in self.netG.named_parameters(
        ):  # can optimize for a part of the model
            if v.requires_grad:
                optim_params.append(v)

        self.optimizer_G = torch.optim.Adam(optim_params,
                                            lr=self.configO['lr_G'],
                                            weight_decay=wd_G,
                                            betas=(self.configO['beta1_G'],
                                                   self.configO['beta2_G']))
        self.optimizers.append(self.optimizer_G)

        # D
        wd_D = self.configO['weight_decay_D'] if self.configO[
            'weight_decay_D'] else 0
        self.optimizer_D = torch.optim.Adam(self.netD.parameters(),
                                            lr=self.configO['lr_D'],
                                            weight_decay=wd_D,
                                            betas=(self.configO['beta1_D'],
                                                   self.configO['beta2_D']))
        self.optimizers.append(self.optimizer_D)

        # FRCNN -- SGD with weight decay, trainable parameters only.
        FRCNN_params = [
            p for p in self.netFRCNN.parameters() if p.requires_grad
        ]
        self.optimizer_FRCNN = torch.optim.SGD(FRCNN_params,
                                               lr=0.005,
                                               momentum=0.9,
                                               weight_decay=0.0005)
        self.optimizers.append(self.optimizer_FRCNN)

        # Schedulers: one per optimizer, selected by config type.
        if self.configS['type'] == 'MultiStepLR':
            for optimizer in self.optimizers:
                self.schedulers.append(
                    lr_scheduler.MultiStepLR_Restart(
                        optimizer,
                        self.configS['args']['lr_steps'],
                        restarts=self.configS['args']['restarts'],
                        weights=self.configS['args']['restart_weights'],
                        gamma=self.configS['args']['lr_gamma'],
                        clear_state=False))
        elif self.configS['type'] == 'CosineAnnealingLR_Restart':
            for optimizer in self.optimizers:
                self.schedulers.append(
                    lr_scheduler.CosineAnnealingLR_Restart(
                        optimizer,
                        self.configS['args']['T_period'],
                        eta_min=self.configS['args']['eta_min'],
                        restarts=self.configS['args']['restarts'],
                        weights=self.configS['args']['restart_weights']))
        else:
            raise NotImplementedError(
                'MultiStepLR learning rate scheme is enough.')
        print(self.configS['args']['restarts'])
        self.log_dict = OrderedDict()

        self.print_network()  # print network
        self.load()  # load G and D if needed
# Example #15
# 0
def initialize_model():
    """Build an untrained ResNet-101 FPN Faster R-CNN with a 2-class box head."""
    detector = fasterrcnn_resnet101_fpn(pretrained=False)
    head_in = detector.roi_heads.box_predictor.cls_score.in_features
    detector.roi_heads.box_predictor = FastRCNNPredictor(head_in, 2)
    return detector
def main(args):
    """Run Mask/Faster R-CNN inference on a video stream and display detections.

    The model type and class count come from the module-level ``configs``
    dict; weights are loaded from ``args.model_path``. Press 'q' to quit.

    Args:
        args: parsed CLI options with ``model_path``, ``video_path``,
            ``cuda_device`` and ``score_thr`` attributes.
    """
    device = torch.device(args.cuda_device)

    PATH = args.model_path
    transform1 = transforms.Compose([
        transforms.ToTensor(),  # range [0, 255] -> [0.0,1.0]
    ])
    num_classes = int(configs["number_of_class"]) + 1  # classes + background

    if configs["model_name"] == "mask_rcnn":
        model = torchvision.models.detection.maskrcnn_resnet50_fpn(
            pretrained=True)
    elif configs["model_name"] == "faster_rcnn":
        model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
            pretrained=True)
    else:
        # BUG FIX: `model` was left unbound for unknown model names.
        raise ValueError(
            "Unsupported model_name: {}".format(configs["model_name"]))

    # Replace the pre-trained box head with one sized to our classes.
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    if configs["model_name"] == "mask_rcnn":
        in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
        hidden_layer = 256
        # and replace the mask predictor with a new one
        model.roi_heads.mask_predictor = MaskRCNNPredictor(
            in_features_mask, hidden_layer, num_classes)

    checkpoint = torch.load(PATH)
    model.load_state_dict(checkpoint)
    model.apply(deactivate_batchnorm)
    model.to(device)
    model.eval()

    # BUG FIX: `color` was only assigned inside the mask_rcnn branch,
    # raising NameError when drawing faster_rcnn boxes.
    color = (0, 255, 0)
    is_mask_model = configs["model_name"] == "mask_rcnn"

    cap = cv2.VideoCapture(args.video_path)
    try:
        while True:
            success, image = cap.read()
            if not success:
                break
            image1 = Image.fromarray(cv2.cvtColor(image.copy(), cv2.COLOR_BGR2RGB))
            image_tensor = transform1(image1).unsqueeze(0).to(device)

            with torch.no_grad():  # inference only — skip autograd bookkeeping
                predictions = model(image_tensor)
            scores = predictions[0]["scores"]
            boxes = predictions[0]["boxes"]
            # BUG FIX: faster_rcnn output has no "masks" key.
            masks = predictions[0]["masks"] if is_mask_model else None

            for i in range(len(boxes)):
                score = scores.data[i]
                if score < args.score_thr:
                    continue
                bb_box = boxes.data[i]

                if masks is not None:
                    mask = masks[i].cpu().detach().numpy()
                    mask[mask >= 0.6] = 1
                    mask = np.reshape(mask, (mask.shape[1], mask.shape[2],
                                             mask.shape[0])).astype(np.uint8)
                    contours, hierarchy = cv2.findContours(mask, cv2.RETR_TREE,
                                                           cv2.CHAIN_APPROX_SIMPLE)
                    cv2.drawContours(image, contours, 0, color, 4)
                    mask_copy = np.reshape(
                        mask, (mask.shape[0], mask.shape[1])).astype(np.uint8)
                    image = apply_mask(image, mask_copy, color)

                # OpenCV expects plain ints, not tensor scalars.
                x1, y1, x2, y2 = (int(v) for v in bb_box)
                cv2.rectangle(image, (x2, y1), (x1, y2), color, 2)

            show_image = cv2.resize(image.copy(), (720, 480))
            cv2.imshow('image', show_image)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # BUG FIX: release the capture and close windows on exit.
        cap.release()
        cv2.destroyAllWindows()
# Example #17
# 0
    # NOTE(review): this fragment belongs to a function whose `def` line is
    # not visible here; `opt`, `neck_name` and `backbone_name` come from the
    # enclosing (missing) scope — confirm against the full file.

    # Training epochs
    num_epochs = opt.num_epochs

    # Number of images in a batch
    batch_size = opt.batch_size

    # Image size bounds used by the detector's internal resize transform
    max_size = opt.max_size
    min_size = opt.min_size

    # Initial model: COCO-pretrained Faster R-CNN with a fresh 2-class box head
    model = fasterrcnn_resnet50_fpn(pretrained=True,
                                    min_size=min_size,
                                    max_size=max_size)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, 2)

    # Swap the backbone for the configured neck: a standard ResNet+FPN or a
    # grouped-pyramid variant. Both expose 256 output channels, which the
    # detector reads from `backbone.out_channels`.
    if neck_name == 'fpn':
        out_channels = 256
        backbone = resnet_fpn_backbone(backbone_name, pretrained=True)
        backbone.out_channels = out_channels
        model.backbone = backbone
    elif neck_name == 'gfpn':
        out_channels = 256
        backbone = GroupedPyramidFeatures(backbone_name=backbone_name,
                                          out_features=out_channels,
                                          pretrained=True)
        backbone.out_channels = out_channels
        model.backbone = backbone
# Example #18
# 0
    def __init__(self,
                 train_data,
                 mode='sgcls',
                 require_overlap_det=True,
                 use_bias=False,
                 test_bias=False,
                 backbone='vgg16',
                 RELS_PER_IMG=1024,
                 min_size=None,
                 max_size=None,
                 edge_model='motifs'):
        """
        Base class for an SGG (scene graph generation) model.
        :param train_data: dataset exposing ind_to_classes / ind_to_predicates
        :param mode: (sgcls, predcls, or sgdet)
        :param require_overlap_det: Whether two objects must intersect
        :param use_bias: add a frequency-bias term (from train_data statistics)
        :param test_bias: use that bias at test time
        :param backbone: 'vgg16' or 'resnet50' detector backbone
        :param RELS_PER_IMG: maximum relations considered per image
        :param min_size: detector input resize lower bound (backbone default if None)
        :param max_size: detector input resize upper bound (backbone default if None)
        :param edge_model: edge feature model passed to UnionBoxesAndFeats
        """
        super(RelModelBase, self).__init__()
        self.classes = train_data.ind_to_classes
        self.rel_classes = train_data.ind_to_predicates
        self.mode = mode
        self.backbone = backbone
        self.RELS_PER_IMG = RELS_PER_IMG
        self.pool_sz = 7   # ROI pooling output size
        self.stride = 16   # backbone feature-map stride

        self.use_bias = use_bias
        self.test_bias = test_bias

        # Overlap requirement only applies in scene-graph detection mode.
        self.require_overlap = require_overlap_det and self.mode == 'sgdet'

        if self.backbone == 'resnet50':
            self.obj_dim = 1024
            self.fmap_sz = 21

            if min_size is None:
                min_size = 1333
            if max_size is None:
                max_size = 1333

            print('\nLoading COCO pretrained model maskrcnn_resnet50_fpn...\n')
            # See https://pytorch.org/tutorials/intermediate/torchvision_tutorial.html
            self.detector = torchvision.models.detection.maskrcnn_resnet50_fpn(
                pretrained=True,
                min_size=min_size,
                max_size=max_size,
                box_detections_per_img=50,
                box_score_thresh=0.2)
            in_features = self.detector.roi_heads.box_predictor.cls_score.in_features
            # replace the pre-trained head with a new one
            self.detector.roi_heads.box_predictor = FastRCNNPredictor(
                in_features, len(self.classes))
            # Masks are not needed for SGG; drop the mask head.
            self.detector.roi_heads.mask_predictor = None

            # Copy the detector's box-roi-pool / box-head layers so ROI
            # features can be extracted independently of detection.
            layers = list(self.detector.roi_heads.children())[:2]
            self.roi_fmap_obj = copy.deepcopy(layers[1])
            self.roi_fmap = copy.deepcopy(layers[1])
            self.roi_pool = copy.deepcopy(layers[0])

        elif self.backbone == 'vgg16':
            self.obj_dim = 4096
            self.fmap_sz = 38

            if min_size is None:
                min_size = IM_SCALE
            if max_size is None:
                max_size = IM_SCALE

            # Assemble a Faster R-CNN around the VGG-16 feature extractor.
            vgg = load_vgg(use_dropout=False,
                           use_relu=False,
                           use_linear=True,
                           pretrained=False)
            vgg.features.out_channels = 512
            anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256,
                                                       512), ),
                                               aspect_ratios=((0.5, 1.0,
                                                               2.0), ))

            roi_pooler = torchvision.ops.MultiScaleRoIAlign(
                featmap_names=['0'],
                output_size=self.pool_sz,
                sampling_ratio=2)

            self.detector = FasterRCNN(vgg.features,
                                       min_size=min_size,
                                       max_size=max_size,
                                       rpn_anchor_generator=anchor_generator,
                                       box_head=TwoMLPHead(
                                           vgg.features.out_channels *
                                           self.pool_sz**2, self.obj_dim),
                                       box_predictor=FastRCNNPredictor(
                                           self.obj_dim,
                                           len(train_data.ind_to_classes)),
                                       box_roi_pool=roi_pooler,
                                       box_detections_per_img=50,
                                       box_score_thresh=0.2)

            self.roi_fmap = nn.Sequential(nn.Flatten(), vgg.classifier)
            self.roi_fmap_obj = load_vgg(pretrained=False).classifier
            self.roi_pool = copy.deepcopy(
                list(self.detector.roi_heads.children())[0])

        else:
            raise NotImplementedError(self.backbone)

        self.edge_dim = self.detector.backbone.out_channels

        # Union-box feature extractor for relation (edge) features.
        self.union_boxes = UnionBoxesAndFeats(pooling_size=self.pool_sz,
                                              stride=self.stride,
                                              dim=self.edge_dim,
                                              edge_model=edge_model)
        if self.use_bias:
            self.freq_bias = FrequencyBias(train_data)
Example #19
0
    def __init__(self, backbone, num_classes=None,
                 # transform parameters
                 min_size=800, max_size=1333,
                 image_mean=None, image_std=None,
                 # RPN parameters
                 rpn_anchor_generator=None, rpn_head=None,
                 rpn_pre_nms_top_n_train=2000, rpn_pre_nms_top_n_test=1000,
                 rpn_post_nms_top_n_train=2000, rpn_post_nms_top_n_test=1000,
                 rpn_nms_thresh=0.7,
                 rpn_fg_iou_thresh=0.7, rpn_bg_iou_thresh=0.3,
                 rpn_batch_size_per_image=256, rpn_positive_fraction=0.5,
                 # Box parameters
                 box_roi_pool=None, box_head=None, box_predictor=None,
                 box_score_thresh=0.05, box_nms_thresh=0.5, box_detections_per_img=100,
                 box_fg_iou_thresh=0.5, box_bg_iou_thresh=0.5,
                 box_batch_size_per_image=512, box_positive_fraction=0.25,
                 bbox_reg_weights=None):
        """Assemble a Faster R-CNN detector from its components.

        Any component left as None (anchor generator, RPN head, RoI pooler,
        box head, box predictor) is replaced with the standard default.
        Exactly one of ``num_classes`` / ``box_predictor`` must be given:
        either a default predictor is built for ``num_classes``, or the
        caller supplies a ready-made ``box_predictor``.

        Raises:
            ValueError: if ``backbone`` lacks an ``out_channels`` attribute,
                or if both (or neither) of ``num_classes`` and
                ``box_predictor`` are specified.
            TypeError: if ``rpn_anchor_generator`` or ``box_roi_pool`` has
                the wrong type.
        """
        if not hasattr(backbone, "out_channels"):
            raise ValueError(
                "backbone should contain an attribute out_channels "
                "specifying the number of output channels (assumed to be the "
                "same for all the levels)")

        # Explicit type checks instead of `assert`: asserts are stripped when
        # Python runs with -O, which would silently disable this validation.
        if not isinstance(rpn_anchor_generator, (AnchorGenerator, type(None))):
            raise TypeError(
                "rpn_anchor_generator should be of type AnchorGenerator "
                "or None, got {}".format(type(rpn_anchor_generator)))
        if not isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None))):
            raise TypeError(
                "box_roi_pool should be of type MultiScaleRoIAlign "
                "or None, got {}".format(type(box_roi_pool)))

        # num_classes and box_predictor are mutually exclusive ways of
        # specifying the classification head.
        if num_classes is not None:
            if box_predictor is not None:
                raise ValueError("num_classes should be None when box_predictor is specified")
        else:
            if box_predictor is None:
                raise ValueError("num_classes should not be None when box_predictor "
                                 "is not specified")

        out_channels = backbone.out_channels

        # Default RPN: one anchor size per feature level, three aspect
        # ratios each.
        if rpn_anchor_generator is None:
            anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
            aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
            rpn_anchor_generator = AnchorGenerator(
                anchor_sizes, aspect_ratios
            )
        if rpn_head is None:
            rpn_head = RPNHead(
                out_channels, rpn_anchor_generator.num_anchors_per_location()[0]
            )

        # Separate proposal budgets for train and eval mode.
        rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train, testing=rpn_pre_nms_top_n_test)
        rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train, testing=rpn_post_nms_top_n_test)

        rpn = RegionProposalNetwork(
            rpn_anchor_generator, rpn_head,
            rpn_fg_iou_thresh, rpn_bg_iou_thresh,
            rpn_batch_size_per_image, rpn_positive_fraction,
            rpn_pre_nms_top_n, rpn_post_nms_top_n, rpn_nms_thresh)

        # Default box branch: 7x7 RoIAlign over the four FPN levels, a
        # two-layer MLP, then a linear classification/regression predictor.
        if box_roi_pool is None:
            box_roi_pool = MultiScaleRoIAlign(
                featmap_names=['0', '1', '2', '3'],
                output_size=7,
                sampling_ratio=2)

        if box_head is None:
            resolution = box_roi_pool.output_size[0]
            representation_size = 1024
            box_head = TwoMLPHead(
                out_channels * resolution ** 2,
                representation_size)

        if box_predictor is None:
            representation_size = 1024
            box_predictor = FastRCNNPredictor(
                representation_size,
                num_classes)

        roi_heads = RoIHeads(
            # Box
            box_roi_pool, box_head, box_predictor,
            box_fg_iou_thresh, box_bg_iou_thresh,
            box_batch_size_per_image, box_positive_fraction,
            bbox_reg_weights,
            box_score_thresh, box_nms_thresh, box_detections_per_img)

        # ImageNet statistics, used to normalize inputs before the backbone.
        if image_mean is None:
            image_mean = [0.485, 0.456, 0.406]
        if image_std is None:
            image_std = [0.229, 0.224, 0.225]
        transform = GeneralizedRCNNTransform(min_size, max_size, image_mean, image_std)

        super(FasterRCNN, self).__init__(backbone, rpn, roi_heads, transform)
Example #20
0
        num_classes = 2

        fine_model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False, num_classes=num_classes,
                                                                          pretrained_backbone=False)
        coarse_model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False, num_classes=num_classes,
                                                                            pretrained_backbone=False)

        # # # # replace the classifier with a new one, that has
        # # # # num_classes which is user-defined

        # # # get number of input features for the classifier
        fine_in_features = fine_model.roi_heads.box_predictor.cls_score.in_features
        coarse_in_features = coarse_model.roi_heads.box_predictor.cls_score.in_features

        # # # replace the pre-trained head with a new one
        fine_model.roi_heads.box_predictor = FastRCNNPredictor(fine_in_features, num_classes)
        coarse_model.roi_heads.box_predictor = FastRCNNPredictor(coarse_in_features, num_classes)

        for fine_p, coarse_p in zip(fine_model.parameters(), coarse_model.parameters()):
            fine_p.requires_grad = True
            coarse_p.requires_grad = True

        fine_model.to(device)
        coarse_model.to(device)

        # Optimizer
        fine_params = [p for p in fine_model.parameters() if p.requires_grad]
        coarse_params = [p for p in coarse_model.parameters() if p.requires_grad]

        fine_optim = torch.optim.SGD(fine_params, lr=0.005, momentum=0.9, weight_decay=0.0005)
        coarse_optim = torch.optim.SGD(coarse_params, lr=0.005, momentum=0.9, weight_decay=0.0005)
Example #21
0
# Path of the input video to run detection on.
# videosti = "C:/Users/elleh/Downloads/IMG_0412.mp4"
videosti = '../../../SwimData/SwimCodes/temp/A-H/H.mp4'

# Run on the GPU when available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')


# Class names for the downstream classifier (codes A-H plus a "False" class).
classNames = ["A","B","C","D","E","F","G","H","False"]

# Object detector: Faster R-CNN (ResNet-50 FPN) with a 2-class box head
# (presumably target vs. background -- TODO confirm), restored from a
# locally trained checkpoint.

objectDetector = models.detection.fasterrcnn_resnet50_fpn()
num_classes = 2 
in_features = objectDetector.roi_heads.box_predictor.cls_score.in_features
objectDetector.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
# The head must be replaced *before* loading: the checkpoint was saved with
# the 2-class predictor, so the state_dict only matches the modified model.
objectDetector.load_state_dict(torch.load("../../../SwimData/SwimCodes/objectDetection/models/RCNN_13nov.pth",
                                          map_location=device))
objectDetector.eval()
objectDetector.to(device)

# Classifier stage (currently disabled):
# classifier = models.vgg19(pretrained=False,progress=False)
# classifier.classifier[6] = nn.Linear(in_features=4096,out_features=len(classNames),bias=True)
# classifier.load_state_dict(torch.load("../../../SwimData/SwimCodes/classification3/models/5_0.9612403100775194.pth",
#                                       map_location=device))
# classifier = classifier.to(device)

# classtrans = transforms.Compose([ transforms.Resize((256,256)),
#         transforms.ToTensor(),
#         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
Example #22
0
                                      pin_memory=args.pin_memory)
        testloader = data.DataLoader(testset, args.batch_size)

    if args.detection:
        print("Initialize Training Mode: {}".format(args.mode))
        if args.mode == 'mtcnn':
            # model
            #mtcnn = MTCNN(image_size=224, keep_all=True, device=device)
            model = InceptionResnetV1(pretrained='vggface2',
                                      classify=True,
                                      num_classes=3).to(device)
        elif args.mode == 'faster_rcnn':
            # model
            model = fasterrcnn_resnet50_fpn(pretrained=True).to(device)
            in_feat = model.roi_heads.box_predictor.cls_score.in_features
            model.roi_heads.box_predictor = FastRCNNPredictor(in_feat,
                                                              4).to(device)
        else:
            print(
                "Error: Training Mode {} is not defined for detection dataset!"
                .format(args.mode))
    else:
        # set mode to transfer learning, if layer number of mobilenet is given
        if args.layer is not None:
            args.mode = 'transfer'

        # model
        print("Initialize Training Mode: {}".format(args.mode))
        if args.mode == 'from_scratch':
            model = models.mobilenet_v2(pretrained=False).features.to(device)
            model.classifier = nn.Sequential(
                nn.Dropout(p=0.2, inplace=False), nn.Flatten(),
def _get_instance_segmentation_model(num_classes):
    """Return a COCO-pretrained detector re-headed for `num_classes` classes.

    NOTE(review): despite the name, this builds a Faster R-CNN (detection)
    model, not a mask/instance-segmentation one.
    """
    detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    # Swap the pretrained 91-class box head for one sized to our classes.
    head_in = detector.roi_heads.box_predictor.cls_score.in_features
    detector.roi_heads.box_predictor = FastRCNNPredictor(head_in, num_classes)
    return detector
Example #24
0
                                 transforms=train_transform)

    # collate_fn: groups the samples of a batch into per-field tuples
    def collate_fn(batch):
        return tuple(zip(*batch))

    train_loader = DataLoader(train_set,
                              batch_size=batch_size,
                              collate_fn=collate_fn)

    # step 2: model
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
        pretrained=True)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(
        in_features,
        num_classes)  # replace the pre-trained head with a new one

    model.to(device)

    # step 3: loss
    # in lib/python3.6/site-packages/torchvision/models/detection/roi_heads.py
    # def fastrcnn_loss(class_logits, box_regression, labels, regression_targets)

    # step 4: optimizer scheduler
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params,
                                lr=LR,
                                momentum=0.9,
                                weight_decay=0.0005)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
Example #25
0
    def build_model(cls, args, task):
        """Build a new model instance.

        Components supplied by ``task`` (anchor generator, RPN head, RoI
        pooler, box head, box predictor) are used as-is; anything left as
        None falls back to the standard default. Exactly one of
        ``task.num_classes > 0`` and ``task.box_predictor`` must be provided.

        Raises:
            ValueError: if the backbone lacks ``out_channels``, or if
                ``num_classes``/``box_predictor`` are inconsistently given.
            TypeError: if ``rpn_anchor_generator`` or ``box_roi_pool`` has
                the wrong type.
        """
        # make sure that all args are properly defaulted (in case there are any new ones)
        base_architecture(args)

        rpn_anchor_generator = task.rpn_anchor_generator
        rpn_head = task.rpn_head
        box_roi_pool = task.box_roi_pool
        box_predictor = task.box_predictor
        box_head = task.box_head

        # setup backbone
        backbone = resnet_fpn_backbone(args.backbone, args.backbone_pretrained)

        if not hasattr(backbone, "out_channels"):
            raise ValueError(
                "backbone should contain an attribute out_channels "
                "specifying the number of output channels (assumed to be the "
                "same for all the levels)"
            )

        # Explicit type checks instead of `assert`: asserts are stripped when
        # Python runs with -O, which would silently disable this validation.
        if not isinstance(rpn_anchor_generator, (AnchorGenerator, type(None))):
            raise TypeError(
                "rpn_anchor_generator should be of type AnchorGenerator "
                "or None, got {}".format(type(rpn_anchor_generator))
            )
        if not isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None))):
            raise TypeError(
                "box_roi_pool should be of type MultiScaleRoIAlign "
                "or None, got {}".format(type(box_roi_pool))
            )

        # num_classes (> 0) and box_predictor are mutually exclusive ways of
        # specifying the classification head.
        if task.num_classes > 0:
            if box_predictor is not None:
                raise ValueError("num_classes should be -1 when box_predictor is specified")
        else:
            if box_predictor is None:
                raise ValueError("num_classes should be > 0 when box_predictor is not specified")

        out_channels = backbone.out_channels

        # Default RPN: one anchor size per feature level, three aspect
        # ratios each.
        if rpn_anchor_generator is None:
            anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
            aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
            rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
        if rpn_head is None:
            rpn_head = RPNHead(
                out_channels,
                rpn_anchor_generator.num_anchors_per_location()[0],
            )

        # Separate proposal budgets for train and eval mode.
        rpn_pre_nms_top_n = dict(training=args.rpn_pre_nms_top_n_train, testing=args.rpn_pre_nms_top_n_test)
        rpn_post_nms_top_n = dict(training=args.rpn_post_nms_top_n_train, testing=args.rpn_post_nms_top_n_test)

        rpn = RPN(
            rpn_anchor_generator, rpn_head,
            args.rpn_fg_iou_thresh, args.rpn_bg_iou_thresh,
            args.rpn_batch_size_per_image, args.rpn_positive_fraction,
            rpn_pre_nms_top_n, rpn_post_nms_top_n, args.rpn_nms_thresh,
        )

        if box_roi_pool is None:
            # NOTE(review): integer featmap_names -- newer torchvision expects
            # strings ('0'..'3'); verify against the pinned torchvision version.
            box_roi_pool = MultiScaleRoIAlign(
                featmap_names=[0, 1, 2, 3],
                output_size=7,
                sampling_ratio=2,
            )

        if box_head is None:
            resolution = box_roi_pool.output_size[0]
            representation_size = 1024
            box_head = TwoMLPHead(
                out_channels * resolution ** 2,
                representation_size,
            )

        if box_predictor is None:
            representation_size = 1024
            box_predictor = FastRCNNPredictor(
                representation_size,
                task.num_classes,
            )

        roi_heads = RegionOfInterestHeads(
            # Box
            box_roi_pool, box_head, box_predictor,
            args.box_fg_iou_thresh, args.box_bg_iou_thresh,
            args.box_batch_size_per_image, args.box_positive_fraction,
            args.bbox_reg_weights, args.box_score_thresh,
            args.box_nms_thresh, args.box_detections_per_img,
        )

        # ImageNet statistics, used to normalize inputs before the backbone.
        if args.image_mean is None:
            args.image_mean = [0.485, 0.456, 0.406]
        if args.image_std is None:
            args.image_std = [0.229, 0.224, 0.225]
        transform = GeneralizedRCNNTransform(
            args.min_size, args.max_size,
            args.image_mean, args.image_std,
        )

        return cls(backbone, rpn, roi_heads, transform)
Example #26
0
def main(
    experiment_name: str,
    gpus: Union[int, str, List[str]] = "auto",
    nproc_per_node: Union[int, str] = "auto",
    dataset_root: str = "./dataset",
    log_dir: str = "./log",
    model: str = "fasterrcnn_resnet50_fpn",
    epochs: int = 13,
    batch_size: int = 4,
    lr: float = 0.01,
    download: bool = False,
    image_size: int = 256,
    resume_from: Union[str, None] = None,
) -> None:
    """Entry point: configure devices, build the model and launch training.

    Args:
        experiment_name: the name of each run
        gpus: can be "auto", "none", a single device id, or gpu device ids
            like "0,1"
        nproc_per_node: number of worker processes per node, or "auto"
        dataset_root: dataset root directory for VOC2012 Dataset
        log_dir: where to put all the logs
        model: model to use, possible options are
            "fasterrcnn_resnet50_fpn",
            "fasterrcnn_mobilenet_v3_large_fpn",
            "fasterrcnn_mobilenet_v3_large_320_fpn"
        epochs: number of epochs to train
        batch_size: batch size
        lr: initial learning rate
        download: whether to automatically download dataset
        image_size: image size for training and validation
        resume_from: path of checkpoint to resume from
    """
    if model not in AVAILABLE_MODELS:
        raise RuntimeError(f"Invalid model name: {model}")

    # Normalize `gpus` to a tuple of device ids and restrict CUDA visibility
    # accordingly.
    if isinstance(gpus, int):
        gpus = (gpus, )
    if isinstance(gpus, tuple):
        os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(
            [str(gpu) for gpu in gpus])
    elif gpus == "auto":
        gpus = tuple(range(torch.cuda.device_count()))
    elif gpus == "none":
        os.environ["CUDA_VISIBLE_DEVICES"] = ""
        gpus = tuple()

    ngpu = len(gpus)

    # nccl only works with GPUs; gloo is the CPU fallback.
    backend = "nccl" if ngpu > 0 else "gloo"
    if nproc_per_node == "auto":
        nproc_per_node = ngpu if ngpu > 0 else max(
            multiprocessing.cpu_count() // 2, 1)

    # To prevent multiple downloads of the pretrained checkpoint, create the
    # model once in the main process. NOTE: `model` is rebound here from the
    # model *name* (str) to the actual model instance.
    model = getattr(detection, model)(pretrained=True)

    # Re-head the pretrained network for the 21 VOC classes (20 + background).
    if model.__class__.__name__ == "FasterRCNN":
        in_features = model.roi_heads.box_predictor.cls_score.in_features
        model.roi_heads.box_predictor = FastRCNNPredictor(in_features, 21)
    elif model.__class__.__name__ == "RetinaNet":
        head = RetinaNetClassificationHead(
            model.backbone.out_channels,
            model.anchor_generator.num_anchors_per_location()[0],
            num_classes=21)
        model.head.classification_head = head

    # Fan out `run` across the configured processes/devices.
    with idist.Parallel(backend=backend,
                        nproc_per_node=nproc_per_node) as parallel:
        parallel.run(
            run,
            "cuda" if ngpu > 0 else "cpu",
            experiment_name,
            gpus,
            dataset_root,
            log_dir,
            model,
            epochs,
            batch_size,
            lr,
            download,
            image_size,
            resume_from,
        )
def main():
    """Train a porpoise detector (Faster R-CNN, custom anchors) and save it."""
    # Run on the GPU when available, otherwise on the CPU.
    run_device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')

    # Two dataset views over the same images, differing only in transforms.
    ds_train = porpoise_dataset(DATA_PATH, TRANSFORM_TRAIN)
    ds_val = porpoise_dataset(DATA_PATH, TRANSFORM_VAL)

    # Shared random split: last `n_val` shuffled indices go to validation.
    n_val = int(len(ds_train) * TRAIN_SPLIT)
    order = torch.randperm(len(ds_train)).tolist()
    ds_train = torch.utils.data.Subset(ds_train, order[:-n_val])
    ds_val = torch.utils.data.Subset(ds_val, order[-n_val:])

    loader_train = data.DataLoader(ds_train,
                                   batch_size=BATCH_SIZE,
                                   shuffle=True,
                                   num_workers=NUM_WORKERS,
                                   pin_memory=True,
                                   collate_fn=utils.collate_fn)
    loader_val = data.DataLoader(ds_val,
                                 batch_size=BATCH_SIZE,
                                 shuffle=False,
                                 num_workers=NUM_WORKERS,
                                 pin_memory=True,
                                 collate_fn=utils.collate_fn)

    # Smaller anchor sizes than the torchvision defaults.
    sizes = ((16, ), (32, ), (64, ), (128, ), (256, ))
    ratios = ((0.5, 1.0, 2.0), ) * len(sizes)
    anchor_gen = AnchorGenerator(sizes, ratios)

    # COCO-pretrained detector with the custom anchor generator.
    detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(
        pretrained=True, rpn_anchor_generator=anchor_gen)

    # Swap in a two-class box head (porpoise vs. background).
    head_in = detector.roi_heads.box_predictor.cls_score.in_features
    detector.roi_heads.box_predictor = FastRCNNPredictor(head_in, 2)

    detector.to(run_device)

    # Optimize only the parameters that require gradients.
    trainable = [p for p in detector.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(trainable,
                                lr=0.005,
                                momentum=0.9,
                                weight_decay=0.0005)

    # 10 epochs: train, then evaluate on the held-out split.
    for epoch in range(10):
        train_one_epoch(detector,
                        optimizer,
                        loader_train,
                        run_device,
                        epoch,
                        print_freq=10)
        evaluate(detector, loader_val, device=run_device)

    torch.save(detector, DATA_PATH + "/model_16_10e_2905")
    def __init__(self,
                 out_channels,
                 num_classes,
                 input_mode,
                 acf_head,
                 fg_iou_thresh=0.5,
                 bg_iou_thresh=0.5,
                 batch_size_per_image=512,
                 positive_fraction=0.25,
                 bbox_reg_weights=None,
                 box_score_thresh=0.05,
                 box_nms_thresh=0.5,
                 box_detections_per_img=100):
        """Build detection, mask, PAF and (optionally) keypoint sub-heads.

        Args:
            out_channels: channel count of the incoming feature maps.
            num_classes: number of classes including background.
            input_mode: input modality flag; ``config.INPUT_RGBD`` enables
                the extra 3D/axis keypoint branches.
            acf_head: one of 'endpoints', 'scatters' or 'norm_vector'
                (only consulted in RGB-D mode).

        Raises:
            ValueError: if ``acf_head`` is not a recognized option
                (RGB-D mode only).
        """
        super(RoIHeadsExtend, self).__init__()

        self.in_channels = out_channels
        self.input_mode = input_mode
        self.score_thresh = box_score_thresh
        self.nms_thresh = box_nms_thresh
        self.detections_per_img = box_detections_per_img
        self.fg_iou_thresh = fg_iou_thresh
        self.bg_iou_thresh = bg_iou_thresh
        self.batch_size_per_image = batch_size_per_image
        self.positive_fraction = positive_fraction
        self.num_classes = num_classes

        # Detection
        self.box_similarity = box_ops.box_iou
        # assign ground-truth boxes for each proposal
        self.proposal_matcher = det_utils.Matcher(
            fg_iou_thresh, bg_iou_thresh, allow_low_quality_matches=False)

        self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler(
            batch_size_per_image, positive_fraction)

        if bbox_reg_weights is None:
            bbox_reg_weights = (10., 10., 5., 5.)
        self.box_coder = det_utils.BoxCoder(bbox_reg_weights)

        # NOTE(review): integer featmap_names -- newer torchvision expects
        # strings ('0'..'3'); verify against the pinned torchvision version.
        self.box_roi_pool = MultiScaleRoIAlign(featmap_names=[0, 1, 2, 3],
                                               output_size=7,
                                               sampling_ratio=2)

        representation_size = 1024
        resolution = self.box_roi_pool.output_size[0]
        self.box_head = TwoMLPHead(out_channels * resolution**2,
                                   representation_size)

        self.box_predictor = FastRCNNPredictor(representation_size,
                                               num_classes)

        # Segmentation
        self.shared_roi_pool = MultiScaleRoIAlign(featmap_names=[0, 1, 2, 3],
                                                  output_size=14,
                                                  sampling_ratio=2)
        resolution = self.shared_roi_pool.output_size[0]

        mask_layers = (256, 256, 256, 256, 256, 256, 256, 256)
        mask_dilation = 1
        self.mask_head = MaskRCNNHeads(out_channels, mask_layers,
                                       mask_dilation)

        mask_predictor_in_channels = 256  # == mask_layers[-1]
        mask_dim_reduced = 256
        self.mask_predictor = MaskRCNNPredictor(mask_predictor_in_channels,
                                                mask_dim_reduced, num_classes)

        # Part-affinity-field branch: predicts 2 channels per foreground class.
        self.with_paf_branch = True
        if self.with_paf_branch:
            self.paf_head = MaskRCNNHeads(out_channels, mask_layers,
                                          mask_dilation)
            self.paf_predictor = MaskRCNNPredictor(mask_predictor_in_channels,
                                                   mask_dim_reduced,
                                                   2 * (num_classes - 1))

        if self.input_mode == config.INPUT_RGBD:
            self.attention_block = ContextBlock(256, 2)
            self.global_feature_dim = 256
            # Exactly one axis-estimation strategy is enabled by `acf_head`.
            self.with_3d_keypoints = True
            self.with_axis_keypoints = False
            self.regress_axis = False
            self.estimate_norm_vector = False
            if acf_head == 'endpoints':
                self.with_axis_keypoints = True
            elif acf_head == 'scatters':
                self.regress_axis = True
            elif acf_head == 'norm_vector':
                self.estimate_norm_vector = True
            else:
                # Raise instead of print()+exit(): exit() would kill the whole
                # process and hide the configuration error from callers.
                raise ValueError(
                    "acf_head must be 'endpoints', 'scatters' or "
                    "'norm_vector', got {!r}".format(acf_head))
            keypoint_layers = (256, ) * 4
            self.keypoint_dim_reduced = keypoint_layers[-1]
            if self.with_3d_keypoints:
                self.vote_keypoint_head = Vote_Kpoints_head(
                    self.global_feature_dim, keypoint_layers, "conv2d")
                self.vote_keypoint_predictor = Vote_Kpoints_Predictor(
                    self.keypoint_dim_reduced, 3 * (num_classes - 1))
            if self.with_axis_keypoints:
                self.orientation_keypoint_head = Vote_Kpoints_head(
                    self.global_feature_dim, keypoint_layers, "conv2d")

                self.orientation_keypoint_predictor = Vote_Kpoints_Predictor(
                    self.keypoint_dim_reduced, 6 * (num_classes - 1))

            if self.regress_axis:
                self.axis_head = Vote_Kpoints_head(self.global_feature_dim,
                                                   keypoint_layers, "conv2d")
                self.axis_predictor = Vote_Kpoints_Predictor(
                    self.keypoint_dim_reduced, 4 * (num_classes - 1))

            if self.estimate_norm_vector:
                self.norm_vector_head = Vote_Kpoints_head(
                    self.global_feature_dim, keypoint_layers, "conv2d")
                self.norm_vector_predictor = Vote_Kpoints_Predictor(
                    self.keypoint_dim_reduced, 3 * (num_classes - 1))
Example #29
0
        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.imgs)


import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# COCO-pretrained Faster R-CNN, re-headed for 2 classes
# (presumably object-of-interest vs. background -- TODO confirm).
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
num_classes = 2
# Input width of the box classifier, taken from the pretrained head.
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor


def get_model_instance_segmentation(num_classes):
    """Return a COCO-pretrained Mask R-CNN whose box and mask heads are
    resized for `num_classes` classes."""
    net = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)

    # Fresh box-classification head sized to our class count.
    box_in = net.roi_heads.box_predictor.cls_score.in_features
    net.roi_heads.box_predictor = FastRCNNPredictor(box_in, num_classes)

    # Fresh mask head with a 256-channel hidden layer.
    mask_in = net.roi_heads.mask_predictor.conv5_mask.in_channels
    net.roi_heads.mask_predictor = MaskRCNNPredictor(mask_in, 256, num_classes)
    return net
Example #30
0
def main(score_th=0.25):
    """Ensemble a classifier (SANet) and a detector (Mask R-CNN) on CVTE-CXR.

    For each test image the abnormality score is the max of the classifier
    probability and the best detector box score; AUROC plus sensitivity and
    specificity (thresholded at `score_th`) are printed.
    """
    N_CLASSES = len(CLASS_NAMES_Vin)
    torch.backends.cudnn.benchmark = True
    #classification pre-trained model
    CKPT_PATH = '/data/pycode/CXRAD/ckpt/SANet.pkl'
    cls_model = SANet(num_classes=N_CLASSES)
    if os.path.exists(CKPT_PATH):
        checkpoint = torch.load(CKPT_PATH)
        cls_model.load_state_dict(checkpoint)  #strict=False
        print(
            "=> Loaded well-trained SANet model checkpoint of Vin-CXR dataset: "
            + CKPT_PATH)
    cls_model = cls_model.cuda()
    cls_model.eval()
    #detection pre-trained model
    od_model = torchvision.models.detection.maskrcnn_resnet50_fpn(
        pretrained=True)
    # Re-head box and mask branches for the Vin-CXR class count *before*
    # loading the fine-tuned checkpoint (its state_dict matches these shapes).
    in_features = od_model.roi_heads.box_predictor.cls_score.in_features
    od_model.roi_heads.box_predictor = FastRCNNPredictor(
        in_features, N_CLASSES)
    in_features_mask = od_model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    od_model.roi_heads.mask_predictor = MaskRCNNPredictor(
        in_features_mask, hidden_layer, N_CLASSES)  #
    CKPT_PATH = '/data/pycode/CXRAD/ckpt/Maskrcnn.pkl'
    if os.path.exists(CKPT_PATH):
        checkpoint = torch.load(CKPT_PATH)
        od_model.load_state_dict(checkpoint)  #strict=False
        print(
            "=> Loaded well-trained Maskrcnn model checkpoint of Vin-CXR dataset: "
            + CKPT_PATH)
    od_model = od_model.cuda()
    od_model.eval()

    #CVTE-CXR dataset
    cvte_csv_file = '/data/pycode/CXRAD/dataset/cvte_test.txt'  #testing file patt
    cvte_image_dir = '/data/fjsdata/CVTEDR/images/'  #image path
    # test images and show the results
    images = pd.read_csv(cvte_csv_file, sep=',', header=None).values
    gt, pred, box = [], [], []
    for image in images:
        gt.append(image[1])
        img = cvte_image_dir + image[0]
        image = Image.open(img).convert('RGB')
        image = torch.unsqueeze(transform_seq(image), 0)
        var_image = torch.autograd.Variable(image).cuda()
        #generate classification result
        var_output = cls_model(var_image)  #forward
        prob_cls = 1 - var_output[0].data.cpu()[0].numpy()
        #generate detection result
        var_output = od_model(var_image)  #dict
        boxes = var_output[0]['boxes'].data.cpu().numpy()
        scores = var_output[0]['scores'].data.cpu().numpy()
        if len(scores) > 0:
            # Fuse: keep the stronger of the two abnormality signals.
            ind = np.argmax(scores)
            pred.append(max([prob_cls, scores[ind]]))
            box.append(boxes[ind])
        else:
            # No detection: fall back to the classifier score alone.
            pred.append(prob_cls)
            box.append([0, 0, 1, 1])

        sys.stdout.write('\r image process: = {}'.format(len(pred)))
        sys.stdout.flush()
    #evaluation
    gt_np = np.array(gt)
    pred_np = np.array(pred)
    box = np.array(box)  # fixed: was np.arrya -- AttributeError at runtime
    assert gt_np.shape == pred_np.shape
    #AUROCS
    AUROCs = roc_auc_score(gt_np, pred_np)
    print('AUROC = {:.4f}'.format(AUROCs))
    #sensitivity and specificity
    pred_np = np.where(pred_np > score_th, 1, 0)
    tn, fp, fn, tp = confusion_matrix(gt_np, pred_np).ravel()
    sen = tp / (tp + fn)
    spe = tn / (tn + fp)
    print('\r\rSen = {:.4f} and Spe = {:.4f}'.format(sen, spe))