Example 1
import torch
import torchvision
from torchvision.models.detection import KeypointRCNN
from torchvision.models.detection.rpn import AnchorGenerator


def get_model_keypoint_detection_custom(num_classes, num_keypoints):
    #backbone = myutils.VGG16()
    backbone = torchvision.models.vgg16().features
    # KeypointRCNN needs to know the number of output channels
    # in a backbone. For VGG16's feature extractor it is 512,
    # so we set it below.

    ################# USE PRETRAINED DEEP FASHION 1 #################
    pretrained_dict = torch.load(dp1_PATH + 'stage3_hard.pt')
    pretrained_dict2 = torch.load(dp1_PATH + 'fashion_detector.pt')  # loaded but unused below
    model_dict = backbone.state_dict()
    # 0. rename keys to the layer indices torchvision's vgg16().features uses
    layer_map = {
        'conv1_1': '0', 'conv1_2': '2',
        'conv2_1': '5', 'conv2_2': '7',
        'conv3_1': '10', 'conv3_2': '12', 'conv3_3': '14',
        'conv4_1': '17', 'conv4_2': '19', 'conv4_3': '21',
        'conv5_1': '24', 'conv5_2': '26', 'conv5_3': '28',
        'fc6': '31',
    }
    for old, new in layer_map.items():
        for suffix in ('weight', 'bias'):
            pretrained_dict[f'{new}.{suffix}'] = pretrained_dict.pop(f'{old}.{suffix}')
    # 1. filter out unnecessary keys ('31.*' from fc6 is dropped here,
    # since vgg16().features only goes up to index 30)
    pretrained_dict = {
        k: v
        for k, v in pretrained_dict.items() if k in model_dict
    }
    # 2. load the filtered weights into the backbone (the original snippet
    # renamed and filtered the keys but never applied them)
    model_dict.update(pretrained_dict)
    backbone.load_state_dict(model_dict)

    #################################################################
    backbone.out_channels = 512

    # A plain (non-FPN) backbone returns a single feature map, so the RPN
    # needs an explicit anchor generator and the ROI poolers must point at
    # feature map '0' (the defaults assume an FPN with five levels)
    anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512), ),
                                       aspect_ratios=((0.5, 1.0, 2.0), ))
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                    output_size=7,
                                                    sampling_ratio=2)
    keypoint_roi_pooler = torchvision.ops.MultiScaleRoIAlign(
        featmap_names=['0'], output_size=14, sampling_ratio=2)

    # put the pieces together inside a KeypointRCNN model
    model = KeypointRCNN(backbone,
                         num_classes=num_classes,
                         num_keypoints=num_keypoints,
                         rpn_anchor_generator=anchor_generator,
                         box_roi_pool=roi_pooler,
                         keypoint_roi_pool=keypoint_roi_pooler)
    return model
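A quick smoke test of the factory above (a minimal sketch: the class and keypoint counts are arbitrary, and dp1_PATH must already point at the DeepFashion checkpoints):

model = get_model_keypoint_detection_custom(num_classes=2, num_keypoints=24)
model.eval()
with torch.no_grad():
    out = model([torch.rand(3, 300, 400)])
# each prediction dict holds boxes, labels, scores, keypoints, keypoints_scores
print(out[0].keys())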
Example 2
import os

import cv2
import numpy as np
import pandas as pd
import torch
from torchvision.models.detection import KeypointRCNN
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
from torchvision.ops import MultiScaleRoIAlign
from tqdm import tqdm


def testing(args):

    # Device setting
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Submit data open
    submit = pd.read_csv('/HDD/dataset/dacon/pose/sample_submission.csv')

    # Model setting
    backbone = resnet_fpn_backbone('resnet101', pretrained=True)
    roi_pooler = MultiScaleRoIAlign(featmap_names=['0', '1', '2', '3'],
                                    output_size=7,
                                    sampling_ratio=2)

    keypoint_roi_pooler = MultiScaleRoIAlign(
        featmap_names=['0', '1', '2', '3'], output_size=14, sampling_ratio=2)
    model = KeypointRCNN(backbone,
                         num_classes=2,
                         num_keypoints=24,
                         box_roi_pool=roi_pooler,
                         keypoint_roi_pool=keypoint_roi_pooler)

    checkpoint = torch.load(args.file_name, map_location='cpu')
    model.load_state_dict(checkpoint['model'])
    model = model.to(device)
    model = model.eval()

    for i, img_id in enumerate(tqdm(submit['image'])):
        # cv2.imread's second argument is an imread flag, not a color-conversion
        # code, so BGR -> RGB has to be done with cvtColor
        image = cv2.imread(
            os.path.join('/HDD/dataset/dacon/pose/test_imgs/', img_id))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = image / 255.0
        image = image.transpose(2, 0, 1)
        image = [torch.as_tensor(image, dtype=torch.float32).to(device)]

        with torch.no_grad():
            preds = model(image)
        preds_ = preds[0]['keypoints'][0][:, :2].cpu().numpy().reshape(-1)
        submit.iloc[i, 1:] = preds_

    # Save (indices of known-bad test images are dropped from the second file)
    error_list = np.array([
        317, 869, 873, 877, 911, 1559, 1560, 1562, 1566, 1575, 1577, 1578,
        1582, 1606, 1607, 1622, 1623, 1624, 1625, 1629, 3968, 4115, 4116, 4117,
        4118, 4119, 4120, 4121, 4122, 4123, 4124, 4125, 4126, 4127, 4128, 4129,
        4130, 4131, 4132, 4133, 4134, 4135, 4136, 4137, 4138, 4139, 4140, 4141,
        4142, 4143, 4144, 4145, 4146, 4147, 4148, 4149, 4150, 4151, 4152, 4153,
        4154, 4155, 4156, 4157, 4158, 4159, 4160, 4161, 4162, 4163, 4164, 4165,
        4166, 4167, 4168, 4169, 4170, 4171, 4172, 4173, 4174, 4175, 4176, 4177,
        4178, 4179, 4180, 4181, 4182, 4183, 4184, 4185, 4186, 4187, 4188, 4189,
        4190, 4191, 4192, 4193, 4194
    ])
    submit2 = submit.loc[~submit.index.isin(error_list)]
    submit.to_csv('./submix_new.csv', index=False)
    submit2.to_csv('./submix_new2.csv', index=False)
Example 3
import torch
import torchvision
from torch import nn
from torchvision.models.detection import KeypointRCNN
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.ops import MultiScaleRoIAlign


def get_model() -> nn.Module:
    backbone = resnet_fpn_backbone('resnet101', pretrained=True)
    roi_pooler = MultiScaleRoIAlign(
        featmap_names=['0', '1', '2', '3'],
        output_size=7,
        sampling_ratio=2
    )

    keypoint_roi_pooler = MultiScaleRoIAlign(
        featmap_names=['0', '1', '2', '3'],
        output_size=14,
        sampling_ratio=2
    )

    model = KeypointRCNN(
        backbone,
        num_classes=2,
        num_keypoints=24,
        box_roi_pool=roi_pooler,
        keypoint_roi_pool=keypoint_roi_pooler
    )

    return model
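When targets are supplied in train mode, KeypointRCNN returns a dict of losses rather than detections; a minimal sketch with dummy targets (the box and keypoint values are arbitrary):

model = get_model()
model.train()
images = [torch.rand(3, 300, 400)]
targets = [{
    'boxes': torch.tensor([[50., 50., 250., 350.]]),
    'labels': torch.tensor([1]),
    # shape (num_instances, num_keypoints, 3) with [x, y, visibility]
    'keypoints': torch.cat([torch.rand(1, 24, 2) * 200., torch.ones(1, 24, 1)], dim=2),
}]
losses = model(images, targets)
# loss_classifier, loss_box_reg, loss_keypoint, loss_objectness, loss_rpn_box_reg
print(losses.keys())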
Example 4
def keypointrcnn_mobilenet(backbone_name, path, device):
    if backbone_name == "mobilenet_v3_large":
        backbone = torchvision.models.mobilenet_v3_large(
            pretrained=True).features
        backbone.out_channels = 960
    elif backbone_name == "mobilenet_v3_small":
        backbone = torchvision.models.mobilenet_v3_small(
            pretrained=True).features
        backbone.out_channels = 576
    elif backbone_name == "mobilenet_v2":
        backbone = torchvision.models.mobilenet_v2(pretrained=True).features
        backbone.out_channels = 1280
    else:
        raise ValueError(f'Unsupported backbone name: {backbone_name}')

    anchor_generator = AnchorGenerator(sizes=((16, 32, 64, 128, 256), ),
                                       aspect_ratios=((0.5, 1.0, 2.0), ))

    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                    output_size=7,
                                                    sampling_ratio=2)
    keypoint_roi_pooler = torchvision.ops.MultiScaleRoIAlign(
        featmap_names=['0'], output_size=14, sampling_ratio=2)
    model_keypoints = KeypointRCNN(backbone,
                                   num_classes=6,
                                   num_keypoints=20,
                                   rpn_anchor_generator=anchor_generator,
                                   box_roi_pool=roi_pooler,
                                   keypoint_roi_pool=keypoint_roi_pooler)

    model_keypoints = model_keypoints.to(device)

    model_keypoints.load_state_dict(torch.load(path, map_location=device))
    model_keypoints.eval()

    return model_keypoints
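Hypothetical usage of the loader above (the checkpoint path is a placeholder):

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = keypointrcnn_mobilenet('mobilenet_v3_small', 'keypoint_rcnn.pt', device)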
Example 5
import torch.nn as nn
from torchvision import models
from torchvision.models.detection import KeypointRCNN
from torchvision.models.detection.backbone_utils import (
    resnet_fpn_backbone, _validate_trainable_layers)
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.ops import MultiScaleRoIAlign


def get_model(config):

    model = None
    # input_size = 0

    if config.model_name == "resnet":
        """ Resnet18
        """
        model = models.resnet18(pretrained=config.use_pretrained)
        set_parameter_requires_grad(model, config.freeze)

        n_features = model.fc.in_features
        model.fc = nn.Linear(n_features, config.n_classes)
        # input_size = 224
    elif config.model_name == "alexnet":
        """ Alexnet
        """
        model = models.alexnet(pretrained=config.use_pretrained)
        set_parameter_requires_grad(model, config.freeze)

        n_features = model.classifier[-1].in_features
        model.classifier[-1] = nn.Linear(n_features, config.n_classes)
        # input_size = 224
    elif config.model_name == "vgg":
        """ VGG16_bn
        """
        model = models.vgg16_bn(pretrained=config.use_pretrained)
        set_parameter_requires_grad(model, config.freeze)

        n_features = model.classifier[-1].in_features
        model.classifier[-1] = nn.Linear(n_features, config.n_classes)
        # input_size = 224
    elif config.model_name == "densenet":
        """ Densenet
        """
        model = models.densenet121(pretrained=config.use_pretrained)
        set_parameter_requires_grad(model, config.freeze)

        n_features = model.classifier.in_features
        model.classifier = nn.Linear(n_features, config.n_classes)
        # input_size = 224
        
    elif config.model_name == 'mobilenet':
        model = models.mobilenet_v2(pretrained = config.use_pretrained)
        set_parameter_requires_grad(model, config.freeze)

        n_features = model.classifier[-1].in_features
        model.classifier[-1] = nn.Linear(n_features, config.n_classes)

    elif config.model_name == "KeypointRCNN":
      backbone = models.mobilenet_v2(pretrained=True).features
      backbone.out_channels = 1280
      roi_pooler = MultiScaleRoIAlign(
          featmap_names=['0'],
          output_size=7,
          sampling_ratio=2
      )
      anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                    aspect_ratios=((0.5, 1.0, 2.0),))
      keypoint_roi_pooler = MultiScaleRoIAlign(
          featmap_names=['0'],
          output_size=14,
          sampling_ratio=2
      )

      model = KeypointRCNN(
          backbone, 
          num_classes=2,
          num_keypoints=24,
          box_roi_pool=roi_pooler,
          keypoint_roi_pool=keypoint_roi_pooler,rpn_anchor_generator=anchor_generator
      )

    elif config.model_name == "keypointrcnn_resnet50":
      model = models.detection.keypointrcnn_resnet50_fpn(pretrained=config.use_pretrained, progress=False)
      model.roi_heads.keypoint_predictor.kps_score_lowres = nn.ConvTranspose2d(512, 24, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
    
    elif config.model_name == "keypointrcnn_resnet101":
      pretrained_backbone = True
      pretrained = False
      trainable_backbone_layers = None
      trainable_backbone_layers = _validate_trainable_layers(
              pretrained or pretrained_backbone, trainable_backbone_layers, 5, 3)

      backbone = resnet_fpn_backbone('resnet101', pretrained_backbone, trainable_layers=trainable_backbone_layers)

      model = KeypointRCNN(
          backbone, 
          num_classes=2,
          num_keypoints=24)

    else:
        raise NotImplementedError('You need to specify model name.')

    return model
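The function relies on a set_parameter_requires_grad helper that the snippet does not show; a common definition (in the spirit of the PyTorch finetuning tutorial), plus a minimal usage sketch with an assumed config object:

from types import SimpleNamespace


def set_parameter_requires_grad(model, freeze):
    # When freeze is True, stop gradients for the pretrained parameters so
    # only the newly added head is trained
    if freeze:
        for param in model.parameters():
            param.requires_grad = False


config = SimpleNamespace(model_name='resnet', use_pretrained=True,
                         freeze=True, n_classes=10)
model = get_model(config)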
Example 6
import time

import torch
import torchvision
from torchvision.models.detection import KeypointRCNN
from torchvision.models.detection.rpn import AnchorGenerator

# The snippet assumes a mobilenet_v2 feature extractor, whose output has 1280 channels
backbone = torchvision.models.mobilenet_v2(pretrained=True).features
backbone.out_channels = 1280

# Let the RPN generate anchors of 5 sizes with 3 aspect ratios: Tuple[Tuple[int]]
anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512), ),
                                   aspect_ratios=((0.5, 1.0, 2.0), ))
# Define which feature maps will be used for the region-of-interest cropping,
# as well as the size of the crop after rescaling.
# If the backbone returns a single Tensor, featmap_names should be ['0'];
# more generally, the backbone should return an OrderedDict[Tensor],
# and featmap_names selects which feature maps to use.
roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                output_size=7,
                                                sampling_ratio=2)

keypoint_roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                         output_size=14,
                                                         sampling_ratio=2)
# put the pieces together
model = KeypointRCNN(backbone,
                     num_classes=2,
                     rpn_anchor_generator=anchor_generator,
                     box_roi_pool=roi_pooler,
                     keypoint_roi_pool=keypoint_roi_pooler)
model.eval()
x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]

start = time.time()
predictions = model(x)
end = time.time()
print(f'Inference took {end - start:.3f} s')
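For comparison with the single-Tensor case above, an FPN backbone returns an OrderedDict whose keys are exactly the names that featmap_names refers to; a quick way to inspect them:

from torchvision.models.detection.backbone_utils import resnet_fpn_backbone

fpn = resnet_fpn_backbone('resnet50', pretrained=False)
feats = fpn(torch.rand(1, 3, 224, 224))
print(list(feats.keys()))  # ['0', '1', '2', '3', 'pool']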
Example 7
import torch
import torchvision
from torch import nn
from torchvision.models.detection import KeypointRCNN
from torchvision.models.detection.rpn import AnchorGenerator

anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512), ),
                                   aspect_ratios=((0.5, 1.0, 2.0), ))


# Added two more conv layers to the head of the RPN. The RPN head has to
# return per-feature-map (objectness logits, box deltas), so a plain
# nn.Sequential cannot replace it; this module follows the forward contract
# of torchvision's RPNHead. ReLUs are inserted between the stacked convs,
# since without them the convolutions would collapse into one linear map.
class DeeperRPNHead(nn.Module):
    def __init__(self, in_channels, num_anchors):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels, in_channels, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels, in_channels, kernel_size=3, padding=1),  # NEW
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels, in_channels, kernel_size=3, padding=1),  # NEW
            nn.ReLU(inplace=True),
        )
        self.cls_logits = nn.Conv2d(in_channels, num_anchors, kernel_size=1)
        self.bbox_pred = nn.Conv2d(in_channels, num_anchors * 4, kernel_size=1)

    def forward(self, features):
        logits, bbox_reg = [], []
        for feature in features:
            t = self.conv(feature)
            logits.append(self.cls_logits(t))
            bbox_reg.append(self.bbox_pred(t))
        return logits, bbox_reg


# backbone is assumed to carry over from the previous example
new_rpn_head = DeeperRPNHead(backbone.out_channels,
                             anchor_generator.num_anchors_per_location()[0])

box_roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                    output_size=7,
                                                    sampling_ratio=2)

keypoint_roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                         output_size=14,
                                                         sampling_ratio=2)
# put the pieces together
deeper_model = KeypointRCNN(backbone,
                            num_classes=2,
                            rpn_anchor_generator=anchor_generator,
                            rpn_head=new_rpn_head,
                            box_roi_pool=box_roi_pooler,
                            keypoint_roi_pool=keypoint_roi_pooler)

print(deeper_model)
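A quick forward pass to confirm the head wiring (a sketch; with untrained weights the detections are meaningless):

deeper_model.eval()
with torch.no_grad():
    out = deeper_model([torch.rand(3, 300, 400)])
# (num_detections, 17, 3): KeypointRCNN defaults to 17 keypoints when
# num_keypoints is not given
print(out[0]['keypoints'].shape)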
Example 8
import os
import random

import albumentations as A
import numpy as np
import pandas as pd
import torch
from albumentations.pytorch import ToTensorV2
from torch import optim
from torch.nn.utils import clip_grad_norm_
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import DataLoader
from torchvision.models.detection import KeypointRCNN
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
from torchvision.ops import MultiScaleRoIAlign
from tqdm import tqdm

# KeypointDataset and collate_fn are assumed to be defined elsewhere in the project


def training(args):

    # Random seed
    random.seed(42)

    # Device setting
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Data pre-setting
    dat = pd.read_csv(os.path.join(args.data_path, 'train_df.csv'))
    index_list = list(range(len(dat)))
    random.shuffle(index_list)
    valid_count = int(len(index_list) * args.split)
    train_df = dat.iloc[index_list[:-valid_count]]
    valid_df = dat.iloc[index_list[-valid_count:]]

    # Transform setting
    transforms_dict = {
        'train':
        A.Compose([
            A.ShiftScaleRotate(
                shift_limit=0.2, scale_limit=0.2, rotate_limit=30, p=0.3),
            A.HorizontalFlip(p=0.3),
            A.RGBShift(
                r_shift_limit=25, g_shift_limit=25, b_shift_limit=25, p=0.3),
            A.RandomBrightnessContrast(p=0.3),
            ToTensorV2()
        ],
                  bbox_params=A.BboxParams(format='pascal_voc',
                                           label_fields=['labels']),
                  keypoint_params=A.KeypointParams(format='xy',
                                                   remove_invisible=False,
                                                   angle_in_degrees=True)),
        'valid':
        A.Compose([ToTensorV2()],
                  bbox_params=A.BboxParams(format='pascal_voc',
                                           label_fields=['labels']),
                  keypoint_params=A.KeypointParams(format='xy',
                                                   remove_invisible=False,
                                                   angle_in_degrees=True))
    }

    # PyTorch dataloader setting
    dataset_dict = {
        'train':
        KeypointDataset(os.path.join(args.data_path, 'train_imgs/'), train_df,
                        transforms_dict['train']),
        'valid':
        KeypointDataset(os.path.join(args.data_path, 'train_imgs/'), valid_df,
                        transforms_dict['valid']),
    }
    dataloader_dict = {
        'train':
        DataLoader(dataset_dict['train'],
                   batch_size=args.batch_size,
                   shuffle=True,
                   num_workers=args.num_workers,
                   collate_fn=collate_fn),
        'valid':
        DataLoader(dataset_dict['valid'],
                   batch_size=args.batch_size,
                   shuffle=True,
                   num_workers=args.num_workers,
                   collate_fn=collate_fn),
    }

    # Model setting
    backbone = resnet_fpn_backbone('resnet101', pretrained=True)
    roi_pooler = MultiScaleRoIAlign(featmap_names=['0', '1', '2', '3'],
                                    output_size=7,
                                    sampling_ratio=2)

    keypoint_roi_pooler = MultiScaleRoIAlign(
        featmap_names=['0', '1', '2', '3'], output_size=14, sampling_ratio=2)
    model = KeypointRCNN(backbone,
                         num_classes=2,
                         num_keypoints=24,
                         box_roi_pool=roi_pooler,
                         keypoint_roi_pool=keypoint_roi_pooler)
    model = model.to(device)

    # Optimizer setting
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.w_decay)
    scheduler = ReduceLROnPlateau(optimizer,
                                  mode='min',
                                  factor=0.1,
                                  patience=len(dataloader_dict['train']) / 1.5)

    # Resume
    start_epoch = 0
    if args.resume:
        print('resume!')
        checkpoint = torch.load(args.file_name, map_location='cpu')
        start_epoch = checkpoint['epoch'] + 1
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        scheduler.load_state_dict(checkpoint['scheduler'])
        model = model.to(device)

    # Train start

    best_val_rmse = None

    for epoch in range(start_epoch, args.num_epochs):
        for phase in ['train', 'valid']:
            if phase == 'train':
                model.train()
            elif phase == 'valid':
                print('Validation start...')
                model.eval()
                val_rmse = 0
            for i, (images,
                    targets) in enumerate(tqdm(dataloader_dict[phase])):
                # Input, output setting
                images = list(image.to(device) for image in images)
                targets = [{k: v.to(device)
                            for k, v in t.items()} for t in targets]

                with torch.set_grad_enabled(phase == 'train'):
                    # In train mode the model returns a dict of losses; in
                    # eval mode it returns a list of per-image predictions
                    outputs = model(images, targets)
                    if phase == 'train':
                        optimizer.zero_grad()
                        loss = sum(loss for loss in outputs.values())
                        loss.backward()
                        clip_grad_norm_(model.parameters(), args.grad_clip)
                        optimizer.step()

                        if (i + 1) % 100 == 0:
                            print(
                                f'| epoch: {epoch} | lr: {optimizer.param_groups[0]["lr"]} | loss: {loss.item():.4f}',
                                end=' | ')
                            for k, v in outputs.items():
                                print(f'{k[5:]}: {v.item():.4f}', end=' | ')
                            print()
                    if phase == 'valid':
                        # j avoids shadowing the batch index i
                        for j, pred in enumerate(outputs):
                            if len(pred['keypoints']) == 0:
                                continue  # no detections for this image
                            pred_ = pred['keypoints'][0][:, :2].cpu(
                            ).numpy().reshape(-1)
                            target_ = targets[j]['keypoints'][0][:, :2].cpu(
                            ).numpy().reshape(-1)
                            val_rmse += np.sqrt(((pred_ - target_)**2).mean())

            if phase == 'valid':
                val_rmse /= len(dataloader_dict[phase])
                print(f'Validation RMSE: {val_rmse}')
                # ReduceLROnPlateau is stepped on the validation metric (the
                # original snippet saved scheduler state but never stepped it)
                scheduler.step(val_rmse)
                if not best_val_rmse or val_rmse < best_val_rmse:
                    print('Checkpoint saving...')
                    torch.save(
                        {
                            'epoch': epoch,
                            'model': model.state_dict(),
                            'optimizer': optimizer.state_dict(),
                            'scheduler': scheduler.state_dict(),
                        }, args.file_name)
                    best_val_rmse = val_rmse
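The DataLoaders above rely on a collate_fn that the snippet does not define; the usual choice for torchvision detection models (as in the torchvision detection references) simply zips the batch:

def collate_fn(batch):
    # Detection samples are variable-sized, so keep the batch as tuples of
    # lists instead of stacking into tensors
    return tuple(zip(*batch))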