import torch
import torchvision
from torchvision.models.detection import KeypointRCNN
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.ops import MultiScaleRoIAlign


def get_model_keypoint_detection_custom(num_classes, num_keypoints):
    # backbone = myutils.VGG16()
    backbone = torchvision.models.vgg16().features

    ################# USE PRETRAINED DEEP FASHION 1 #################
    pretrained_dict = torch.load(dp1_PATH + 'stage3_hard.pt')
    pretrained_dict2 = torch.load(dp1_PATH + 'fashion_detector.pt')  # loaded but unused below
    model_dict = backbone.state_dict()

    # 0. rename keys from the DeepFashion naming scheme to the layer indices
    # used by torchvision's vgg16().features
    # (fc6 has no counterpart in `features` and is dropped by the filter in step 1)
    rename_map = {
        'conv1_1': '0', 'conv1_2': '2',
        'conv2_1': '5', 'conv2_2': '7',
        'conv3_1': '10', 'conv3_2': '12', 'conv3_3': '14',
        'conv4_1': '17', 'conv4_2': '19', 'conv4_3': '21',
        'conv5_1': '24', 'conv5_2': '26', 'conv5_3': '28',
        'fc6': '31',
    }
    for old, new in rename_map.items():
        pretrained_dict[new + '.weight'] = pretrained_dict.pop(old + '.weight')
        pretrained_dict[new + '.bias'] = pretrained_dict.pop(old + '.bias')

    # 1. filter out unnecessary keys
    pretrained_dict = {
        k: v for k, v in pretrained_dict.items() if k in model_dict
    }
    # 2. overwrite entries in the existing state dict
    model_dict.update(pretrained_dict)
    # 3. load the renamed weights into the backbone
    backbone.load_state_dict(model_dict)
    #################################################################

    # KeypointRCNN needs to know the number of output channels
    # in the backbone. For vgg16().features it is 512.
    backbone.out_channels = 512

    # A backbone returning a single feature map needs its own anchor
    # generator and RoI poolers (featmap_names=['0']); the defaults
    # assume a 5-level FPN and fail at inference time.
    anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512), ),
                                       aspect_ratios=((0.5, 1.0, 2.0), ))
    roi_pooler = MultiScaleRoIAlign(featmap_names=['0'],
                                    output_size=7,
                                    sampling_ratio=2)
    keypoint_roi_pooler = MultiScaleRoIAlign(featmap_names=['0'],
                                             output_size=14,
                                             sampling_ratio=2)

    # put the pieces together inside a KeypointRCNN model
    model = KeypointRCNN(backbone,
                         num_classes=num_classes,
                         num_keypoints=num_keypoints,
                         rpn_anchor_generator=anchor_generator,
                         box_roi_pool=roi_pooler,
                         keypoint_roi_pool=keypoint_roi_pooler)
    return model
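
# A minimal smoke test for the custom VGG16 keypoint model above; a hedged
# sketch, assuming dp1_PATH and the DeepFashion checkpoints are available.
# The class/keypoint counts here are illustrative, not from the source.
model = get_model_keypoint_detection_custom(num_classes=2, num_keypoints=8)
model.eval()
with torch.no_grad():
    dummy = [torch.rand(3, 300, 400)]  # one RGB image, values in [0, 1]
    out = model(dummy)
print(out[0].keys())  # boxes, labels, scores, keypoints, keypoints_scores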
import os

import cv2
import numpy as np
import pandas as pd
import torch
from torchvision.models.detection import KeypointRCNN
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
from torchvision.ops import MultiScaleRoIAlign
from tqdm import tqdm


def testing(args):
    # Device setting
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Open submission template
    submit = pd.read_csv('/HDD/dataset/dacon/pose/sample_submission.csv')

    # Model setting
    backbone = resnet_fpn_backbone('resnet101', pretrained=True)
    roi_pooler = MultiScaleRoIAlign(featmap_names=['0', '1', '2', '3'],
                                    output_size=7,
                                    sampling_ratio=2)
    keypoint_roi_pooler = MultiScaleRoIAlign(
        featmap_names=['0', '1', '2', '3'], output_size=14, sampling_ratio=2)
    model = KeypointRCNN(backbone,
                         num_classes=2,
                         num_keypoints=24,
                         box_roi_pool=roi_pooler,
                         keypoint_roi_pool=keypoint_roi_pooler)

    checkpoint = torch.load(args.file_name, map_location='cpu')
    model.load_state_dict(checkpoint['model'])
    model = model.to(device)
    model = model.eval()

    for i, img_id in enumerate(tqdm(submit['image'])):
        # cv2.imread takes an IMREAD_* flag, not a color-conversion code;
        # convert BGR -> RGB explicitly with cvtColor instead.
        image = cv2.imread(
            os.path.join('/HDD/dataset/dacon/pose/test_imgs/', img_id))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = image / 255.0
        image = image.transpose(2, 0, 1)  # HWC -> CHW
        image = [torch.as_tensor(image, dtype=torch.float32).to(device)]

        preds = model(image)
        # (x, y) keypoints of the top-scoring detection
        preds_ = preds[0]['keypoints'][0][:, :2].detach().cpu().numpy(
        ).reshape(-1)
        submit.iloc[i, 1:] = preds_

    # Save (error_list holds indices of known-bad test images)
    error_list = np.array([
        317, 869, 873, 877, 911, 1559, 1560, 1562, 1566, 1575, 1577, 1578,
        1582, 1606, 1607, 1622, 1623, 1624, 1625, 1629, 3968, 4115, 4116,
        4117, 4118, 4119, 4120, 4121, 4122, 4123, 4124, 4125, 4126, 4127,
        4128, 4129, 4130, 4131, 4132, 4133, 4134, 4135, 4136, 4137, 4138,
        4139, 4140, 4141, 4142, 4143, 4144, 4145, 4146, 4147, 4148, 4149,
        4150, 4151, 4152, 4153, 4154, 4155, 4156, 4157, 4158, 4159, 4160,
        4161, 4162, 4163, 4164, 4165, 4166, 4167, 4168, 4169, 4170, 4171,
        4172, 4173, 4174, 4175, 4176, 4177, 4178, 4179, 4180, 4181, 4182,
        4183, 4184, 4185, 4186, 4187, 4188, 4189, 4190, 4191, 4192, 4193,
        4194
    ])
    submit2 = submit.loc[~submit.index.isin(error_list)]
    submit.to_csv('./submix_new.csv', index=False)
    submit2.to_csv('./submix_new2.csv', index=False)
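
# In eval mode a torchvision KeypointRCNN returns one dict per input image
# with 'boxes', 'labels', 'scores', 'keypoints' (N x K x 3: x, y, visibility)
# and 'keypoints_scores'. A hedged sketch of keeping all detections above a
# score threshold, reusing `model` and a preprocessed `image` list as in the
# loop above (the 0.7 threshold is illustrative, not from the source):
preds = model(image)[0]
keep = preds['scores'] > 0.7
for box, kpts in zip(preds['boxes'][keep], preds['keypoints'][keep]):
    xy = kpts[:, :2].detach().cpu().numpy()  # (num_keypoints, 2)
    print(box.tolist(), xy.shape)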
from torch import nn
from torchvision.models.detection import KeypointRCNN
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
from torchvision.ops import MultiScaleRoIAlign


def get_model() -> nn.Module:
    backbone = resnet_fpn_backbone('resnet101', pretrained=True)
    roi_pooler = MultiScaleRoIAlign(
        featmap_names=['0', '1', '2', '3'],
        output_size=7,
        sampling_ratio=2
    )
    keypoint_roi_pooler = MultiScaleRoIAlign(
        featmap_names=['0', '1', '2', '3'],
        output_size=14,
        sampling_ratio=2
    )
    model = KeypointRCNN(
        backbone,
        num_classes=2,
        num_keypoints=24,
        box_roi_pool=roi_pooler,
        keypoint_roi_pool=keypoint_roi_pooler
    )
    return model
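
# A minimal sketch of using get_model() in training mode: with targets
# attached, the model returns a dict of losses instead of detections.
# The box and keypoint values are illustrative dummy data only.
import torch

model = get_model()
model.train()
images = [torch.rand(3, 300, 400)]
targets = [{
    'boxes': torch.tensor([[50.0, 50.0, 250.0, 280.0]]),
    'labels': torch.tensor([1]),
    # (num_instances, num_keypoints, [x, y, vis]); points placed inside the box
    'keypoints': torch.cat(
        [torch.rand(1, 24, 2) * 200 + 50, torch.ones(1, 24, 1)], dim=2),
}]
loss_dict = model(images, targets)
print({k: float(v) for k, v in loss_dict.items()})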
import torch
import torchvision
from torchvision.models.detection import KeypointRCNN
from torchvision.models.detection.rpn import AnchorGenerator


def keypointrcnn_mobilenet(backbone_name, path, device):
    if backbone_name == "mobilenet_v3_large":
        backbone = torchvision.models.mobilenet_v3_large(
            pretrained=True).features
        backbone.out_channels = 960
    elif backbone_name == "mobilenet_v3_small":
        backbone = torchvision.models.mobilenet_v3_small(
            pretrained=True).features
        backbone.out_channels = 576
    elif backbone_name == "mobilenet_v2":
        backbone = torchvision.models.mobilenet_v2(pretrained=True).features
        backbone.out_channels = 1280
    else:
        raise ValueError(f'Bad backbone name: {backbone_name}')

    anchor_generator = AnchorGenerator(sizes=((16, 32, 64, 128, 256), ),
                                       aspect_ratios=((0.5, 1.0, 2.0), ))
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                    output_size=7,
                                                    sampling_ratio=2)
    keypoint_roi_pooler = torchvision.ops.MultiScaleRoIAlign(
        featmap_names=['0'], output_size=14, sampling_ratio=2)
    model_keypoints = KeypointRCNN(backbone,
                                   num_classes=6,
                                   num_keypoints=20,
                                   rpn_anchor_generator=anchor_generator,
                                   box_roi_pool=roi_pooler,
                                   keypoint_roi_pool=keypoint_roi_pooler)
    model_keypoints = model_keypoints.to(device)
    model_keypoints.load_state_dict(torch.load(path, map_location=device))
    model_keypoints.eval()
    return model_keypoints
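
# Hedged usage sketch for keypointrcnn_mobilenet(); the checkpoint path is
# a placeholder, not from the source.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = keypointrcnn_mobilenet("mobilenet_v2",
                               "keypointrcnn_mobilenet_v2.pt",  # placeholder
                               device)
with torch.no_grad():
    out = model([torch.rand(3, 480, 640).to(device)])
print(out[0]['keypoints'].shape)  # (num_detections, 20, 3)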
from torch import nn
from torchvision import models
from torchvision.models.detection import KeypointRCNN
from torchvision.models.detection.backbone_utils import (
    _validate_trainable_layers, resnet_fpn_backbone)
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.ops import MultiScaleRoIAlign


def get_model(config):
    model = None
    # input_size = 0

    if config.model_name == "resnet":
        """ ResNet18 """
        model = models.resnet18(pretrained=config.use_pretrained)
        set_parameter_requires_grad(model, config.freeze)
        n_features = model.fc.in_features
        model.fc = nn.Linear(n_features, config.n_classes)
        # input_size = 224

    elif config.model_name == "alexnet":
        """ AlexNet """
        model = models.alexnet(pretrained=config.use_pretrained)
        set_parameter_requires_grad(model, config.freeze)
        n_features = model.classifier[-1].in_features
        model.classifier[-1] = nn.Linear(n_features, config.n_classes)
        # input_size = 224

    elif config.model_name == "vgg":
        """ VGG16_bn """
        model = models.vgg16_bn(pretrained=config.use_pretrained)
        set_parameter_requires_grad(model, config.freeze)
        n_features = model.classifier[-1].in_features
        model.classifier[-1] = nn.Linear(n_features, config.n_classes)
        # input_size = 224

    elif config.model_name == "densenet":
        """ DenseNet121 """
        model = models.densenet121(pretrained=config.use_pretrained)
        set_parameter_requires_grad(model, config.freeze)
        n_features = model.classifier.in_features
        model.classifier = nn.Linear(n_features, config.n_classes)
        # input_size = 224

    elif config.model_name == 'mobilenet':
        model = models.mobilenet_v2(pretrained=config.use_pretrained)
        set_parameter_requires_grad(model, config.freeze)
        n_features = model.classifier[-1].in_features
        model.classifier[-1] = nn.Linear(n_features, config.n_classes)

    elif config.model_name == "KeypointRCNN":
        backbone = models.mobilenet_v2(pretrained=True).features
        backbone.out_channels = 1280
        anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                           aspect_ratios=((0.5, 1.0, 2.0),))
        roi_pooler = MultiScaleRoIAlign(
            featmap_names=['0'], output_size=7, sampling_ratio=2)
        keypoint_roi_pooler = MultiScaleRoIAlign(
            featmap_names=['0'], output_size=14, sampling_ratio=2)
        model = KeypointRCNN(
            backbone,
            num_classes=2,
            num_keypoints=24,
            box_roi_pool=roi_pooler,
            keypoint_roi_pool=keypoint_roi_pooler,
            rpn_anchor_generator=anchor_generator
        )

    elif config.model_name == "keypointrcnn_resnet50":
        model = models.detection.keypointrcnn_resnet50_fpn(
            pretrained=config.use_pretrained, progress=False)
        # Replace the 17-keypoint COCO predictor head with a 24-keypoint one
        model.roi_heads.keypoint_predictor.kps_score_lowres = nn.ConvTranspose2d(
            512, 24, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))

    elif config.model_name == "keypointrcnn_resnet101":
        pretrained_backbone = True
        pretrained = False
        trainable_backbone_layers = _validate_trainable_layers(
            pretrained or pretrained_backbone, None, 5, 3)
        backbone = resnet_fpn_backbone(
            'resnet101', pretrained_backbone,
            trainable_layers=trainable_backbone_layers)
        model = KeypointRCNN(backbone, num_classes=2, num_keypoints=24)

    else:
        raise NotImplementedError('You need to specify a valid model name.')

    return model
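
# Hedged usage sketch: `config` here is a stand-in namespace carrying the
# attributes get_model() reads (names inferred from the function body);
# the values are illustrative, not from the source.
from argparse import Namespace

config = Namespace(model_name="KeypointRCNN", use_pretrained=True,
                   freeze=False, n_classes=2)
model = get_model(config)
print(type(model).__name__)  # KeypointRCNN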
import time

import torch
import torchvision
from torchvision.models.detection import KeypointRCNN
from torchvision.models.detection.rpn import AnchorGenerator

# A plain mobilenet_v2 feature extractor returns a single 1280-channel map
# (this line is implied by out_channels below and restored here).
backbone = torchvision.models.mobilenet_v2(pretrained=True).features
backbone.out_channels = 1280

# Have the RPN generate anchors of 5 sizes and 3 aspect ratios per location
# (Tuple[Tuple[int]]: one inner tuple per feature map).
anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512), ),
                                   aspect_ratios=((0.5, 1.0, 2.0), ))

# Define which feature maps to use for cropping regions of interest, and the
# size of the crop after rescaling. If the backbone returns a single Tensor,
# featmap_names should be ['0']; more generally, the backbone should return
# an OrderedDict[Tensor], and featmap_names selects which feature maps to use.
roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                output_size=7,
                                                sampling_ratio=2)
keypoint_roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                         output_size=14,
                                                         sampling_ratio=2)

# Put the pieces together
model = KeypointRCNN(backbone,
                     num_classes=2,
                     rpn_anchor_generator=anchor_generator,
                     box_roi_pool=roi_pooler,
                     keypoint_roi_pool=keypoint_roi_pooler)
model.eval()

x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
start = time.time()
predictions = model(x)
end = time.time()
print(end - start)
import torchvision
from torch import nn
from torchvision.models.detection import KeypointRCNN
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
from torchvision.models.detection.rpn import AnchorGenerator

# Assumed here: an FPN backbone whose levels all have 256 output channels,
# matching the 256-channel convs of the deeper RPN head below.
backbone = resnet_fpn_backbone('resnet50', pretrained=True)

# One (sizes, ratios) pair per FPN level -> 3 anchors per location per level
anchor_generator = AnchorGenerator(sizes=((32,), (64,), (128,), (256,), (512,)),
                                   aspect_ratios=((0.5, 1.0, 2.0),) * 5)


# Added two conv layers more to the head of the RPN. torchvision's RPN expects
# its head to return per-level (cls_logits, bbox_pred) lists, so a plain
# nn.Sequential cannot be used directly; this module keeps that contract.
class DeeperRPNHead(nn.Module):
    def __init__(self, in_channels=256, num_anchors=3):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels, in_channels, 3, stride=1, padding=1), nn.ReLU(),
            nn.Conv2d(in_channels, in_channels, 3, stride=1, padding=1), nn.ReLU(),  # NEW
            nn.Conv2d(in_channels, in_channels, 3, stride=1, padding=1), nn.ReLU(),  # NEW
        )
        self.cls_logits = nn.Conv2d(in_channels, num_anchors, 1)     # cls_logits
        self.bbox_pred = nn.Conv2d(in_channels, num_anchors * 4, 1)  # bbox_pred

    def forward(self, x):
        logits, bbox_reg = [], []
        for feature in x:
            t = self.conv(feature)
            logits.append(self.cls_logits(t))
            bbox_reg.append(self.bbox_pred(t))
        return logits, bbox_reg


new_rpn_head = DeeperRPNHead(in_channels=256, num_anchors=3)

box_roi_pooler = torchvision.ops.MultiScaleRoIAlign(
    featmap_names=['0', '1', '2', '3'], output_size=7, sampling_ratio=2)
keypoint_roi_pooler = torchvision.ops.MultiScaleRoIAlign(
    featmap_names=['0', '1', '2', '3'], output_size=14, sampling_ratio=2)

# put the pieces together
deeper_model = KeypointRCNN(backbone,
                            num_classes=2,
                            rpn_anchor_generator=anchor_generator,
                            rpn_head=new_rpn_head,
                            box_roi_pool=box_roi_pooler,
                            keypoint_roi_pool=keypoint_roi_pooler)
print(deeper_model)
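
# Quick sanity check of the deeper RPN head; a sketch with dummy input and
# untrained weights, just to confirm the forward pass runs end to end.
import torch

deeper_model.eval()
with torch.no_grad():
    preds = deeper_model([torch.rand(3, 300, 400)])
print(preds[0]['boxes'].shape)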
import os
import random

import albumentations as A
import numpy as np
import pandas as pd
import torch
from albumentations.pytorch import ToTensorV2
from torch import optim
from torch.nn.utils import clip_grad_norm_
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import DataLoader
from torchvision.models.detection import KeypointRCNN
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
from torchvision.ops import MultiScaleRoIAlign
from tqdm import tqdm

# KeypointDataset and collate_fn are defined elsewhere in the project.


def training(args):
    # Random seed
    random.seed(42)

    # Device setting
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Data pre-setting
    dat = pd.read_csv(os.path.join(args.data_path, 'train_df.csv'))
    index_list = list(range(len(dat)))
    random.shuffle(index_list)
    valid_count = int(len(index_list) * args.split)
    train_df = dat.iloc[index_list[:-valid_count]]
    valid_df = dat.iloc[index_list[-valid_count:]]

    # Transform setting
    transforms_dict = {
        'train':
        A.Compose([
            A.ShiftScaleRotate(
                shift_limit=0.2, scale_limit=0.2, rotate_limit=30, p=0.3),
            A.HorizontalFlip(p=0.3),
            A.RGBShift(
                r_shift_limit=25, g_shift_limit=25, b_shift_limit=25, p=0.3),
            A.RandomBrightnessContrast(p=0.3),
            ToTensorV2()
        ],
                  bbox_params=A.BboxParams(format='pascal_voc',
                                           label_fields=['labels']),
                  keypoint_params=A.KeypointParams(format='xy',
                                                   remove_invisible=False,
                                                   angle_in_degrees=True)),
        'valid':
        A.Compose([ToTensorV2()],
                  bbox_params=A.BboxParams(format='pascal_voc',
                                           label_fields=['labels']),
                  keypoint_params=A.KeypointParams(format='xy',
                                                   remove_invisible=False,
                                                   angle_in_degrees=True))
    }

    # PyTorch dataloader setting
    dataset_dict = {
        'train':
        KeypointDataset(os.path.join(args.data_path, 'train_imgs/'), train_df,
                        transforms_dict['train']),
        'valid':
        KeypointDataset(os.path.join(args.data_path, 'train_imgs/'), valid_df,
                        transforms_dict['valid']),
    }
    dataloader_dict = {
        'train':
        DataLoader(dataset_dict['train'],
                   batch_size=args.batch_size,
                   shuffle=True,
                   num_workers=args.num_workers,
                   collate_fn=collate_fn),
        'valid':
        DataLoader(dataset_dict['valid'],
                   batch_size=args.batch_size,
                   shuffle=True,
                   num_workers=args.num_workers,
                   collate_fn=collate_fn),
    }

    # Model setting
    backbone = resnet_fpn_backbone('resnet101', pretrained=True)
    roi_pooler = MultiScaleRoIAlign(featmap_names=['0', '1', '2', '3'],
                                    output_size=7,
                                    sampling_ratio=2)
    keypoint_roi_pooler = MultiScaleRoIAlign(
        featmap_names=['0', '1', '2', '3'], output_size=14, sampling_ratio=2)
    model = KeypointRCNN(backbone,
                         num_classes=2,
                         num_keypoints=24,
                         box_roi_pool=roi_pooler,
                         keypoint_roi_pool=keypoint_roi_pooler)
    model = model.to(device)

    # Optimizer setting
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.w_decay)
    scheduler = ReduceLROnPlateau(
        optimizer,
        mode='min',
        factor=0.1,
        patience=int(len(dataloader_dict['train']) / 1.5))

    # Resume
    start_epoch = 0
    if args.resume:
        print('resume!')
        checkpoint = torch.load(args.file_name, map_location='cpu')
        start_epoch = checkpoint['epoch'] + 1
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        scheduler.load_state_dict(checkpoint['scheduler'])
        model = model.to(device)

    # Train start
    best_val_rmse = None
    for epoch in range(start_epoch, args.num_epochs):
        for phase in ['train', 'valid']:
            if phase == 'train':
                model.train()
            if phase == 'valid':
                print('Validation start...')
                model.eval()
                val_rmse = 0

            for i, (images, targets) in enumerate(
                    tqdm(dataloader_dict[phase])):
                # Optimizer setting
                optimizer.zero_grad()

                # Input, output setting
                images = list(image.to(device) for image in images)
                targets = [{k: v.to(device) for k, v in t.items()}
                           for t in targets]

                with torch.set_grad_enabled(phase == 'train'):
                    # In train mode the model returns a dict of losses; in
                    # eval mode it returns a list of per-image detections.
                    outputs = model(images, targets)

                if phase == 'train':
                    loss = sum(loss for loss in outputs.values())
                    loss.backward()
                    clip_grad_norm_(model.parameters(), args.grad_clip)
                    optimizer.step()

                    if (i + 1) % 100 == 0:
                        print(
                            f'| epoch: {epoch} | lr: {optimizer.param_groups[0]["lr"]} | loss: {loss.item():.4f}',
                            end=' | ')
                        for k, v in outputs.items():
                            print(f'{k[5:]}: {v.item():.4f}', end=' | ')
                        print()

                if phase == 'valid':
                    for j, out in enumerate(outputs):
                        if len(out['keypoints']) == 0:
                            # no detection for this image; skip it
                            continue
                        pred_ = out['keypoints'][0][:, :2].detach().cpu(
                        ).numpy().reshape(-1)
                        target_ = targets[j]['keypoints'][0][:, :2].cpu(
                        ).numpy().reshape(-1)
                        val_rmse += np.sqrt(((pred_ - target_)**2).mean())

            if phase == 'valid':
                val_rmse /= len(dataloader_dict[phase])
                print(f'Validation RMSE: {val_rmse}')
                # ReduceLROnPlateau must be stepped with the monitored metric
                scheduler.step(val_rmse)
                if not best_val_rmse or val_rmse < best_val_rmse:
                    print('Checkpoint saving...')
                    torch.save(
                        {
                            'epoch': epoch,
                            'model': model.state_dict(),
                            'optimizer': optimizer.state_dict(),
                            'scheduler': scheduler.state_dict(),
                        }, args.file_name)
                    best_val_rmse = val_rmse
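
# A hedged sketch of a CLI driver for training()/testing() above. The flag
# names mirror the attributes the two functions read from `args`; the
# default values are illustrative, not from the source.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_path', type=str,
                        default='/HDD/dataset/dacon/pose/')
    parser.add_argument('--file_name', type=str, default='./checkpoint.pt')
    parser.add_argument('--split', type=float, default=0.1)
    parser.add_argument('--batch_size', type=int, default=8)
    parser.add_argument('--num_workers', type=int, default=4)
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--momentum', type=float, default=0.9)
    parser.add_argument('--w_decay', type=float, default=1e-4)
    parser.add_argument('--grad_clip', type=float, default=5.0)
    parser.add_argument('--num_epochs', type=int, default=30)
    parser.add_argument('--resume', action='store_true')
    parser.add_argument('--testing', action='store_true')
    args = parser.parse_args()

    if args.testing:
        testing(args)
    else:
        training(args)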