def __init__(self, args):
    """Build the video model.

    A 1->3 channel stem feeds a pretrained 3D-ResNet backbone selected by
    ``args.resnet3d_model``, followed by two FC layers and one binary
    classifier head per entry in ``args.features``.
    """
    super().__init__()
    # Stem maps single-channel volumes to the 3 channels the backbone expects.
    self.stem = nn.Conv3d(1, 3, (1, 3, 3), stride=1, padding=(0, 1, 1))
    # Choose the pretrained backbone; anything unrecognized falls back to
    # the R(2+1)D network, exactly as the original if/elif/else did.
    backbone_builders = {"r3d_18": r3d_18, "mc3_18": mc3_18}
    build_backbone = backbone_builders.get(args.resnet3d_model, r2plus1d_18)
    self.resnet3d = build_backbone(pretrained=True)
    self.resnet3d_out_features = self.resnet3d.fc.out_features
    self.features = args.features
    # FC layers between the backbone and the per-feature heads.
    self.x1 = nn.Linear(self.resnet3d_out_features, self.resnet3d_out_features)
    nn.init.kaiming_normal_(self.x1.weight)
    self.dropout1 = nn.Dropout(p=0.2)
    self.x2 = nn.Linear(self.resnet3d_out_features, self.resnet3d_out_features // 2)
    nn.init.kaiming_normal_(self.x2.weight)
    self.dropout2 = nn.Dropout(p=0.2)
    # One single-logit classifier head per requested feature.
    for feature in self.features:
        setattr(self, f"{feature}_head", ClassifierHead(self.resnet3d_out_features // 2, 1))
def __init__(self):
    """Video feature extractor wrapping a pretrained ``r3d_18`` backbone.

    Takes no arguments. (The previous docstring — ``block``,
    ``conv_makers``, ``layers``, ``stem``, ``num_classes``,
    ``zero_init_residual`` — described torchvision's generic VideoResNet
    constructor, not this zero-argument wrapper, and was removed.)
    """
    super(ResNet3D, self).__init__()
    self.backbone = r3d_18(pretrained=True)
def __init__(self, device, num_layers, weights, use_mask):
    """Perceptual-style 3D loss.

    Compares activations from the first stages of a frozen, pretrained
    r3d_18 using L1 distance; ``num_layers`` selects how deep to go and
    ``weights`` scales the per-stage contributions.
    """
    super(_3DLoss, self).__init__()
    backbone = r3d_18(pretrained=True).to(device)
    # Feature extractor only: freeze every backbone parameter.
    for param in backbone.parameters():
        param.requires_grad = False
    # NOTE: a plain Python list (not nn.ModuleList), matching the original.
    self.model = [backbone.stem, backbone.layer1, backbone.layer2, backbone.layer3]
    self.layer = num_layers
    self.weights = weights
    self.use_mask = use_mask
    self.loss = nn.L1Loss()
def __init__(self):
    """Video object segmentation model.

    Uses the first five children of a pretrained r3d_18 as feature stages,
    three stacked LSTM ops, and a decoder; ImageNet mean/std normalization
    constants are registered as (non-trainable) buffers.
    """
    super(VOSModel, self).__init__()
    self.r3d_model = r3d_18(pretrained=True, progress=True)
    # Keep only the first five children of the backbone (drops the head).
    self.r3d_model_children = list(self.r3d_model.children())[:5]
    self.frame_encoder = Encoder_M()
    self.lstm_op1 = LSTMOp(512, 256, 256)
    self.lstm_op2 = LSTMOp(256, 128, 128)
    self.lstm_op3 = LSTMOp(128, 64, 64)
    self.decoder = Decoder(256)
    # ImageNet normalization constants, shaped (1, C, 1, 1, 1) to broadcast
    # over 5-D video tensors.
    imagenet_mean = [0.485, 0.456, 0.406]
    imagenet_std = [0.229, 0.224, 0.225]
    self.register_buffer(
        'mean', torch.tensor(imagenet_mean, dtype=torch.float32).view(1, 3, 1, 1, 1))
    self.register_buffer(
        'std', torch.tensor(imagenet_std, dtype=torch.float32).view(1, 3, 1, 1, 1))
def test_r3d_18_video(self):
    """Export/compile check for the r3d_18 video model on a constant input.

    The original built ``Variable(torch.randn(...).fill_(1.0))`` — the
    random draw was discarded immediately by ``fill_(1.0)``, and
    ``torch.autograd.Variable`` has been a no-op wrapper since PyTorch 0.4,
    so a plain all-ones tensor is equivalent and clearer.
    """
    x = torch.ones(1, 3, 4, 112, 112)
    self.exportTest(toC(r3d_18()), toC(x), rtol=1e-3, atol=1e-5)
def main(model_name, mode, root, val_split, ckpt, batch_per_gpu):
    # Validation entry point: builds the (optionally distributed) validation
    # dataloader, constructs the requested video model, and moves it to GPU.
    # NOTE(review): `ckpt` is unused in this visible portion — presumably the
    # checkpoint is loaded further down; confirm against the full file.
    num_gpus = MPI.COMM_WORLD.Get_size()
    distributed = False
    if num_gpus > 1:
        distributed = True
        # Map this MPI rank onto a local GPU index.
        local_rank = MPI.COMM_WORLD.Get_rank() % torch.cuda.device_count()
    if distributed:
        torch.cuda.set_device(local_rank)
        host = os.environ["MASTER_ADDR"] if "MASTER_ADDR" in os.environ else "127.0.0.1"
        torch.distributed.init_process_group(
            backend="nccl",
            init_method='tcp://{}:12345'.format(host),
            rank=MPI.COMM_WORLD.Get_rank(),
            world_size=MPI.COMM_WORLD.Get_size()
        )
        synchronize()
    val_dataloader = make_dataloader(root, val_split, mode, model_name,
                                     seq_len=16,  # 64,
                                     overlap=8,  # 32,
                                     phase='val',
                                     max_iters=None,
                                     batch_per_gpu=batch_per_gpu,
                                     num_workers=16,
                                     shuffle=False,
                                     distributed=distributed,
                                     with_normal=False)
    # Build the model named by `model_name`; I3D uses 2 input channels for
    # optical flow and 3 for RGB.
    if model_name == 'i3d':
        if mode == 'flow':
            model = InceptionI3d(val_dataloader.dataset.num_classes, in_channels=2, dropout_keep_prob=0.5)
        else:
            model = InceptionI3d(val_dataloader.dataset.num_classes, in_channels=3, dropout_keep_prob=0.5)
        model.replace_logits(val_dataloader.dataset.num_classes)
    elif model_name == 'r3d_18':
        model = r3d_18(pretrained=False, num_classes=val_dataloader.dataset.num_classes)
    elif model_name == 'mc3_18':
        model = mc3_18(pretrained=False, num_classes=val_dataloader.dataset.num_classes)
    elif model_name == 'r2plus1d_18':
        model = r2plus1d_18(pretrained=False, num_classes=val_dataloader.dataset.num_classes)
    elif model_name == 'c3d':
        model = C3D(pretrained=False, num_classes=val_dataloader.dataset.num_classes)
    else:
        raise NameError('unknown model name:{}'.format(model_name))
    # pdb.set_trace()
    # NOTE(review): this loop is a no-op (`pass` body) — it looks like a
    # leftover, perhaps from freezing parameters; confirm intent and remove.
    for param in model.parameters():
        pass
    device = torch.device('cuda')
    model.to(device)
    if distributed:
        # Apex sync-BN conversion + DDP wrapper for multi-GPU evaluation.
        model = apex.parallel.convert_syncbn_model(model)
        model = DDP(model.cuda(), delay_allreduce=True)
def __init__(self, in_channels=3, num_classes=3):
    """3D-ResNet classifier built on a pretrained r3d_18 backbone.

    Args:
        in_channels: accepted for API compatibility but currently unused —
            TODO(review): r3d_18's stem is not adapted to it; confirm intent.
        num_classes: output size of the replacement classification head.
    """
    super(resnet3d, self).__init__()
    self.r3d = r3d_18(pretrained=True)
    # Derive the head's input width from the backbone instead of
    # hard-coding 512, so swapping the backbone keeps this correct.
    self.r3d.fc = Linear(self.r3d.fc.in_features, num_classes)
def __init__(self):
    """Frozen r3d_18 feature extractor with the final FC layer removed."""
    super().__init__()
    # Drop the last child (the classification head) and keep the rest.
    feature_layers = list(r3d_18(pretrained=True).children())[:-1]
    self.network = nn.Sequential(*feature_layers)
    # Pure feature extractor: no gradients flow into the backbone.
    for weight in self.network.parameters():
        weight.requires_grad = False
    print(self.network)
def __init__(self, experiment, device):
    # Trainer setup: loads the experiment's JSON config, creates log and
    # checkpoint directories, builds the chosen pretrained video model,
    # loss, optimizer, and LR scheduler, then restores any prior checkpoint.
    config_file = os.path.join(CONFIG_DIR, experiment + '.json')
    assert os.path.exists(
        config_file), 'config file {} does not exist'.format(config_file)
    self.experiment = experiment
    with open(config_file, 'r') as f:
        configs = json.load(f)
    self.device = int(device)
    # Hyperparameters from the config.
    self.lr = configs['lr']
    self.max_epochs = configs['max-epochs']
    self.train_batch_size = configs['train-batch-size']
    self.test_batch_size = configs['test-batch-size']
    self.n_epochs = 0  # epochs completed so far
    self.n_test_segments = configs['n-test-segments']
    self.log_dir = os.path.join(LOG_DIR, experiment)
    if not os.path.exists(self.log_dir):
        os.makedirs(self.log_dir)
    self.tboard_writer = tensorboardX.SummaryWriter(log_dir=self.log_dir)
    self.checkpoint_dir = os.path.join(CHECKPOINT_DIR, experiment)
    if not os.path.exists(self.checkpoint_dir):
        os.makedirs(self.checkpoint_dir)
    # Select the pretrained 3D backbone by config id.
    model_id = configs['model-id']
    if model_id == 'r3d':
        self.model = models.r3d_18(pretrained=True)
    elif model_id == 'mc3':
        self.model = models.mc3_18(pretrained=True)
    elif model_id == 'r2plus1d':
        self.model = models.r2plus1d_18(pretrained=True)
    else:
        raise ValueError('no such model')
    # replace the last layer, preserving whether the original fc had a bias.
    self.model.fc = nn.Linear(self.model.fc.in_features,
                              out_features=breakfast.N_CLASSES,
                              bias=self.model.fc.bias is not None)
    self.model = self.model.cuda(self.device)
    self.loss_fn = nn.CrossEntropyLoss().cuda(self.device)
    if configs['optim'] == 'adam':
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.lr)
    elif configs['optim'] == 'sgd':
        self.optimizer = optim.SGD(self.model.parameters(), lr=self.lr,
                                   momentum=configs['momentum'],
                                   nesterov=configs['nesterov'])
    else:
        raise ValueError('no such optimizer')
    # LR schedule: fixed step decay or plateau-based reduction.
    # NOTE(review): 'lr-step' doubles as ReduceLROnPlateau's patience here —
    # confirm that reuse is intentional.
    if configs['scheduler'] == 'step':
        self.scheduler = optim.lr_scheduler.StepLR(
            self.optimizer, step_size=configs['lr-step'], gamma=configs['lr-decay'])
    elif configs['scheduler'] == 'plateau':
        self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(
            self.optimizer, mode='min', patience=configs['lr-step'])
    else:
        raise ValueError('no such scheduler')
    self._load_checkpoint()
    self.frame_stride = configs['frame-stride']
def model_class(num_classes):
    """Return an r3d_18 whose classifier head is resized to ``num_classes``.

    Pretraining is controlled by the ``pretrained`` config flag; the new
    head keeps the bias setting of the head it replaces.
    """
    net = r3d_18(pretrained=cfg.get_bool('pretrained', default=False))
    has_bias = net.fc.bias is not None
    net.fc = nn.Linear(net.fc.in_features, num_classes, has_bias)
    return net
n_gpu = torch.cuda.device_count() else: torch.cuda.set_device(args.local_rank) device = torch.device("cuda", args.local_rank) n_gpu = 1 torch.distributed.init_process_group(backend='nccl', init_method='env://') trainloader = get_video_loader(**vars(args), val=False, transform=train_transform) testloader = get_video_loader(**vars(args), val=True, transform=test_transform) model = r3d_18(pretrained=True) model.fc = nn.Linear(512, 2) model.to(device) if args.linear_model: for p in model.parameters(): p.requires_grad = False for p in model.fc.parameters(): p.requires_grad = True # if args.local_rank <= 0: ipdb.set_trace() if args.fp16: try: # from apex.optimizers import FP16_Optimizer from apex.optimizers import FusedAdam
def main(ckpt,
         model_name,
         mode='rgb',
         root='/home/data/vision7/A3D_2.0/frames/',
         split_file='A3D_2.0_val.json',
         split='val',
         with_normal=True,
         batch_per_gpu=16,
         save_dir=''):
    # Evaluation entry point: builds the validation dataloader, constructs
    # the requested video model, loads `ckpt`, and runs do_val().
    device = torch.device('cuda')
    num_gpus = MPI.COMM_WORLD.Get_size()
    distributed = False
    if num_gpus > 1:
        distributed = True
        # Map this MPI rank onto a local GPU index.
        local_rank = MPI.COMM_WORLD.Get_rank() % torch.cuda.device_count()
    # logger must be initialized after distributed!
    # NOTE(review): `args` is not a parameter of main() — it appears to be a
    # module-level namespace; confirm it is defined before main() is called.
    if args.use_wandb:
        cfg = {'PROJECT': 'i3d_a3d'}
        logger = Logger(
            "I3D",
            cfg,  # convert_to_dict(cfg, []),
            project='i3d_a3d',
            viz_backend="wandb")
        save_dir = os.path.join(save_dir, logger.run_id)
    else:
        logger = logging.Logger('test_VAR_final')
    logger.info("Using {} GPUs".format(num_gpus))
    if distributed:
        torch.cuda.set_device(local_rank)
        host = os.environ[
            "MASTER_ADDR"] if "MASTER_ADDR" in os.environ else "127.0.0.1"
        torch.distributed.init_process_group(
            backend="nccl",
            init_method='tcp://{}:12345'.format(host),
            rank=MPI.COMM_WORLD.Get_rank(),
            world_size=MPI.COMM_WORLD.Get_size())
        synchronize()
    dataloader = make_dataloader(
        root,
        split_file,
        mode,
        model_name=model_name,
        seq_len=16,  # 64,
        overlap=15,  # 32,
        phase='val',
        max_iters=None,
        batch_per_gpu=batch_per_gpu,
        num_workers=16,
        shuffle=False,
        distributed=distributed,
        with_normal=with_normal)
    # evaluator = ActionClassificationEvaluator(cfg=None,
    #                                           dataset=dataloader.dataset,
    #                                           split='val',
    #                                           mode='accuracy',#'mAP',
    #                                           output_dir=save_dir,
    #                                           with_normal=with_normal)
    # setup the model
    # set dropout_keep_prob=0.0 for overfit
    # I3D uses 2 input channels for optical flow and 3 for RGB.
    if model_name == 'i3d':
        if mode == 'flow':
            model = InceptionI3d(dataloader.dataset.num_classes, in_channels=2, dropout_keep_prob=0.5)
        else:
            model = InceptionI3d(dataloader.dataset.num_classes, in_channels=3, dropout_keep_prob=0.5)
        model.replace_logits(dataloader.dataset.num_classes)
    elif model_name == 'r3d_18':
        model = r3d_18(pretrained=False, num_classes=dataloader.dataset.num_classes)
    elif model_name == 'mc3_18':
        model = mc3_18(pretrained=False, num_classes=dataloader.dataset.num_classes)
    elif model_name == 'r2plus1d_18':
        model = r2plus1d_18(pretrained=False, num_classes=dataloader.dataset.num_classes)
    elif model_name == 'c3d':
        model = C3D(pretrained=False, num_classes=dataloader.dataset.num_classes)
    else:
        raise NameError('unknown model name:{}'.format(model_name))
    # Restore trained weights before evaluation.
    model.load_state_dict(torch.load(ckpt))
    # do_test(i3d, dataloader, device, distributed=distributed,logger=logger, output_dir=save_dir, train_iters=0, evaluator=evaluator)
    model.to(device)
    model.eval()
    do_val(model_name,
           model,
           dataloader,
           device,
           distributed,
           logger,
           output_dir=os.path.join('test_output'),
           train_iters=0)
def __init__(self):
    """r3d_18 backbone truncated before its last two children.

    Keeps only the convolutional stages, yielding spatial feature maps
    rather than classification logits.
    """
    super().__init__()
    backbone = r3d_18(pretrained=False)
    kept = list(backbone.children())[:-2]
    self.model = nn.Sequential(*kept)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") n_gpu = torch.cuda.device_count() else: torch.cuda.set_device(args.local_rank) device = torch.device("cuda", args.local_rank) n_gpu = 1 torch.distributed.init_process_group( backend='nccl', init_method='env://') trainloader = get_video_loader(**vars(args), val=False, transform=train_transform) testloader = get_video_loader(**vars(args), val=True, transform=test_transform) if args.flow_histogram: model = nn.Sequential(nn.Linear(100, 256), nn.Linear(256, 3)) else: model = r3d_18(pretrained=args.start_from_pretrained) model.fc = nn.Linear(512, 3) if args.selfsup_loss: def patched_forward(self, x): x = self.stem(x) x = self.layer1(x) x = self.layer2(x) x = self.layer3(x) x = self.layer4(x) x = self.avgpool(x) # Flatten the layer to fc x = x.flatten(1) features = x x = self.fc(x)
print('Using apex sync batchnorm') except: print('Using pytorch sync batchnorm') convert_sync_batchnorm = torch.nn.SyncBatchNorm.convert_sync_batchnorm else: print('Using pytorch sync batchnorm') convert_sync_batchnorm = torch.nn.SyncBatchNorm.convert_sync_batchnorm # tok = BertTokenizer.from_pretrained('bert-base-uncased') model = AhaModel(args) if args.local_rank > -1: model = convert_sync_batchnorm(model) model.to(device) if args.load_cvpr_model: cnn = r3d_18(pretrained=args.from_pretrained) cnn.fc = nn.Linear(512, 3) model.video_transformer.embeddings.clip_embeddings = cnn.to(device) if args.freeze_cnn: for n, p in model.named_parameters(): p.requires_grad = 'clip_embeddings' not in n if args.svo: for n, p in model.named_parameters(): p.requires_grad = n.startswith('svo') if args.svo_freeze_embs: for p in model.svo_decoder_embs.parameters(): p.requires_grad = False if (args.svo and args.svo_cnn_only) or args.load_cvpr_model: