import torch
from torch import nn
from copy import deepcopy

from models import resnet, resnetl, resnext, c3d, mobilenetv2, shufflenetv2
# NOTE: MultiStageTemporalConvNet and the helpers used below
# (_modify_first_conv_layer, _construct_depth_model, _construct_rgbdepth_model)
# are defined elsewhere in this repository; their import paths are not shown
# in this excerpt.


def generate_model(opt):
    assert opt.model in [
        'resnet', 'resnetl', 'resnext', 'c3d', 'mobilenetv2', 'shufflenetv2',
        'mstcn'
    ]

    if opt.model == 'resnet':
        assert opt.model_depth in [10, 50]
        from models.resnet import get_fine_tuning_parameters
        if opt.model_depth == 10:
            model = resnet.resnet10(num_classes=opt.n_classes,
                                    shortcut_type=opt.resnet_shortcut,
                                    sample_size=opt.sample_size,
                                    sample_duration=opt.sample_duration)
        elif opt.model_depth == 50:
            model = resnet.resnet50(num_classes=opt.n_classes,
                                    shortcut_type=opt.resnet_shortcut,
                                    sample_size=opt.sample_size,
                                    sample_duration=opt.sample_duration)
    elif opt.model == 'resnetl':
        assert opt.model_depth in [10, 18]
        from models.resnetl import get_fine_tuning_parameters
        if opt.model_depth == 10:
            model = resnetl.resnetl10(num_classes=opt.n_classes,
                                      shortcut_type=opt.resnet_shortcut,
                                      sample_size=opt.sample_size,
                                      sample_duration=opt.sample_duration)
        elif opt.model_depth == 18:
            # NOTE: depth 18 also builds resnetl10; no resnetl18 constructor
            # is referenced in this excerpt.
            model = resnetl.resnetl10(num_classes=opt.n_classes,
                                      shortcut_type=opt.resnet_shortcut,
                                      sample_size=opt.sample_size,
                                      sample_duration=opt.sample_duration)
    elif opt.model == 'resnext':
        assert opt.model_depth in [101]
        from models.resnext import get_fine_tuning_parameters
        if opt.model_depth == 101:
            model = resnext.resnet101(num_classes=opt.n_classes,
                                      shortcut_type=opt.resnet_shortcut,
                                      cardinality=opt.resnext_cardinality,
                                      sample_size=opt.sample_size,
                                      sample_duration=opt.sample_duration)
    elif opt.model == 'c3d':
        assert opt.model_depth in [10]
        from models.c3d import get_fine_tuning_parameters
        if opt.model_depth == 10:
            model = c3d.c3d_v1(sample_size=opt.sample_size,
                               sample_duration=opt.sample_duration,
                               num_classes=opt.n_classes)
    elif opt.model == 'mobilenetv2':
        from models.mobilenetv2 import get_fine_tuning_parameters
        model = mobilenetv2.mob_v2(num_classes=opt.n_classes,
                                   sample_size=opt.sample_size,
                                   width_mult=opt.width_mult)
    elif opt.model == 'shufflenetv2':
        from models.shufflenetv2 import get_fine_tuning_parameters
        model = shufflenetv2.shf_v2(num_classes=opt.n_classes,
                                    sample_size=opt.sample_size,
                                    width_mult=opt.width_mult)
    elif opt.model == 'mstcn':
        model = MultiStageTemporalConvNet(
            embed_size=opt.embedding_dim,
            encoder=opt.tcn_encoder,
            n_classes=opt.n_classes,
            input_size=(opt.sample_size, opt.sample_size),
            input_channels=4 if opt.modality != 'RGB' else 3,
            num_stages=opt.tcn_stages,
            causal_config=opt.tcn_causality,
            CTHW_layout=True,
            use_preprocessing=opt.use_preprocessing)

    if not opt.no_cuda:
        if opt.pretrain_path:
            print('loading pretrained model {}'.format(opt.pretrain_path))
            pretrain = torch.load(opt.pretrain_path)
            assert opt.arch == pretrain['arch']

            if opt.pretrain_dataset == 'jester':
                if opt.sample_duration < 32 and opt.model != 'c3d':
                    model = _modify_first_conv_layer(model, 3, 3)
                # Drop the pretrained classifier weights: the Jester head has
                # a different number of classes than the fine-tuning target.
                if opt.model in ['mobilenetv2', 'shufflenetv2']:
                    del pretrain['state_dict']['module.classifier.1.weight']
                    del pretrain['state_dict']['module.classifier.1.bias']
                else:
                    del pretrain['state_dict']['module.fc.weight']
                    del pretrain['state_dict']['module.fc.bias']
                model.load_state_dict(pretrain['state_dict'], strict=False)

                if opt.modality in ['RGB', 'flo'] and opt.model != 'c3d':
                    print("[INFO]: RGB model is used for init model")
                    if opt.dataset != 'jester' and not opt.no_first_lay:
                        # Check whether models were trained with a (3,7,7) or
                        # a (7,7,7) first kernel.
                        model = _modify_first_conv_layer(model, 3, 3)
                elif opt.modality in ['Depth', 'seg']:
                    print("[INFO]: Converting the pretrained model to Depth init model")
                    model = _construct_depth_model(model)
                    print("[INFO]: Done. Depth model ready.")
                elif opt.modality in ['RGB-D', 'RGB-flo', 'RGB-seg']:
                    if opt.model != 'mstcn':
                        print("[INFO]: Converting the pretrained model to RGB+D init model")
                        model = _construct_rgbdepth_model(model)
                        if opt.no_first_lay:
                            # Check whether models were trained with a (3,7,7)
                            # or a (7,7,7) first kernel.
                            model = _modify_first_conv_layer(model, 3, 4)
                        print("[INFO]: Done. RGB-D model ready.")

            if opt.pretrain_dataset == opt.dataset:
                model.load_state_dict(pretrain['state_dict'])
            elif opt.pretrain_dataset in ['egogesture', 'nv', 'denso']:
                del pretrain['state_dict']['module.fc.weight']
                del pretrain['state_dict']['module.fc.bias']
                model.load_state_dict(pretrain['state_dict'], strict=False)

            # Check first kernel size: shrink the temporal kernel if it is
            # longer than the clip duration.
            if opt.model != 'mstcn':
                modules = list(model.modules())
                first_conv_idx = list(
                    filter(lambda x: isinstance(modules[x], nn.Conv3d),
                           list(range(len(modules)))))[0]
                conv_layer = modules[first_conv_idx]
                if conv_layer.kernel_size[0] > opt.sample_duration:
                    print("[INFO]: RGB model is used for init model")
                    model = _modify_first_conv_layer(
                        model, int(opt.sample_duration / 2), 1)

            # Replace the classification head for fine-tuning.
            if opt.model == 'c3d':  # CHECK HERE
                model.module.fc = nn.Linear(model.module.fc[0].in_features,
                                            model.module.fc[0].out_features)
                model.module.fc = model.module.fc.cuda()
            elif opt.model in ['mobilenetv2', 'shufflenetv2']:
                model.module.classifier = nn.Sequential(
                    nn.Dropout(0.9),
                    nn.Linear(model.module.classifier[1].in_features,
                              opt.n_finetune_classes))
                model.module.classifier = model.module.classifier.cuda()
            elif opt.model != 'mstcn':
                model.module.fc = nn.Linear(model.module.fc.in_features,
                                            opt.n_finetune_classes)
                model.module.fc = model.module.fc.cuda()

            if opt.model != 'mstcn':
                parameters = get_fine_tuning_parameters(model, opt.ft_begin_index)
            else:
                parameters = model.trainable_parameters()
            model = nn.DataParallel(model, device_ids=None)
            model = model.cuda()
            return model, parameters
    else:
        print('ERROR no cuda')

    # No pretrained weights (or CUDA disabled): return the model as built.
    return model, model.parameters()
def generate_model(opt):
    assert opt.model in ['resnet', 'resnetl', 'resnext', 'c3d']

    if opt.model == 'resnet':
        assert opt.model_depth in [10]
        from models.resnet import get_fine_tuning_parameters
        if opt.model_depth == 10:
            model = resnet.resnet10(num_classes=opt.n_classes,
                                    shortcut_type=opt.resnet_shortcut,
                                    sample_size=opt.sample_size,
                                    sample_duration=opt.sample_duration)
    elif opt.model == 'resnetl':
        assert opt.model_depth in [10]
        from models.resnetl import get_fine_tuning_parameters
        if opt.model_depth == 10:
            model = resnetl.resnetl10(num_classes=opt.n_classes,
                                      shortcut_type=opt.resnet_shortcut,
                                      sample_size=opt.sample_size,
                                      sample_duration=opt.sample_duration)
    elif opt.model == 'resnext':
        assert opt.model_depth in [101]
        from models.resnext import get_fine_tuning_parameters
        if opt.model_depth == 101:
            model = resnext.resnet101(num_classes=opt.n_classes,
                                      shortcut_type=opt.resnet_shortcut,
                                      cardinality=opt.resnext_cardinality,
                                      sample_size=opt.sample_size,
                                      sample_duration=opt.sample_duration)
    elif opt.model == 'c3d':
        assert opt.model_depth in [10]
        from models.c3d import get_fine_tuning_parameters
        if opt.model_depth == 10:
            model = c3d.c3d_v1(sample_size=opt.sample_size,
                               sample_duration=opt.sample_duration,
                               num_classes=opt.n_classes)

    if not opt.no_cuda:
        model = model.cuda()
        model = nn.DataParallel(model, device_ids=None)

        if opt.pretrain_path:
            print('loading pretrained model {}'.format(opt.pretrain_path))
            pretrain = torch.load(opt.pretrain_path)
            assert opt.arch == pretrain['arch']
            model.load_state_dict(pretrain['state_dict'])

            # Replace the classification head for fine-tuning.
            if opt.model == 'c3d':  # CHECK HERE
                model.module.fc = nn.Linear(model.module.fc[0].in_features,
                                            opt.n_finetune_classes)
            else:
                model.module.fc = nn.Linear(model.module.fc.in_features,
                                            opt.n_finetune_classes)
            model.module.fc = model.module.fc.cuda()

            if opt.modality == 'RGB' and opt.model != 'c3d':
                print("[INFO]: RGB model is used for init model")
                # Check whether models were trained with a (3,7,7) or a
                # (7,7,7) first kernel.
                model = _modify_first_conv_layer(model, 3, 3)
            elif opt.modality == 'Depth':
                print("[INFO]: Converting the pretrained model to Depth init model")
                model = _construct_depth_model(model)
                print("[INFO]: Done. Depth model ready.")
            elif opt.modality == 'RGB-D':
                print("[INFO]: Converting the pretrained model to RGB+D init model")
                model = _construct_rgbdepth_model(model)
                print("[INFO]: Done. RGB-D model ready.")

            # Check first kernel size: shrink the temporal kernel if it is
            # longer than the clip duration.
            modules = list(model.modules())
            first_conv_idx = list(
                filter(lambda x: isinstance(modules[x], nn.Conv3d),
                       list(range(len(modules)))))[0]
            conv_layer = modules[first_conv_idx]
            if conv_layer.kernel_size[0] > opt.sample_duration:
                model = _modify_first_conv_layer(
                    model, int(opt.sample_duration / 2), 1)

            parameters = get_fine_tuning_parameters(model, opt.ft_begin_index)
            return model, parameters
    else:
        if opt.pretrain_path:
            print('loading pretrained model {}'.format(opt.pretrain_path))
            pretrain = torch.load(opt.pretrain_path)
            assert opt.arch == pretrain['arch']
            model.load_state_dict(pretrain['state_dict'])

            if opt.modality == 'RGB' and opt.model != 'c3d':
                print("[INFO]: RGB model is used for init model")
                model = _modify_first_conv_layer(model, 3, 3)
            elif opt.modality == 'Depth':
                print("[INFO]: Converting the pretrained model to Depth init model")
                model = _construct_depth_model(model)
                print("[INFO]: Depth model ready.")
            elif opt.modality == 'RGB-D':
                print("[INFO]: Converting the pretrained model to RGB-D init model")
                model = _construct_rgbdepth_model(model)
                print("[INFO]: Done. RGB-D model ready.")

            # Check first kernel size: shrink the temporal kernel if it is
            # longer than the clip duration.
            modules = list(model.modules())
            first_conv_idx = list(
                filter(lambda x: isinstance(modules[x], nn.Conv3d),
                       list(range(len(modules)))))[0]
            conv_layer = modules[first_conv_idx]
            if conv_layer.kernel_size[0] > opt.sample_duration:
                print("[INFO]: RGB model is used for init model")
                model = _modify_first_conv_layer(
                    model, int(opt.sample_duration / 2), 1)

            # Replace the classification head for fine-tuning.
            if opt.model == 'c3d':  # CHECK HERE
                model.fc = nn.Linear(model.fc[0].in_features,
                                     model.fc[0].out_features)
            else:
                model.fc = nn.Linear(model.fc.in_features,
                                     opt.n_finetune_classes)

            parameters = get_fine_tuning_parameters(model, opt.ft_begin_index)
            return model, parameters

    # No pretrained weights: return the model as built.
    return model, model.parameters()
def generate_model(opt, modality):
    assert opt.model in ['resnet', 'resnetl', 'resnext', 'c3d']

    if opt.model == 'resnet':
        # assert opt.model_depth in [10]
        from models.resnet import get_fine_tuning_parameters
        if opt.model_depth == 10:
            model = resnet.resnet10(num_classes=opt.n_classes,
                                    shortcut_type=opt.resnet_shortcut,
                                    sample_size=opt.sample_size,
                                    sample_duration=opt.n_frames_per_clip)
        elif opt.model_depth == 18:
            model = resnet.resnet18(num_classes=opt.n_classes,
                                    shortcut_type=opt.resnet_shortcut,
                                    sample_size=opt.sample_size,
                                    sample_duration=opt.n_frames_per_clip)
        elif opt.model_depth == 50:
            model = resnet.resnet50(num_classes=opt.n_classes,
                                    shortcut_type=opt.resnet_shortcut,
                                    sample_size=opt.sample_size,
                                    sample_duration=opt.n_frames_per_clip)
    elif opt.model == 'resnetl':
        assert opt.model_depth in [10]
        from models.resnetl import get_fine_tuning_parameters
        if opt.model_depth == 10:
            model = resnetl.resnetl10(num_classes=opt.n_classes,
                                      shortcut_type=opt.resnet_shortcut,
                                      sample_size=opt.sample_size,
                                      sample_duration=opt.n_frames_per_clip)
    elif opt.model == 'resnext':
        assert opt.model_depth in [101]
        from models.resnext import get_fine_tuning_parameters
        if opt.model_depth == 101:
            model = resnext.resnet101(num_classes=opt.n_classes,
                                      shortcut_type=opt.resnet_shortcut,
                                      cardinality=opt.resnext_cardinality,
                                      sample_size=opt.sample_size,
                                      sample_duration=opt.n_frames_per_clip)
    elif opt.model == 'c3d':
        assert opt.model_depth in [10]
        from models.c3d import get_fine_tuning_parameters
        if opt.model_depth == 10:
            model = c3d.c3d_v1(sample_size=opt.sample_size,
                               sample_duration=opt.n_frames_per_clip,
                               num_classes=opt.n_classes)

    if not opt.no_cuda:
        model = model.cuda()
        model = nn.DataParallel(model, device_ids=None)

        if opt.pretrain_path:
            print('loading pretrained model {}'.format(opt.pretrain_path))
            pretrain = torch.load(opt.pretrain_path, map_location='cpu')
            # assert opt.arch == pretrain['arch']
            # Keep every weight except the classifier; the checkpoint was
            # saved from a DataParallel model, so its keys already carry the
            # 'module.' prefix.
            feature_state_dict = {
                key: value
                for key, value in pretrain['state_dict'].items()
                if key not in ['module.fc.weight', 'module.fc.bias']
            }
            state_dict = deepcopy(model.state_dict())
            state_dict.update(feature_state_dict)
            model.load_state_dict(state_dict)
            # model.load_state_dict(pretrain['state_dict'])

            if modality == 'RGB' and opt.model != 'c3d':
                print("[INFO]: RGB model is used for init model")
                # model = _modify_first_conv_layer(model, 3, 3)
                # (check whether models were trained with a (3,7,7) or a
                # (7,7,7) first kernel)
            elif modality == 'Depth':
                print("[INFO]: Converting the pretrained model to Depth init model")
                model = _construct_depth_model(model)
                print("[INFO]: Done. Depth model ready.")
            elif modality == 'RGB-D':
                print("[INFO]: Converting the pretrained model to RGB+D init model")
                model = _construct_rgbdepth_model(model)
                print("[INFO]: Done. RGB-D model ready.")

            # Check first kernel size: shrink the temporal kernel if it is
            # longer than the clip length.
            modules = list(model.modules())
            first_conv_idx = list(
                filter(lambda x: isinstance(modules[x], nn.Conv3d),
                       list(range(len(modules)))))[0]
            conv_layer = modules[first_conv_idx]
            if conv_layer.kernel_size[0] > opt.n_frames_per_clip:
                print("[INFO]: RGB model is used for init model")
                model = _modify_first_conv_layer(
                    model, int(opt.n_frames_per_clip / 2), 1)

            # Replace the classification head for fine-tuning.
            if opt.model == 'c3d':  # CHECK HERE
                model.module.fc = nn.Linear(model.module.fc[0].in_features,
                                            model.module.fc[0].out_features)
                model.module.fc = model.module.fc.cuda()
            else:
                model.module.fc = nn.Linear(model.module.fc.in_features,
                                            opt.n_finetune_classes)
                model.module.fc = model.module.fc.cuda()

            parameters = get_fine_tuning_parameters(model, opt.ft_begin_index)
            return model, parameters
    else:
        if opt.pretrain_path:
            print('loading pretrained model {}'.format(opt.pretrain_path))
            pretrain = torch.load(opt.pretrain_path, map_location='cpu')
            # assert opt.arch == pretrain['arch']
            # Strip the 'module.' prefix saved by DataParallel, since the
            # model is not wrapped on the CPU path, and drop the classifier.
            feature_state_dict = {
                key.replace('module.', ''): value
                for key, value in pretrain['state_dict'].items()
                if key not in ['module.fc.weight', 'module.fc.bias']
            }
            state_dict = deepcopy(model.state_dict())
            state_dict.update(feature_state_dict)
            model.load_state_dict(state_dict)
            # model.load_state_dict(pretrain['state_dict'])

            if modality == 'RGB' and opt.model != 'c3d':
                print("[INFO]: RGB model is used for init model")
                model = _modify_first_conv_layer(model, 3, 3)
            elif modality == 'Depth':
                print("[INFO]: Converting the pretrained model to Depth init model")
                model = _construct_depth_model(model)
                print("[INFO]: Depth model ready.")
            elif modality == 'RGB-D':
                print("[INFO]: Converting the pretrained model to RGB-D init model")
                model = _construct_rgbdepth_model(model)
                print("[INFO]: Done. RGB-D model ready.")

            # Check first kernel size: shrink the temporal kernel if it is
            # longer than the clip length.
            modules = list(model.modules())
            first_conv_idx = list(
                filter(lambda x: isinstance(modules[x], nn.Conv3d),
                       list(range(len(modules)))))[0]
            conv_layer = modules[first_conv_idx]
            if conv_layer.kernel_size[0] > opt.n_frames_per_clip:
                print("[INFO]: RGB model is used for init model")
                model = _modify_first_conv_layer(
                    model, int(opt.n_frames_per_clip / 2), 1)

            # Replace the classification head for fine-tuning.
            if opt.model == 'c3d':  # CHECK HERE
                model.fc = nn.Linear(model.fc[0].in_features,
                                     model.fc[0].out_features)
            else:
                model.fc = nn.Linear(model.fc.in_features,
                                     opt.n_finetune_classes)

            parameters = get_fine_tuning_parameters(model, opt.ft_begin_index)
            return model, parameters

    return model, model.parameters()