def extract_image_feats(video_path):
    """Extract per-frame ResNet-152 features for one video.

    Decodes the video with ffmpeg into JPEG frames, uniformly samples 80
    frames, runs them through a headless ResNet-152 (classifier replaced
    by Identity) and returns the features.

    :param video_path: path to the video; frames are dumped into an
        'info' directory under the path's first component.
    :return: numpy array of shape (80, feat_dim) with frame features.
    """
    print('extracting image features...')
    model = resnet152(pretrained='imagenet')
    model = model.cuda()
    # Strip the classification head so forward() yields pooled features.
    model.last_linear = utils.Identity()
    model.eval()
    C, H, W = 3, 224, 224
    load_image_fn = utils.LoadTransformImage(model)
    dst = os.path.join(video_path.split('/')[0], 'info')
    # Ensure the frame dump directory exists before ffmpeg writes to it.
    os.makedirs(dst, exist_ok=True)
    with open(os.devnull, "w") as ffmpeg_log:
        # Argument list instead of a shell-concatenated string: robust
        # against spaces in video_path and immune to shell injection.
        command = ['ffmpeg', '-i', video_path, '-vf', 'scale=400:300',
                   '-qscale:v', '2', '{0}/%06d.jpg'.format(dst)]
        subprocess.call(command, stdout=ffmpeg_log, stderr=ffmpeg_log)
    image_list = sorted(glob.glob(os.path.join(dst, '*.jpg')))
    # 80 uniformly spaced frame indices across the whole clip.
    samples = np.round(np.linspace(0, len(image_list) - 1, 80))
    image_list = [image_list[int(sample)] for sample in samples]
    images = torch.zeros((len(image_list), C, H, W))
    for i in range(len(image_list)):
        img = load_image_fn(image_list[i])
        images[i] = img
    with torch.no_grad():
        # No .squeeze() on the batch: squeezing a (1, C, H, W) tensor
        # would drop the batch dimension and break the forward pass.
        image_feats = model(images.cuda())
    image_feats = image_feats.cpu().numpy()
    # Clean up the temporary frame dump.
    for file in os.listdir(dst):
        if file.endswith('.jpg'):
            os.remove(os.path.join(dst, file))
    return image_feats
def extract_image_feats(video_path):
    # GUI-wired variant of the feature extractor: samples 80 frames from a
    # video and returns their ResNet-152 features.
    # NOTE(review): this redefines extract_image_feats from earlier in the
    # file; at import time this later definition wins.
    # `hasilPred` is presumably a GUI label defined elsewhere — verify.
    hasilPred.configure(text="Membuat Prediksi....")
    model = resnet152(pretrained='imagenet')
    model = model.cuda()
    # Replace the classifier head so forward() returns pooled features.
    model.last_linear = utils.Identity()
    model.eval()
    C, H, W = 3, 224, 224
    load_image_fn = utils.LoadTransformImage(model)
    # Windows-style path split: frames go into '<first component>\info'.
    dst = os.path.join(video_path.split('\\')[0], 'info')
    # Recreate the frame directory from scratch on every run.
    if os.path.exists(dst):
        print(" Menghapus Direktori: " + dst + "\\")
        shutil.rmtree(dst)
    os.makedirs(dst)
    with open(os.devnull, "w") as ffmpeg_log:
        # NOTE(review): shell command built by string concatenation with
        # shell=True — breaks on paths containing spaces and is
        # shell-injection prone.
        command = 'ffmpeg -i ' + video_path + ' -vf scale=400:300 ' + '-qscale:v 2 ' + '{0}/%06d.jpg'.format(
            dst)
        subprocess.call(command, shell=True, stdout=ffmpeg_log,
                        stderr=ffmpeg_log)
    list_image = sorted(glob.glob(os.path.join(dst, '*.jpg')))
    # 80 uniformly spaced frame indices across the clip.
    samples = np.round(np.linspace(0, len(list_image) - 1, 80))
    list_image = [list_image[int(sample)] for sample in samples]
    images = torch.zeros((len(list_image), C, H, W))
    for i in range(len(list_image)):
        img = load_image_fn(list_image[i])
        images[i] = img
    with torch.no_grad():
        # NOTE(review): .squeeze() drops the batch dimension when exactly
        # one frame is sampled, which would break the forward pass —
        # confirm intended.
        image_feats = model(images.cuda().squeeze())
    image_feats = image_feats.cpu().numpy()
    # Remove the temporary frame dump.
    for file in os.listdir(dst):
        if file.endswith('.jpg'):
            os.remove(os.path.join(dst, file))
    return image_feats
def generate_C2D_model(opt):
    """Build a 2D-CNN feature extractor selected by ``opt.c2d_model_name``.

    The classifier head is replaced by Identity so the model outputs
    pooled features.

    :param opt: options object with ``c2d_model_name``, ``no_cuda`` and
        ``device`` attributes.
    :return: tuple ``(load_image_fn, model, (C, H, W))`` where (C, H, W)
        is the expected input size for the chosen backbone.
    :raises ValueError: if the model name is not supported.
    """
    if opt.c2d_model_name == 'inception_v3':
        C, H, W = 3, 299, 299
        model = pretrainedmodels.inceptionv3(num_classes=1000,
                                             pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)
    elif opt.c2d_model_name == 'resnet152':
        C, H, W = 3, 224, 224
        model = pretrainedmodels.resnet152(num_classes=1000,
                                           pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)
    elif opt.c2d_model_name == 'inception_v4':
        C, H, W = 3, 299, 299
        model = pretrainedmodels.inceptionv4(num_classes=1000,
                                             pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)
    elif opt.c2d_model_name == 'inceptionresnetv2':
        C, H, W = 3, 299, 299
        model = pretrainedmodels.inceptionresnetv2(num_classes=1000,
                                                   pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)
    else:
        # Fail fast: previously an unknown name fell through and crashed
        # below with UnboundLocalError on `model`.
        raise ValueError("unsupported c2d model: %s" % opt.c2d_model_name)
    # Strip the classification head; the caller consumes features.
    model.last_linear = utils.Identity()
    if not opt.no_cuda:
        model = model.to(opt.device)
    return load_image_fn, model, (C, H, W)
def extract_feats(args):
    """Build the 2D-CNN named in ``args['model']`` and run feature
    extraction via ``prepro_feats.extract_feats``.

    :param args: dict-like parameters; must contain key ``'model'``.
    :raises ValueError: if the model name is not supported.
    """
    params = args
    if params['model'] == 'inception_v3':
        C, H, W = 3, 299, 299
        model = pretrainedmodels.inceptionv3(pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)
    elif params['model'] == 'resnet152':
        C, H, W = 3, 224, 224
        model = pretrainedmodels.resnet152(pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)
    elif params['model'] == 'inception_v4':
        C, H, W = 3, 299, 299
        model = pretrainedmodels.inceptionv4(num_classes=1000,
                                             pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)
    else:
        # Fail fast: previously this only printed and then crashed on the
        # unbound `model` variable below.
        raise ValueError("doesn't support %s" % (params['model']))
    # Replace the classifier so the network emits pooled features.
    model.last_linear = utils.Identity()
    model = nn.DataParallel(model)
    model = model.cuda()
    prepro_feats.extract_feats(params, model, load_image_fn)
def fix_frame_extract(frame_path, feats_path, frames_num, model, video_name):
    """Extract features for a fixed number of frames of one video.

    :param frame_path: Path-like directory holding the video's .jpg frames.
    :param feats_path: directory to write ``<video_name>.npy`` into.
    :param frames_num: number of frames to sample uniformly.
    :param model: backbone name ('resnet152', 'vgg16' or 'inception_v4').
    :param video_name: basename used for the saved feature file.
    :raises ValueError: if the model name is not supported.
    """
    # load model
    C, H, W = 3, 224, 224
    if model == 'resnet152':
        model = pretrainedmodels.resnet152(pretrained='imagenet')
    elif model == 'vgg16':
        model = pretrainedmodels.vgg16(pretrained='imagenet')
    elif model == 'inception_v4':
        C, H, W = 3, 299, 299
        model = pretrainedmodels.inceptionv4(pretrained='imagenet')
    else:
        # Fail fast: previously an unknown name left `model` as a string
        # and the next line raised AttributeError.
        raise ValueError("unsupported model: %s" % model)
    # Headless backbone: classifier replaced by Identity.
    model.last_linear = utils.Identity()
    model = model.to(device)
    model.eval()
    load_image_fn = utils.LoadTransformImage(model)
    # load data
    img_list = sorted(frame_path.glob('*.jpg'))
    # Uniformly sample `frames_num` frame indices.
    samples_ix = np.linspace(0, len(img_list) - 1, frames_num).astype(int)
    img_list = [img_list[i] for i in samples_ix]
    # build tensor
    imgs = torch.zeros([len(img_list), C, H, W])
    for i in range(len(img_list)):
        img = load_image_fn(img_list[i])
        imgs[i] = img
    imgs = imgs.to(device)
    with torch.no_grad():
        feats = model(imgs)
    feats = feats.cpu().numpy()
    # save
    np.save(os.path.join(feats_path, video_name + ".npy"), feats)
def build_model(model_name): device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # load pretrained model model_name = model_name # could be fbresnet152 or inceptionresnetv2 if (model_name == 'senet154'): model = pretrainedmodels.senet154(pretrained='imagenet') elif (model_name == 'se_resnet152'): model = pretrainedmodels.se_resnet152(pretrained='imagenet') elif (model_name == 'se_resnext101_32x4d'): model = pretrainedmodels.se_resnext101_32x4d(pretrained='imagenet') elif (model_name == 'resnet152'): model = pretrainedmodels.resnet152(pretrained='imagenet') elif (model_name == 'resnet101'): model = pretrainedmodels.resnet101(pretrained='imagenet') elif (model_name == 'densenet201'): model = pretrainedmodels.densenet201(pretrained='imagenet') model.to(device) for param in model.parameters(): param.requires_grad = False num_ftrs = model.last_linear.in_features class CustomModel(nn.Module): def __init__(self, model): super(CustomModel, self).__init__() self.features = nn.Sequential(*list(model.children())[:-1]) self.classifier = nn.Sequential( torch.nn.Linear(num_ftrs, 128), torch.nn.Dropout(0.3), # drop 50% of the neuron torch.nn.Linear(128, 7)) def forward(self, x): x = self.features(x) x = x.view(x.size(0), -1) x = self.classifier(x) return x model = CustomModel(model) freeze_layer(model.features) num_ftrs = list(model.classifier.children())[-1].out_features model.to(device) model.name = model_name PATH = os.path.abspath(os.path.dirname(__file__)) PATH_par = os.path.abspath(os.path.join(PATH, os.pardir)) path_to_model = os.path.join(PATH_par, 'pretrained_model', '128_7') model.load_state_dict( torch.load(os.path.join(path_to_model, '%s.pth' % (model_name)))) model.to(device) for param in model.parameters(): param.requires_grad = False return model, num_ftrs
def get_resnet152():
    """Build a ResNet-152 adapted to 4-channel input and 28 output classes.

    The 4th input channel's conv weights are initialised with the mean of
    the pretrained RGB weights; average pooling is swapped for max
    pooling and the head gets dropout.
    """
    net = resnet152(pretrained=True)

    # Widen the stem conv from 3 to 4 input channels, seeding the extra
    # channel with the mean of the pretrained RGB filters.
    rgb_weights = net.conv1.weight
    extra_channel = torch.mean(rgb_weights, dim=1).unsqueeze(1)
    net.conv1 = nn.Conv2d(4, 64, kernel_size=(7, 7), stride=(2, 2),
                          padding=(3, 3), bias=False)
    net.conv1.weight = torch.nn.Parameter(
        torch.cat((rgb_weights, extra_channel), dim=1))

    # Max-pool instead of the stock average pool.
    net.avgpool = nn.Sequential(
        nn.MaxPool2d(kernel_size=16, stride=2, padding=0), )

    # Dropout-regularised 28-way classification head.
    head_in = net.fc.in_features
    net.fc = nn.Sequential(nn.Dropout(), nn.Linear(head_in, 28))
    return net
def generate_2D_model(opt):
    """Build the 2D-CNN feature extractor named in ``opt['model']``.

    The classifier head is replaced by Identity and the model is wrapped
    in DataParallel and moved to GPU.

    :param opt: dict-like options; must contain key ``'model'``.
    :return: the CUDA model ready for feature extraction.
    :raises ValueError: if the model name is not supported.
    """
    if opt['model'] == 'inception_v3':
        C, H, W = 3, 299, 299
        model = pretrainedmodels.inceptionv3(pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)
    elif opt['model'] == 'vgg16':
        C, H, W = 3, 224, 224
        model = pretrainedmodels.vgg16(pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)
    elif opt['model'] == 'vgg19':
        C, H, W = 3, 224, 224
        model = pretrainedmodels.vgg19(pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)
    elif opt['model'] == 'resnet50':
        C, H, W = 3, 224, 224
        model = pretrainedmodels.resnet50(pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)
    elif opt['model'] == 'resnet101':
        C, H, W = 3, 224, 224
        model = pretrainedmodels.resnet101(pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)
    elif opt['model'] == 'resnet152':
        C, H, W = 3, 224, 224
        model = pretrainedmodels.resnet152(pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)
    elif opt['model'] == 'inception_v4':
        C, H, W = 3, 299, 299
        model = pretrainedmodels.inceptionv4(num_classes=1000,
                                             pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)
    elif opt['model'] == 'nasnet':
        # NASNet-A Large ships with 1001 classes (extra background class).
        C, H, W = 3, 331, 331
        model = pretrainedmodels.nasnetalarge(num_classes=1001,
                                              pretrained='imagenet+background')
        load_image_fn = utils.LoadTransformImage(model)
    else:
        # Fail fast: previously this only printed and then crashed on the
        # unbound `model` variable below.
        raise ValueError("doesn't support %s" % (opt['model']))
    # Strip the classifier so forward() returns pooled features.
    model.last_linear = utils.Identity()
    model = nn.DataParallel(model)
    # if opt['saved_model'] != '':
    #     model.load_state_dict(torch.load(opt['saved_model']), strict=False)
    model = model.cuda()
    return model
def resnet152(input_size=(3, 224, 224), num_classes=1000, pretrained=None):
    """Adapt torchvision's ResNet-152 to an arbitrary input size and class
    count, exposing a pretrainedmodels-style ``features``/``logits``/
    ``last_linear`` interface.

    :param input_size: (C, H, W) expected by the network.
    :param num_classes: output width of the new final linear layer.
    :param pretrained: forwarded to ``models.resnet152``.
    :return: the patched model instance.
    """
    model = models.resnet152(pretrained=pretrained)
    # Helper presumably attaches a `.features` method — verify elsewhere.
    model = add_instances_to_torchvisionmodel(model)
    # Change the First Convol2D layer into new input shape
    if input_size != (3, 224, 224):
        model.conv1 = nn.Conv2d(input_size[0], 64, kernel_size=(7, 7),
                                stride=(2, 2), padding=(3, 3), bias=False)
    model.input_size = input_size
    # Drop the stock head; replaced below by avg_pool2d + last_linear.
    del model.fc
    del model.avgpool
    # calc kernel_size on new_avgpool2d layer: probe with a dummy forward
    # pass to learn the spatial size of the final feature map.
    test_tensor = torch.randn((1, input_size[0], input_size[1], input_size[2]))
    features = model.features(test_tensor)
    # print(features, features.shape[2], features.shape[3])
    avg_pool2d_kernel_size = (features.shape[2], features.shape[3])
    # calc last linear size
    x = F.avg_pool2d(features, kernel_size=avg_pool2d_kernel_size)
    x = x.view(x.size(0), -1).shape[1]
    model.last_linear = nn.Linear(in_features=x, out_features=num_classes)
    #del model.logits
    #del model.forward

    def logits(self, features):
        # `avg_pool2d_kernel_size` is captured from the probe run above.
        x = F.relu(features, inplace=False)
        x = F.avg_pool2d(x, kernel_size=avg_pool2d_kernel_size, stride=1)
        x = x.view(x.size(0), -1)
        x = self.last_linear(x)
        return x

    def forward(self, input):
        x = self.features(input)
        x = self.logits(x)
        return x

    # Bind the new logits/forward as methods on this instance only.
    model.logits = types.MethodType(logits, model)
    model.forward = types.MethodType(forward, model)
    return model
def extract_feats(frame_path, feats_path, interval, model, video_name):
    """
    extract feature from frames of one video
    :param video_name: basename used for the saved feature file
    :param model: name of model ('resnet152', 'vgg16' or 'inception_v4')
    :param frame_path: path of frames (Path-like, globbed for *.jpg)
    :param feats_path: path to store results
    :param interval: (str) The interval when extract frames from videos
    :return: None
    :raises ValueError: if the model name is not supported
    """
    # load model
    C, H, W = 3, 224, 224
    if model == 'resnet152':
        model = pretrainedmodels.resnet152(pretrained='imagenet')
    elif model == 'vgg16':
        model = pretrainedmodels.vgg16(pretrained='imagenet')
    elif model == 'inception_v4':
        C, H, W = 3, 299, 299
        model = pretrainedmodels.inceptionv4(pretrained='imagenet')
    else:
        # Fail fast: previously an unknown name left `model` as a string
        # and the next line raised AttributeError.
        raise ValueError("unsupported model: %s" % model)
    # Headless backbone: classifier replaced by Identity.
    model.last_linear = utils.Identity()
    model = model.to(device)
    model.eval()
    load_image_fn = utils.LoadTransformImage(model)
    # load data
    img_list = sorted(frame_path.glob('*.jpg'))
    # get index: every `interval`-th frame.
    samples_ix = np.arange(0, len(img_list), interval)
    img_list = [img_list[int(i)] for i in samples_ix]
    # build tensor
    imgs = torch.zeros([len(img_list), C, H, W])
    for i in range(len(img_list)):
        img = load_image_fn(img_list[i])
        imgs[i] = img
    imgs = imgs.to(device)
    with torch.no_grad():
        feats = model(imgs)
    feats = feats.cpu().numpy()
    # save
    np.save(os.path.join(feats_path, video_name + ".npy"), feats)
def main(args):
    """Train a multiple-instance-learning classifier on COCO labels.

    :param args: argparse Namespace with model/optimiser/checkpoint options.
    :raises ValueError: if ``args.model`` is not supported.
    """
    global C, H, W
    # Close the labels file deterministically instead of leaking the handle.
    with open(args.coco_labels) as f:
        coco_labels = json.load(f)
    num_classes = coco_labels['num_classes']
    if args.model == 'inception_v3':
        C, H, W = 3, 299, 299
        model = pretrainedmodels.inceptionv3(pretrained='imagenet')
    elif args.model == 'resnet152':
        C, H, W = 3, 224, 224
        model = pretrainedmodels.resnet152(pretrained='imagenet')
    elif args.model == 'inception_v4':
        C, H, W = 3, 299, 299
        model = pretrainedmodels.inceptionv4(num_classes=1000,
                                             pretrained='imagenet')
    else:
        # Bug fix: `args['model']` subscripted the argparse Namespace
        # (TypeError); also raise instead of continuing with `model` unbound.
        raise ValueError("doesn't support %s" % (args.model))
    load_image_fn = utils.LoadTransformImage(model)
    dim_feats = model.last_linear.in_features
    model = MILModel(model, dim_feats, num_classes)
    model = model.cuda()
    dataset = CocoDataset(coco_labels)
    dataloader = DataLoader(dataset, batch_size=args.batch_size, shuffle=True)
    optimizer = optim.Adam(model.parameters(),
                           lr=args.learning_rate,
                           weight_decay=args.weight_decay)
    exp_lr_scheduler = optim.lr_scheduler.StepLR(
        optimizer,
        step_size=args.learning_rate_decay_every,
        gamma=args.learning_rate_decay_rate)
    crit = nn.MultiLabelSoftMarginLoss()
    if not os.path.isdir(args.checkpoint_path):
        os.mkdir(args.checkpoint_path)
    train(dataloader, model, crit, optimizer, exp_lr_scheduler, load_image_fn,
          args)
def build_model(): device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # load pretrained model model_name = 'resnet152' model = pretrainedmodels.resnet152(pretrained='imagenet') model.to(device) for param in model.parameters(): param.requires_grad = False num_ftrs = model.last_linear.in_features class CustomModel(nn.Module): def __init__(self, model): super(CustomModel, self).__init__() self.features = nn.Sequential(*list(model.children())[:-1]) self.classifier = nn.Sequential( torch.nn.Linear(num_ftrs, 128), torch.nn.Dropout(0.3), # drop 50% of the neuron torch.nn.Linear(128, 7)) def forward(self, x): x = self.features(x) x = x.view(x.size(0), -1) x = self.classifier(x) return x model = CustomModel(model) freeze_layer(model.features) num_ftrs = list(model.classifier.children())[-1].out_features model.load_state_dict(torch.load('resnet152.pth')) model.to(device) model.name = model_name return model, num_ftrs
params['feat_path'], params['feat_name'] + ('' if '.hdf5' in params['feat_name'] else '.hdf5')) params['logit_dir'] = os.path.join( params['feat_path'], params['logit_name'] + ('' if '.hdf5' in params['logit_name'] else '.hdf5')) print('Model: %s' % params['model']) print('The extracted features will be saved to --> %s' % params['feat_dir']) if params['model'] == 'resnet101': C, H, W = 3, 224, 224 model = pretrainedmodels.resnet101(pretrained='imagenet') elif params['model'] == 'resnet152': C, H, W = 3, 224, 224 model = pretrainedmodels.resnet152(pretrained='imagenet') elif params['model'] == 'resnet18': C, H, W = 3, 224, 224 model = pretrainedmodels.resnet18(pretrained='imagenet') elif params['model'] == 'resnet34': C, H, W = 3, 224, 224 model = pretrainedmodels.resnet34(pretrained='imagenet') elif params['model'] == 'inceptionresnetv2': C, H, W = 3, 299, 299 model = pretrainedmodels.inceptionresnetv2( num_classes=1001, pretrained='imagenet+background') elif params['model'] == 'googlenet': C, H, W = 3, 224, 224 model = googlenet(pretrained=True) print(model) else:
def __init__(self, backbone, heads, head_conv=128, num_filters=[256, 256, 256],
             pretrained=True, dcn=False, gn=False, ws=False, freeze_bn=False,
             after_non_local='layer1', non_local_hidden_channels=None):
    # Build a backbone + FPN-style decoder with per-task output heads.
    # `heads` maps head name -> number of output channels; each head gets a
    # small conv tower registered as an attribute of the same name.
    # NOTE(review): mutable default `num_filters=[256, 256, 256]` — safe
    # only while callers never mutate it.
    super().__init__()
    self.heads = heads
    if backbone == 'resnet18':
        pretrained = 'imagenet' if pretrained else None
        self.backbone = pretrainedmodels.resnet18(pretrained=pretrained)
        num_bottleneck_filters = 512
    elif backbone == 'resnet34':
        pretrained = 'imagenet' if pretrained else None
        self.backbone = pretrainedmodels.resnet34(pretrained=pretrained)
        num_bottleneck_filters = 512
    elif backbone == 'resnet50':
        pretrained = 'imagenet' if pretrained else None
        self.backbone = pretrainedmodels.resnet50(pretrained=pretrained)
        num_bottleneck_filters = 2048
    elif backbone == 'resnet101':
        pretrained = 'imagenet' if pretrained else None
        self.backbone = pretrainedmodels.resnet101(pretrained=pretrained)
        num_bottleneck_filters = 2048
    elif backbone == 'resnet152':
        pretrained = 'imagenet' if pretrained else None
        self.backbone = pretrainedmodels.resnet152(pretrained=pretrained)
        num_bottleneck_filters = 2048
    elif backbone == 'se_resnext50_32x4d':
        pretrained = 'imagenet' if pretrained else None
        self.backbone = pretrainedmodels.se_resnext50_32x4d(
            pretrained=pretrained)
        num_bottleneck_filters = 2048
    elif backbone == 'se_resnext101_32x4d':
        pretrained = 'imagenet' if pretrained else None
        self.backbone = pretrainedmodels.se_resnext101_32x4d(
            pretrained=pretrained)
        num_bottleneck_filters = 2048
    elif backbone == 'resnet34_v1b':
        # timm backbones take a boolean `pretrained` directly.
        self.backbone = timm.create_model('gluon_resnet34_v1b',
                                          pretrained=pretrained)
        convert_to_inplace_relu(self.backbone)
        num_bottleneck_filters = 512
    elif backbone == 'resnet50_v1d':
        self.backbone = timm.create_model('gluon_resnet50_v1d',
                                          pretrained=pretrained)
        convert_to_inplace_relu(self.backbone)
        num_bottleneck_filters = 2048
    elif backbone == 'resnet101_v1d':
        self.backbone = timm.create_model('gluon_resnet101_v1d',
                                          pretrained=pretrained)
        convert_to_inplace_relu(self.backbone)
        num_bottleneck_filters = 2048
    elif backbone == 'resnext50_32x4d':
        self.backbone = timm.create_model('resnext50_32x4d',
                                          pretrained=pretrained)
        convert_to_inplace_relu(self.backbone)
        num_bottleneck_filters = 2048
    elif backbone == 'resnext50d_32x4d':
        self.backbone = timm.create_model('resnext50d_32x4d',
                                          pretrained=pretrained)
        convert_to_inplace_relu(self.backbone)
        num_bottleneck_filters = 2048
    elif backbone == 'seresnext26_32x4d':
        self.backbone = timm.create_model('seresnext26_32x4d',
                                          pretrained=pretrained)
        convert_to_inplace_relu(self.backbone)
        num_bottleneck_filters = 2048
    elif backbone == 'resnet18_ctdet':
        # Plain torchvision ResNet-18 initialised from a CenterNet checkpoint.
        self.backbone = models.resnet18()
        state_dict = torch.load(
            'pretrained_weights/ctdet_coco_resdcn18.pth')['state_dict']
        self.backbone.load_state_dict(state_dict, strict=False)
        num_bottleneck_filters = 512
    elif backbone == 'resnet50_maskrcnn':
        self.backbone = models.detection.maskrcnn_resnet50_fpn(
            pretrained=pretrained).backbone.body
        print(self.backbone)
        num_bottleneck_filters = 2048
    else:
        raise NotImplementedError
    if after_non_local is not None:
        # Insert a non-local attention block after the named backbone stage.
        self.after_non_local = after_non_local
        in_channels = getattr(self.backbone,
                              after_non_local)[0].conv1.in_channels
        if non_local_hidden_channels is None:
            non_local_hidden_channels = in_channels // 2
        self.non_local = NonLocal2d(in_channels, non_local_hidden_channels)
    if freeze_bn:
        # Freeze BatchNorm affine parameters throughout the backbone.
        for m in self.backbone.modules():
            if isinstance(m, nn.BatchNorm2d):
                m.weight.requires_grad = False
                m.bias.requires_grad = False
    # Lateral 1x1 convs reduce each backbone stage to the decoder widths.
    self.lateral4 = nn.Sequential(
        Conv2d(num_bottleneck_filters, num_filters[0],
               kernel_size=1, bias=False, ws=ws),
        # NOTE(review): passes the whole `num_filters` list to GroupNorm —
        # lateral3 uses num_filters[0]; this looks like a bug when gn=True.
        nn.GroupNorm(32, num_filters) if gn else nn.BatchNorm2d(num_filters[0]),
        nn.ReLU(inplace=True))
    self.lateral3 = nn.Sequential(
        Conv2d(num_bottleneck_filters // 2, num_filters[0],
               kernel_size=1, bias=False, ws=ws),
        nn.GroupNorm(32, num_filters[0]) if gn else nn.BatchNorm2d(num_filters[0]),
        nn.ReLU(inplace=True))
    self.lateral2 = nn.Sequential(
        Conv2d(num_bottleneck_filters // 4, num_filters[1],
               kernel_size=1, bias=False, ws=ws),
        nn.GroupNorm(32, num_filters[1]) if gn else nn.BatchNorm2d(num_filters[1]),
        nn.ReLU(inplace=True))
    self.lateral1 = nn.Sequential(
        Conv2d(num_bottleneck_filters // 8, num_filters[2],
               kernel_size=1, bias=False, ws=ws),
        # NOTE(review): same suspect list-argument to GroupNorm as lateral4.
        nn.GroupNorm(32, num_filters) if gn else nn.BatchNorm2d(num_filters[2]),
        nn.ReLU(inplace=True))
    # Decoder 3x3 convs; decode3 optionally uses deformable convolution.
    self.decode3 = nn.Sequential(
        DCN(num_filters[0], num_filters[1],
            kernel_size=3, padding=1, stride=1) if dcn else \
        Conv2d(num_filters[0], num_filters[1],
               kernel_size=3, padding=1, bias=False, ws=ws),
        nn.GroupNorm(32, num_filters[1]) if gn else nn.BatchNorm2d(num_filters[1]),
        nn.ReLU(inplace=True))
    self.decode2 = nn.Sequential(
        Conv2d(num_filters[1], num_filters[2],
               kernel_size=3, padding=1, bias=False, ws=ws),
        nn.GroupNorm(32, num_filters[2]) if gn else nn.BatchNorm2d(num_filters[2]),
        nn.ReLU(inplace=True))
    self.decode1 = nn.Sequential(
        Conv2d(num_filters[2], num_filters[2],
               kernel_size=3, padding=1, bias=False, ws=ws),
        nn.GroupNorm(32, num_filters[2]) if gn else nn.BatchNorm2d(num_filters[2]),
        nn.ReLU(inplace=True))
    # One small conv tower per requested output head.
    for head in sorted(self.heads):
        num_output = self.heads[head]
        fc = nn.Sequential(
            Conv2d(num_filters[2], head_conv,
                   kernel_size=3, padding=1, bias=False, ws=ws),
            nn.GroupNorm(32, head_conv) if gn else nn.BatchNorm2d(head_conv),
            nn.ReLU(inplace=True),
            nn.Conv2d(head_conv, num_output, kernel_size=1))
        if 'hm' in head:
            # Bias prior for heatmap heads — presumably the CenterNet
            # focal-loss initialisation; confirm against the paper.
            fc[-1].bias.data.fill_(-2.19)
        else:
            fill_fc_weights(fc)
        self.__setattr__(head, fc)
def __init__(self):
    """Wrap a pretrainedmodels ResNet-152 as a fixed-size feature extractor."""
    super(FeatureExtractor, self).__init__()
    # Dimensionality of the backbone's pooled output.
    self.FEAT_SIZE = 2048
    self.model = pretrainedmodels.resnet152()
def build_model(model_name): device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # load pretrained model model_name = model_name # could be fbresnet152 or inceptionresnetv2 if(model_name == 'senet154'): model = pretrainedmodels.senet154(pretrained='imagenet') elif(model_name == 'se_resnet152'): model = pretrainedmodels.se_resnet152(pretrained='imagenet') elif(model_name == 'se_resnext101_32x4d'): model = pretrainedmodels.se_resnext101_32x4d(pretrained='imagenet') elif(model_name == 'resnet152'): model = pretrainedmodels.resnet152(pretrained='imagenet') elif(model_name == 'resnet101'): model = pretrainedmodels.resnet101(pretrained='imagenet') elif(model_name == 'densenet201'): model = pretrainedmodels.densenet201(pretrained='imagenet') model.to(device) for param in model.parameters(): param.requires_grad = False num_ftrs = model.last_linear.in_features class CustomModel(nn.Module): def __init__(self, model): super(CustomModel, self).__init__() self.features = nn.Sequential(*list(model.children())[:-1] ) self.classifier = nn.Sequential( torch.nn.Linear(num_ftrs, 128), torch.nn.Dropout(0.3), # drop 50% of the neuron torch.nn.Linear(128, 7) ) def forward(self, x): x = self.features(x) x = x.view(x.size(0), -1) x = self.classifier(x) return x model = CustomModel(model) freeze_layer(model.features) model.to(device) for param in model.parameters(): param.requires_grad = False class CustomModel1(nn.Module): def __init__(self, model): super(CustomModel1, self).__init__() self.features = nn.Sequential(*list(model.children())[:-1]) self.classifier = nn.Sequential( *[list(model.classifier.children())[i] for i in [0]] ) def forward(self, x): x = self.features(x) x = x.view(x.size(0), -1) x = self.classifier(x) return x CustomModel = CustomModel1(model) num_ftrs = list(CustomModel.classifier.children())[-1].out_features CustomModel.to(device) return CustomModel, num_ftrs
def __init__(self, config_file: Optional[str] = None,
             override_list: List[Any] = []):
    """Assemble the experiment configuration for chest X-ray captioning.

    Defaults are defined inline, then optionally overridden by a yacs
    config file and a flat key/value override list, after which the
    config is frozen (immutable).

    :param config_file: optional path to a yacs config file to merge.
    :param override_list: flat ``[key, value, ...]`` override pairs.
    """
    # NOTE(review): mutable default `override_list=[]` — harmless here
    # because the list is only read, never mutated.
    _C = CN()
    # Held-out images used for qualitative validation.
    _C.VALID_IMAGES = [
        'CXR1576_IM-0375-2001.png', 'CXR1581_IM-0378-2001.png',
        'CXR3177_IM-1497-2001.png', 'CXR2585_IM-1082-1001.png',
        'CXR1125_IM-0082-1001.png', 'CXR3_IM-1384-2001.png',
        'CXR1565_IM-0368-1001.png', 'CXR1105_IM-0072-1001-0001.png',
        'CXR2874_IM-1280-1001.png', 'CXR1886_IM-0574-1001.png'
    ]
    # Candidate visual backbones: name -> (module, feature dim, input size).
    # NOTE(review): every backbone is instantiated eagerly at config time —
    # expensive; verify this is intended.
    _C.MODELS = [{
        'resnet18': (pretrainedmodels.resnet18(pretrained=None), 512, 224),
        'resnet50': (pretrainedmodels.resnet50(pretrained=None), 2048, 224),
        'resnet101': (pretrainedmodels.resnet101(pretrained=None), 2048, 224),
        'resnet152': (pretrainedmodels.resnet152(pretrained=None), 2048, 224),
        'inception_resnet_v2':
        (pretrainedmodels.inceptionresnetv2(pretrained=None), 1536, 299)
    }]
    # _C.MODELS_FEATURE_SIZE = {'resnet18':512, 'resnet50':2048, 'resnet101':2048, 'resnet152':2048,
    #                           'inception_v3':2048, 'inception_resnet_v2':1536}
    # Random seed for NumPy and PyTorch, important for reproducibility.
    _C.RANDOM_SEED = 42
    # Opt level for mixed precision training using NVIDIA Apex. This can be
    # one of {0, 1, 2}. Refer NVIDIA Apex docs for their meaning.
    _C.FP16_OPT = 2
    # Path to the dataset root, which structure as per README. Path is
    # assumed to be relative to project root.
    _C.IMAGE_PATH = '/netscratch/gsingh/MIMIC_CXR/DataSet/Indiana_Chest_XRay/Images_2'
    _C.TRAIN_JSON_PATH = '/netscratch/gsingh/MIMIC_CXR/DataSet/Indiana_Chest_XRay/iu_xray_train_2.json'
    _C.VAL_JSON_PATH = '/netscratch/gsingh/MIMIC_CXR/DataSet/Indiana_Chest_XRay/iu_xray_val_2.json'
    _C.TEST_JSON_PATH = '/netscratch/gsingh/MIMIC_CXR/DataSet/Indiana_Chest_XRay/iu_xray_test_2.json'
    # Whether to use pretrained word embeddings (identifier typo preserved).
    _C.PRETRAINED_EMDEDDING = False
    # Path to .vocab file generated by ``sentencepiece``.
    _C.VOCAB_FILE_PATH = "/netscratch/gsingh/MIMIC_CXR/DataSet/Indiana_Chest_XRay/Vocab/indiana.vocab"
    # Path to .model file generated by ``sentencepiece``.
    _C.VOCAB_MODEL_PATH = "/netscratch/gsingh/MIMIC_CXR/DataSet/Indiana_Chest_XRay/Vocab/indiana.model"
    _C.VOCAB_SIZE = 3000
    # Training schedule and optimisation hyper-parameters.
    _C.EPOCHS = 1024
    _C.BATCH_SIZE = 10
    _C.TEST_BATCH_SIZE = 100
    _C.ITERATIONS_PER_EPOCHS = 1
    _C.WEIGHT_DECAY = 1e-5
    _C.NUM_LABELS = 41
    _C.IMAGE_SIZE = 299
    # Decoder / transformer dimensions.
    _C.MAX_SEQUENCE_LENGTH = 130
    _C.DROPOUT_RATE = 0.1
    _C.D_HEAD = 64
    _C.TRAIN_DATASET_LENGTH = 25000
    _C.INFERENCE_TIME = False
    _C.COMBINED_N_LAYERS = 1
    _C.BEAM_SIZE = 50
    # Special token indices used by the tokenizer.
    _C.PADDING_INDEX = 0
    _C.EOS_INDEX = 3
    _C.SOS_INDEX = 2
    _C.USE_BEAM_SEARCH = True
    # Whether image features are precomputed rather than extracted online.
    _C.EXTRACTED_FEATURES = False
    _C.IMAGE_MODEL_PATH = '/netscratch/gsingh/MIMIC_CXR/Results/Image_Feature_Extraction/MIMIC_CXR_No_ES/model.pth'
    _C.EMBEDDING_DIM = 8192
    _C.CONTEXT_SIZE = 1024
    _C.LR_COMBINED = 1e-4
    _C.MAX_LR = 1e-1
    _C.SAVED_DATASET = False
    _C.MODEL_NAME = 'inception_resnet_v2'
    # Output locations for this experiment run.
    INIT_PATH = '/netscratch/gsingh/MIMIC_CXR/Results/Modified_Transformer/Indiana_15_10_2020_2/'
    _C.SAVED_DATASET_PATH_TRAIN = INIT_PATH + 'DataSet/train_dataloader.pth'
    _C.SAVED_DATASET_PATH_VAL = INIT_PATH + 'DataSet/val_dataloader.pth'
    _C.SAVED_DATASET_PATH_TEST = INIT_PATH + 'DataSet/test_dataloader.pth'
    _C.CHECKPOINT_PATH = INIT_PATH + 'CheckPoints'
    _C.MODEL_PATH = INIT_PATH + 'combined_model.pth'
    _C.MODEL_STATE_DIC = INIT_PATH + 'combined_model_state_dic.pth'
    _C.FIGURE_PATH = INIT_PATH + 'Graphs'
    _C.CSV_PATH = INIT_PATH
    _C.TEST_CSV_PATH = INIT_PATH + 'test_output_image_feature_input.json'
    self._C = _C
    # Merge optional file and CLI overrides, then derive dependent params.
    if config_file is not None:
        self._C.merge_from_file(config_file)
    self._C.merge_from_list(override_list)
    self.add_derived_params()
    # Make an instantiated object of this class immutable.
    self._C.freeze()
def Model_builder(configer):
    """Instantiate the classification model named in the configer.

    Every supported pretrainedmodels backbone is built with a 1000-class
    ImageNet head which is then replaced by a fresh Linear layer sized to
    the dataset's class count; MobileNetV2 is built directly with the
    target class count. The model is finally wrapped for DataParallel /
    DistributedDataParallel or moved to the default device.

    :param configer: project config object with model/dataset/train sections.
    :return: the device-placed (and possibly parallel-wrapped) model.
    :raises ImportError: if the architecture name is not supported.
    """
    model_name = configer.model['name']
    No_classes = configer.dataset_cfg["id_cfg"]["num_classes"]
    model_pretrained = configer.model['pretrained']
    model_dataparallel = configer.model["DataParallel"]
    model_gpu_replica = configer.model["Multi_GPU_replica"]
    gpu_ids = configer.train_cfg["gpu"]

    # Dispatch table: all pretrainedmodels backbones follow the same
    # build-then-replace-head recipe, so a mapping replaces the elif chain.
    backbone_factories = {
        "Inceptionv3": PM.inceptionv3,
        "Xception": PM.xception,
        "VGG_19": PM.vgg19,
        "Resnet18": PM.resnet18,
        "Resnet50": PM.resnet50,
        "Resnet101": PM.resnet101,
        "Resnet152": PM.resnet152,
        "Resnet34": PM.resnet34,
        "Densenet121": PM.densenet121,
        "ResNeXt101-32": PM.resnext101_32x4d,
        "ResNeXt101-64": PM.resnext101_64x4d,
    }

    if model_name in backbone_factories:
        model = backbone_factories[model_name](num_classes=1000,
                                               pretrained=model_pretrained)
        # Swap the ImageNet head for one sized to this dataset.
        in_features = model.last_linear.in_features
        model.last_linear = nn.Linear(in_features, No_classes)
    elif model_name == "MobilenetV2":
        model = MobileNetV2(n_class=No_classes)
    else:
        raise ImportError("Model Architecture not supported")

    # Performing Data Parallelism if configured
    if model_dataparallel:
        model = torch.nn.DataParallel(model.to(device), device_ids=gpu_ids)
    elif model_gpu_replica:
        torch.distributed.init_process_group(backend='nccl',
                                             world_size=1,
                                             rank=1)
        model = torch.nn.DistributedDataParallel(model.to(device),
                                                 device_ids=gpu_ids)
    else:
        model = model.to(device)

    print('---------- Model Loaded')
    return model
args.device = torch.device( 'cuda:' + str(args.gpu_id) if torch.cuda.is_available() else 'cpu') args.output_dir = os.path.join(args.output_dir, args.model) os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu params = vars(args) if params['model'] == 'inception_v3': C, H, W = 3, 299, 299 model = pretrainedmodels.inceptionv3(num_classes=1000, pretrained='imagenet') load_image_fn = utils.LoadTransformImage(model) elif params['model'] == 'resnet152': C, H, W = 3, 224, 224 model = pretrainedmodels.resnet152(num_classes=1000, pretrained='imagenet') load_image_fn = utils.LoadTransformImage(model) elif params['model'] == 'inception_v4': C, H, W = 3, 299, 299 model = pretrainedmodels.inceptionv4(num_classes=1000, pretrained='imagenet') load_image_fn = utils.LoadTransformImage(model) elif params['model'] == 'inceptionresnetv2': C, H, W = 3, 299, 299 model = pretrainedmodels.inceptionresnetv2(num_classes=1000, pretrained='imagenet') load_image_fn = utils.LoadTransformImage(model) else:
def model_152(pretrained=True, **kwargs):
    """Return a pretrainedmodels ResNet-152 with a 1000-class head.

    Bug fix: the ``pretrained`` flag was previously ignored and ImageNet
    weights were always loaded; it now selects randomly-initialised
    weights when False. The default (True) behaves exactly as before.

    :param pretrained: load ImageNet weights when True.
    :param kwargs: accepted for interface compatibility; unused.
    """
    weights = 'imagenet' if pretrained else None
    return pretrainedmodels.resnet152(num_classes=1000, pretrained=weights)
def main():
    """CLI entry point: extract audio tracks and 2D-CNN image features
    from a directory of videos.

    :raises ValueError: if the requested image model is not supported.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--video_dir', type=str, default='../msrvtt_2017/train-video',
        help='The video dir that one would like to extract audio file from')
    parser.add_argument('--output_dir',
                        type=str,
                        default='../msrvtt_2017/preprocessed',
                        help='The file output directory')
    parser.add_argument(
        '--output_channels', type=int, default=1,
        help='The number of output audio channels, default to 1')
    parser.add_argument(
        '--output_frequency', type=int, default=16000,
        help='The output audio frequency in Hz, default to 16000')
    parser.add_argument(
        '--band_width', type=int, default=160,
        help=
        'Bandwidth specified to sample the audio (unit in kbps), default to 160'
    )
    parser.add_argument(
        '--model', type=str, default='resnet152',
        help=
        'The pretrained model to use for extracting image features, default to resnet152'
    )
    parser.add_argument('--gpu', type=str, default='0',
                        help='The CUDA_VISIBLE_DEVICES argument, default to 0')
    parser.add_argument(
        '--n_frame_steps', type=int, default=80,
        help='The number of frames to extract from a single video')
    opt = parser.parse_args()
    opt = vars(opt)
    if not os.path.exists(opt['output_dir']):
        os.mkdir(opt['output_dir'])
    # Audio pipeline: rip audio, then split it per video.
    vToA(opt)
    split_audio(opt)
    print('cleaning up original .wav files...')
    # Renamed local: the original shadowed the builtin `dir`.
    output_files = os.listdir(opt['output_dir'])
    for file in output_files:
        if file.endswith('.wav'):
            os.remove(os.path.join(opt['output_dir'], file))
    # Restrict visible GPUs before building the image model.
    os.environ['CUDA_VISIBLE_DEVICES'] = opt['gpu']
    if opt['model'] == 'resnet152':
        C, H, W = 3, 224, 224
        model = pretrainedmodels.resnet152(pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)
    elif opt['model'] == 'inception_v3':
        C, H, W = 3, 299, 299
        model = pretrainedmodels.inceptionv3(pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)
    elif opt['model'] == 'vgg16':
        C, H, W = 3, 224, 224
        model = pretrainedmodels.vgg16(pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)
    else:
        # Fail fast: previously this only printed and then crashed on the
        # unbound `model` variable below.
        raise ValueError('The image model is not supported')
    # Headless backbone on GPU(s) for feature extraction.
    model.last_linear = utils.Identity()
    model = nn.DataParallel(model)
    model = model.cuda()
    extract_image_feats(opt, model, load_image_fn)