コード例 #1
0
def extract_feats(args):
    """Build a pretrained CNN backbone and run frame-feature extraction.

    Selects the backbone named by ``args['model']`` (inception_v3,
    resnet152 or inception_v4), strips its classification head, wraps it
    in DataParallel on GPU, and delegates to ``prepro_feats.extract_feats``.

    Raises:
        ValueError: if ``args['model']`` names an unsupported backbone.
    """
    params = args
    if params['model'] == 'inception_v3':
        # Inception nets expect 299x299 RGB inputs.
        C, H, W = 3, 299, 299
        model = pretrainedmodels.inceptionv3(pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)

    elif params['model'] == 'resnet152':
        C, H, W = 3, 224, 224
        model = pretrainedmodels.resnet152(pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)

    elif params['model'] == 'inception_v4':
        C, H, W = 3, 299, 299
        model = pretrainedmodels.inceptionv4(num_classes=1000,
                                             pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)

    else:
        # Bug fix: the original only printed a warning here and then
        # crashed below with UnboundLocalError on `model`. Fail fast.
        raise ValueError("doesn't support %s" % (params['model']))

    # Replace the classifier with an identity so forward() yields features.
    model.last_linear = utils.Identity()
    model = nn.DataParallel(model)
    model = model.cuda()
    prepro_feats.extract_feats(params, model, load_image_fn)
コード例 #2
0
def generate_C2D_model(opt):
    """Build a 2-D CNN feature extractor for frame-level features.

    Selects the backbone named by ``opt.c2d_model_name``, replaces its
    classification head with an identity so forward() yields pooled
    features, and moves it to ``opt.device`` unless ``opt.no_cuda``.

    Returns:
        Tuple ``(load_image_fn, model, (C, H, W))`` where ``(C, H, W)``
        is the expected input geometry of the chosen backbone.

    Raises:
        ValueError: if ``opt.c2d_model_name`` is unsupported (the original
        silently fell through and crashed with UnboundLocalError).
    """
    if opt.c2d_model_name == 'inception_v3':
        C, H, W = 3, 299, 299
        model = pretrainedmodels.inceptionv3(num_classes=1000, pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)

    elif opt.c2d_model_name == 'resnet152':
        C, H, W = 3, 224, 224
        model = pretrainedmodels.resnet152(num_classes=1000, pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)

    elif opt.c2d_model_name == 'inception_v4':
        C, H, W = 3, 299, 299
        model = pretrainedmodels.inceptionv4(num_classes=1000, pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)

    elif opt.c2d_model_name == 'inceptionresnetv2':
        C, H, W = 3, 299, 299
        model = pretrainedmodels.inceptionresnetv2(num_classes=1000, pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)

    else:
        raise ValueError("doesn't support %s" % opt.c2d_model_name)

    # Strip the classifier: downstream code consumes raw pooled features.
    model.last_linear = utils.Identity()

    if not opt.no_cuda:
        model = model.to(opt.device)

    return load_image_fn, model, (C, H, W)
def inceptionv3():
    """Return an ImageNet-pretrained Inception-v3 re-headed for 4 classes."""
    net = pretrainedmodels.inceptionv3(1000, pretrained='imagenet')

    num_classes = 4
    # Width of the existing final linear layer (2048 for inception_v3).
    in_features = net.last_linear.in_features
    net.last_linear = nn.Linear(in_features, num_classes)
    return net
コード例 #4
0
ファイル: models.py プロジェクト: Vlad0922/kaggle_imet_fgvc
    def __init__(self, use_dropout=False):
        """Wrap an ImageNet-pretrained Inception-v3 as a feature body.

        Args:
            use_dropout: flag stored for downstream use by this module.
        """
        super(InceptionV3Body, self).__init__()

        backbone = pretrainedmodels.inceptionv3(pretrained='imagenet')
        self.model = backbone
        self.features = backbone.features
        # Input width of the backbone's final (last) child layer, i.e. the
        # feature dimensionality produced before classification.
        self.input_size = list(backbone.children())[-1].in_features

        self.use_dropout = use_dropout
コード例 #5
0
def inceptionv3(input_size=(299, 299, 3), num_classes=1000, pretrained=None):
    """Construct an Inception-v3 model, optionally adapting the stem.

    Bug fixes over the original: ``num_classes`` and ``pretrained`` are
    now forwarded to the factory instead of being silently ignored, and
    the model is actually returned (the original ended in ``pass`` and
    returned ``None``).

    Args:
        input_size: expected input shape; when it differs from the
            default, the stem convolution is rebuilt.
        num_classes: size of the classification head.
        pretrained: pretrained weight set name, or ``None`` for random init.

    Returns:
        The constructed model.
    """
    model = models.inceptionv3(num_classes=num_classes, pretrained=pretrained)
    if input_size != (299, 299, 3):
        # NOTE(review): input_size[0] is used as the input channel count,
        # which conflicts with the (H, W, C) layout of the default tuple
        # above — confirm the intended tuple layout with callers.
        model.features[0].conv = nn.Conv2d(input_size[0],
                                           32,
                                           kernel_size=3,
                                           stride=2,
                                           bias=False)
    return model
コード例 #6
0
    def __init__(self, use_gpu: bool = True, transform: bool = True):
        """Frozen, inference-only Inception-v3 feature extractor.

        Args:
            use_gpu: move the CNN to CUDA when True.
            transform: flag stored for callers that apply preprocessing.
        """
        super().__init__()
        print('USING InceptionV3Extractor')
        self.cnn = pretrainedmodels.inceptionv3()

        self.tf_image = utils.TransformImage(self.cnn)
        self.transform = transform
        self.use_gpu = use_gpu
        if use_gpu:
            self.cnn = self.cnn.cuda()
        self.cnn.eval()

        # Feature geometry of the backbone's penultimate layer.
        self.features_size = 2048
        self.regions_count = 64
        self.regions_features_size = 2048

        # Freeze every weight: this module never trains the backbone.
        for weight in self.cnn.parameters():
            weight.requires_grad = False
コード例 #7
0
def generate_2D_model(opt):
    """Build a 2-D CNN backbone for feature extraction.

    Selects the backbone named by ``opt['model']``, strips its
    classification head, wraps it in DataParallel and moves it to GPU.

    Returns:
        The CUDA DataParallel-wrapped model with an identity head.

    Raises:
        ValueError: if ``opt['model']`` is unsupported (the original
        printed a warning and then crashed with UnboundLocalError).
    """
    if opt['model'] == 'inception_v3':
        C, H, W = 3, 299, 299
        model = pretrainedmodels.inceptionv3(pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)
    elif opt['model'] == 'vgg16':
        C, H, W = 3, 224, 224
        model = pretrainedmodels.vgg16(pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)
    elif opt['model'] == 'vgg19':
        C, H, W = 3, 224, 224
        model = pretrainedmodels.vgg19(pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)
    elif opt['model'] == 'resnet50':
        C, H, W = 3, 224, 224
        model = pretrainedmodels.resnet50(pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)
    elif opt['model'] == 'resnet101':
        C, H, W = 3, 224, 224
        model = pretrainedmodels.resnet101(pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)
    elif opt['model'] == 'resnet152':
        C, H, W = 3, 224, 224
        model = pretrainedmodels.resnet152(pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)
    elif opt['model'] == 'inception_v4':
        C, H, W = 3, 299, 299
        model = pretrainedmodels.inceptionv4(num_classes=1000,
                                             pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)
    elif opt['model'] == 'nasnet':
        # NASNet-A Large ships an extra "background" class (1001 total).
        C, H, W = 3, 331, 331
        model = pretrainedmodels.nasnetalarge(num_classes=1001,
                                              pretrained='imagenet+background')
        load_image_fn = utils.LoadTransformImage(model)
    else:
        raise ValueError("doesn't support %s" % (opt['model']))

    # Replace the classifier so forward() produces features, not logits.
    model.last_linear = utils.Identity()
    model = nn.DataParallel(model)
    model = model.cuda()
    return model
コード例 #8
0
def get_model(model_name='resnext101_32x16d_wsl', n_classes=6, raw=False):
    """Instantiate a classifier backbone by name with an n-class head.

    Supported families: EfficientNet ('eff...'), WSL ResNeXt
    ('resnext101_32x...wsl') and 'inceptionv3'. When ``raw`` is True the
    stem convolution is rebuilt for single-channel input (where known).
    """
    model = None

    if model_name.startswith('eff'):
        model = EfficientNet.from_pretrained(model_name)
        last_output_size = 2048

        if model_name.endswith(('b1', 'b0')):
            last_output_size = 1280
            if raw:
                raise Exception('Checkout for other effnet types the channel size out last output')
        elif model_name.endswith('b2'):
            if raw:
                # Single-channel stem replacing the default 3-channel one.
                model._conv_stem = Conv2d(1, 32, kernel_size=3, stride=2, bias=False)
            last_output_size = 1408
        elif model_name.endswith('b4'):
            if raw:
                model._conv_stem = Conv2d(1, 48, kernel_size=3, stride=2, bias=False, padding=(0, 1))
            last_output_size = 1792
        else:
            raise Exception('Checkout for other effnet types the channel size out last output')

        model._fc = torch.nn.Linear(last_output_size, n_classes)

    elif model_name.startswith('resnext101_32x') and model_name.endswith('wsl'):
        model = torch.hub.load('facebookresearch/WSL-Images', model_name)
        if raw:
            model.conv1 = torch.nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
        model.fc = torch.nn.Linear(2048, n_classes)

    elif model_name == 'inceptionv3':
        model = inceptionv3()
        model.last_linear = torch.nn.Linear(2048, n_classes)
        if raw:
            model.Conv2d_1a_3x3 = BasicConv2d(1, 32, kernel_size=3, stride=2)

    if model is None:
        raise Exception('failed to instantiate model: ' + model_name)

    return model
コード例 #9
0
def main(args):
    """Train a MIL model over COCO multi-label data with a CNN backbone.

    Loads label metadata, builds the backbone named by ``args.model``,
    wraps it in ``MILModel`` and runs the training loop.

    Raises:
        ValueError: if ``args.model`` is unsupported.
    """
    global C, H, W
    coco_labels = json.load(open(args.coco_labels))
    num_classes = coco_labels['num_classes']
    if args.model == 'inception_v3':
        C, H, W = 3, 299, 299
        model = pretrainedmodels.inceptionv3(pretrained='imagenet')

    elif args.model == 'resnet152':
        C, H, W = 3, 224, 224
        model = pretrainedmodels.resnet152(pretrained='imagenet')

    elif args.model == 'inception_v4':
        C, H, W = 3, 299, 299
        model = pretrainedmodels.inceptionv4(num_classes=1000,
                                             pretrained='imagenet')

    else:
        # Bug fix: the original did args['model'] on an argparse Namespace
        # (TypeError) and would then crash on the unbound `model` anyway.
        raise ValueError("doesn't support %s" % args.model)

    load_image_fn = utils.LoadTransformImage(model)
    # Feature width of the backbone's final layer feeds the MIL head.
    dim_feats = model.last_linear.in_features
    model = MILModel(model, dim_feats, num_classes)
    model = model.cuda()
    dataset = CocoDataset(coco_labels)
    dataloader = DataLoader(dataset, batch_size=args.batch_size, shuffle=True)
    optimizer = optim.Adam(model.parameters(),
                           lr=args.learning_rate,
                           weight_decay=args.weight_decay)
    # Step-decay the learning rate on a fixed epoch schedule.
    exp_lr_scheduler = optim.lr_scheduler.StepLR(
        optimizer,
        step_size=args.learning_rate_decay_every,
        gamma=args.learning_rate_decay_rate)

    crit = nn.MultiLabelSoftMarginLoss()
    if not os.path.isdir(args.checkpoint_path):
        os.mkdir(args.checkpoint_path)
    train(dataloader, model, crit, optimizer, exp_lr_scheduler, load_image_fn,
          args)
コード例 #10
0
ファイル: main.py プロジェクト: yinli0208/fine-tuning.pytorch
def getNetwork(args):
    """Instantiate the network selected by ``args.net_type``.

    Returns:
        Tuple ``(net, file_name)`` where ``file_name`` is the checkpoint
        base name for the chosen architecture.

    Exits the process (``sys.exit(1)``) on an unsupported net type or
    VGG depth, matching the original CLI behavior.
    """
    if (args.net_type == 'alexnet'):
        net = models.alexnet(pretrained=args.finetune)
        file_name = 'alexnet'
    elif (args.net_type == 'vggnet'):
        if (args.depth == 11):
            net = models.vgg11(pretrained=args.finetune)
        elif (args.depth == 13):
            net = models.vgg13(pretrained=args.finetune)
        elif (args.depth == 16):
            net = models.vgg16(pretrained=args.finetune)
        elif (args.depth == 19):
            net = models.vgg19(pretrained=args.finetune)
        else:
            print(
                'Error : VGGnet should have depth of either [11, 13, 16, 19]')
            sys.exit(1)
        file_name = 'vgg-%s' % (args.depth)
    elif (args.net_type == 'squeezenet'):
        net = models.squeezenet1_0(pretrained=args.finetune)
        file_name = 'squeeze'
    elif (args.net_type == 'resnet'):
        net = resnet(args.finetune, args.depth)
        file_name = 'resnet-%s' % (args.depth)
    elif (args.net_type == 'inception'):
        net = pretrainedmodels.inceptionv3(num_classes=1000,
                                           pretrained='imagenet')
        file_name = 'inception-v3'
    elif (args.net_type == 'xception'):
        net = pretrainedmodels.xception(num_classes=1000,
                                        pretrained='imagenet')
        file_name = 'xception'
    else:
        # Bug fix: the message omitted the inception/xception options that
        # this function actually supports.
        print(
            'Error : Network should be either [alexnet / squeezenet / vggnet / resnet / inception / xception]'
        )
        sys.exit(1)

    return net, file_name
コード例 #11
0
ファイル: prepro_feats.py プロジェクト: stillarrow/S2VT_ACT
                        help='how many frames to sampler per video')
    # CLI options (continuation of an argparse setup that begins above
    # this chunk).
    parser.add_argument("--video_path",
                        dest='video_path',
                        type=str,
                        default='data/MSR-VTT_Lite/Train_Val_Video',
                        help='path to video dataset')
    parser.add_argument("--model",
                        dest="model",
                        type=str,
                        default='resnet152',
                        help='the CNN model you want to use to extract_feats')
    args = parser.parse_args()
    params = vars(args)
    # Select the CNN backbone plus its expected input geometry
    # (C = channels, H = height, W = width).
    if params['model'] == 'inception_v3':
        C, H, W = 3, 299, 299
        model = pretrainedmodels.inceptionv3(pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)

    elif params['model'] == 'resnet152':
        C, H, W = 3, 224, 224
        model = pretrainedmodels.resnet152(pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)

    elif params['model'] == 'inception_v4':
        C, H, W = 3, 299, 299
        model = pretrainedmodels.inceptionv4(num_classes=1000,
                                             pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)

    else:
        # NOTE(review): this prints a warning but leaves `model` and
        # `load_image_fn` unbound — any later use would raise
        # UnboundLocalError; consider raising an exception instead.
        print("doesn't support %s" % (params['model']))
コード例 #12
0
def InceptionV3(num_classes, pretrained=False):
    """Inception-v3 with adaptive pooling and a ``num_classes`` head.

    Args:
        num_classes: output dimensionality of the replacement final layer.
        pretrained: load ImageNet weights when True, random init otherwise.

    Returns:
        The configured model.
    """
    if pretrained:
        model = inceptionv3(pretrained='imagenet')
    else:
        # Bug fix: the original left `model` unbound when pretrained was
        # False, raising UnboundLocalError on the next line.
        model = inceptionv3(pretrained=None)
    # Adaptive pooling lets the net accept variable input sizes.
    model.avg_pool = nn.AdaptiveAvgPool2d(1)
    model.last_linear = nn.Linear(2048, num_classes)
    return model
コード例 #13
0
def main():
    """Extract audio and image features from a video dataset.

    Parses CLI options, converts videos to audio and splits it, removes
    the intermediate .wav files, then extracts image features with the
    chosen pretrained CNN backbone.

    Raises:
        ValueError: if ``--model`` is not resnet152 / inception_v3 / vgg16.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--video_dir',
        type=str,
        default='../msrvtt_2017/train-video',
        help='The video dir that one would like to extract audio file from')
    parser.add_argument('--output_dir',
                        type=str,
                        default='../msrvtt_2017/preprocessed',
                        help='The file output directory')
    parser.add_argument(
        '--output_channels',
        type=int,
        default=1,
        help='The number of output audio channels, default to 1')
    parser.add_argument(
        '--output_frequency',
        type=int,
        default=16000,
        help='The output audio frequency in Hz, default to 16000')
    parser.add_argument(
        '--band_width',
        type=int,
        default=160,
        help=
        'Bandwidth specified to sample the audio (unit in kbps), default to 160'
    )
    parser.add_argument(
        '--model',
        type=str,
        default='resnet152',
        help=
        'The pretrained model to use for extracting image features, default to resnet152'
    )
    parser.add_argument('--gpu',
                        type=str,
                        default='0',
                        help='The CUDA_VISIBLE_DEVICES argument, default to 0')
    parser.add_argument(
        '--n_frame_steps',
        type=int,
        default=80,
        help='The number of frames to extract from a single video')
    opt = parser.parse_args()
    opt = vars(opt)

    if not os.path.exists(opt['output_dir']):
        os.mkdir(opt['output_dir'])
    vToA(opt)
    split_audio(opt)
    print('cleaning up original .wav files...')
    # Remove intermediate .wav files left behind by the audio split.
    # (`dir` renamed: the original shadowed the builtin.)
    for file in os.listdir(opt['output_dir']):
        if file.endswith('.wav'):
            os.remove(os.path.join(opt['output_dir'], file))

    os.environ['CUDA_VISIBLE_DEVICES'] = opt['gpu']
    if opt['model'] == 'resnet152':
        C, H, W = 3, 224, 224
        model = pretrainedmodels.resnet152(pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)
    elif opt['model'] == 'inception_v3':
        C, H, W = 3, 299, 299
        model = pretrainedmodels.inceptionv3(pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)
    elif opt['model'] == 'vgg16':
        C, H, W = 3, 224, 224
        model = pretrainedmodels.vgg16(pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)
    else:
        # Bug fix: the original printed here and then crashed below with
        # UnboundLocalError on `model`. Fail fast instead.
        raise ValueError('The image model is not supported')

    # Strip the classifier so forward() yields features, not logits.
    model.last_linear = utils.Identity()
    model = nn.DataParallel(model)

    model = model.cuda()
    extract_image_feats(opt, model, load_image_fn)
コード例 #14
0
    # CLI option (continuation of an argparse setup begun above this chunk).
    parser.add_argument('--gpu_id',
                        type=int,
                        default=0,
                        help='the gpu id to use')
    args = parser.parse_args()
    # Prefer the requested GPU when CUDA is available; else fall back to CPU.
    args.device = torch.device(
        'cuda:' + str(args.gpu_id) if torch.cuda.is_available() else 'cpu')

    # Features are written into a per-model subdirectory.
    args.output_dir = os.path.join(args.output_dir, args.model)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    params = vars(args)

    # Select the CNN backbone plus its expected input geometry
    # (C = channels, H = height, W = width).
    if params['model'] == 'inception_v3':
        C, H, W = 3, 299, 299
        model = pretrainedmodels.inceptionv3(num_classes=1000,
                                             pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)

    elif params['model'] == 'resnet152':
        C, H, W = 3, 224, 224
        model = pretrainedmodels.resnet152(num_classes=1000,
                                           pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)

    elif params['model'] == 'inception_v4':
        C, H, W = 3, 299, 299
        model = pretrainedmodels.inceptionv4(num_classes=1000,
                                             pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)

    # (chunk ends mid-chain; this branch's body continues past this view)
    elif params['model'] == 'inceptionresnetv2':
コード例 #15
0
def Model_builder(configer):
    """Construct the configured classification model and place it on device(s).

    Reads the architecture name, class count and parallelism flags from
    ``configer``, builds the backbone with an ImageNet-sized head replaced
    by an ``No_classes``-way linear layer, then applies DataParallel /
    DistributedDataParallel / plain device placement as configured.
    """
    model_name = configer.model['name']
    No_classes = configer.dataset_cfg["id_cfg"]["num_classes"]
    model_pretrained = configer.model['pretrained']
    model_dataparallel = configer.model["DataParallel"]
    model_gpu_replica = configer.model["Multi_GPU_replica"]
    gpu_ids = configer.train_cfg["gpu"]

    # Every pretrainedmodels-backed architecture follows the same recipe:
    # build with a 1000-way head, then swap in an No_classes-way head.
    # The factory is looked up lazily so only the selected one is touched.
    pm_factories = {
        "Inceptionv3": "inceptionv3",
        "Xception": "xception",
        "VGG_19": "vgg19",
        "Resnet18": "resnet18",
        "Resnet50": "resnet50",
        "Resnet101": "resnet101",
        "Resnet152": "resnet152",
        "Resnet34": "resnet34",
        "Densenet121": "densenet121",
        "ResNeXt101-32": "resnext101_32x4d",
        "ResNeXt101-64": "resnext101_64x4d",
    }

    if model_name in pm_factories:
        factory = getattr(PM, pm_factories[model_name])
        model = factory(num_classes=1000, pretrained=model_pretrained)
        in_dim = model.last_linear.in_features
        model.last_linear = nn.Linear(in_dim, No_classes)
    elif model_name == "MobilenetV2":
        # MobileNetV2 takes the class count directly; no head swap needed.
        model = MobileNetV2(n_class=No_classes)
    else:
        raise ImportError("Model Architecture not supported")

    # Performing Data Parallelism if configured
    if model_dataparallel:
        model = torch.nn.DataParallel(model.to(device), device_ids=gpu_ids)
    elif model_gpu_replica:
        torch.distributed.init_process_group(backend='nccl',
                                             world_size=1,
                                             rank=1)
        model = torch.nn.DistributedDataParallel(model.to(device),
                                                 device_ids=gpu_ids)
    else:
        model = model.to(device)

    print('---------- Model Loaded')

    return model
        x: DataLoader(image_datasets[x],
                      batch_size=opt.batch_size,
                      shuffle=True,
                      **kwargs)
        for x in mode
    }

    # Class labels are taken from the training split of the dataset.
    class_names = image_datasets['train'].classes

    dataset_size = {x: len(image_datasets[x]) for x in mode}
    print('#training images \n')
    print(dataset_size)

    # define my net and criterion optimizer

    # ImageNet-pretrained Inception-v3; its 1000-way head is replaced below.
    my_inception_v3 = pretrainedmodels.inceptionv3(1000, pretrained='imagenet')
    #my_inception_v3 = torchvision.models.inception_v3(pretrained=True)

    dim_feats = my_inception_v3.last_linear.in_features  # =2048
    nb_classes = 4
    my_inception_v3.last_linear = nn.Linear(dim_feats, nb_classes)

    # NOTE(review): .cuda() followed immediately by .to(device) is
    # redundant when device is CUDA — confirm the intended placement.
    my_inception_v3 = nn.DataParallel(my_inception_v3).cuda()
    my_inception_v3 = my_inception_v3.to(device)

    criterion = nn.CrossEntropyLoss()

    # SGD with momentum and L2 weight decay over all model parameters.
    optimizer = optim.SGD(my_inception_v3.parameters(),
                          lr=opt.lr,
                          momentum=0.9,
                          weight_decay=5e-4)