Example #1
def test_vgg_side_archs_torch(mocker, github, arch):
    model = getattr(models, arch)(pretrained=False)
    mocker.patch(
        "torch.hub.load_state_dict_from_url", return_value=model.state_dict(),
    )

    assert hub.load(github, arch, pretrained=True, framework="torch")
Example #2
 def test_load_from_github(self):
     hub_model = hub.load('ailzhang/torchhub_example',
                          'mnist',
                          pretrained=True,
                          verbose=False)
     self.assertEqual(sum_of_state_dict(hub_model.state_dict()),
                      SUM_OF_HUB_EXAMPLE)
Example #3
 def test_load_from_github(self):
     hub_model = hub.load("pytorch/vision",
                          "resnet18",
                          pretrained=True,
                          progress=False)
     assert sum_of_model_parameters(hub_model).item() == pytest.approx(
         SUM_OF_PRETRAINED_RESNET18_PARAMS)
Example #4
 def __init__(self, pretrained=True):
     super(ShuffleNetV2, self).__init__()
     self.model = hub.load('pytorch/vision:v0.5.1',
                           'shufflenet_v2_x1_0',
                           pretrained=pretrained)
     self._out_features_channels = [116, 232, 464, 1024]
     self._out_features_strides = [2**(i + 1) for i in range(2, 5)] + [2**5]
Example #5
 def test_set_dir(self):
     temp_dir = tempfile.gettempdir()
     hub.set_dir(temp_dir)
     hub_model = hub.load('pytorch/vision', 'resnet18', pretrained=True)
     self.assertEqual(self.resnet18_pretrained, hub_model.state_dict())
     assert os.path.exists(temp_dir + '/vision_master')
     shutil.rmtree(temp_dir + '/vision_master')
Example #6
 def test_set_dir(self):
     temp_dir = tempfile.gettempdir()
     hub.set_dir(temp_dir)
     hub_model = hub.load('pytorch/vision', 'resnet18', pretrained=True)
     self.assertEqual(sum_of_model_parameters(hub_model),
                      SUM_OF_PRETRAINED_RESNET18_PARAMS)
     assert os.path.exists(temp_dir + '/pytorch_vision_master')
     shutil.rmtree(temp_dir + '/pytorch_vision_master')
Example #7
 def test_load_zip_1_6_checkpoint(self):
     hub_model = hub.load('ailzhang/torchhub_example',
                          'mnist_zip_1_6',
                          pretrained=True,
                          verbose=False,
                          trust_repo=True)
     self.assertEqual(sum_of_state_dict(hub_model.state_dict()),
                      SUM_OF_HUB_EXAMPLE)
Example #8
def get_drp_unet(in_channels, pretrained):
    model = hub.load('mateuszbuda/brain-segmentation-pytorch',
                     'unet',
                     in_channels=in_channels,
                     pretrained=pretrained)

    model.__class__.forward = forward
    return model
Example #9
 def test_load_from_github(self):
     hub_model = hub.load('pytorch/vision',
                          'resnet18',
                          pretrained=True,
                          progress=False)
     self.assertAlmostEqual(sum_of_model_parameters(hub_model).item(),
                            SUM_OF_PRETRAINED_RESNET18_PARAMS,
                            places=2)
Example #10
def init():
    """Encapsulates the initialization and compensates for some minor quirks in TorchHub"""
    global tacotron2
    global waveglow
    tacotron2 = hub.load('nvidia/DeepLearningExamples:torchhub',
                         'nvidia_tacotron2')
    tacotron2 = tacotron2.to('cuda')
    # Tell the model we're using it to evaluate, not to train (optimization)
    tacotron2.eval()
    sys.stderr.write("\n")

    waveglow = hub.load('nvidia/DeepLearningExamples:torchhub',
                        'nvidia_waveglow')
    waveglow = waveglow.remove_weightnorm(waveglow)
    waveglow = waveglow.to('cuda')
    # As above: disable training specific stuff
    waveglow.eval()
    sys.stderr.write("\n")
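Beyond loading and preparing the two models, the typical use of this pair is text-to-speech inference. The following is a minimal sketch, assuming the `nvidia_tts_utils` entry point and the `infer()` methods published for these models on PyTorch Hub (none of which appear in the snippet above), and that `init()` has already been called:

import torch
from torch import hub

init()  # populate the tacotron2 / waveglow globals defined above

# Helper entry point from the same repository (assumption: available as on PyTorch Hub).
utils = hub.load('nvidia/DeepLearningExamples:torchhub', 'nvidia_tts_utils')

text = "Hello world, this is a TorchHub example."
sequences, lengths = utils.prepare_input_sequence([text])

with torch.no_grad():
    # Tacotron2 turns the character sequence into a mel spectrogram ...
    mel, _, _ = tacotron2.infer(sequences, lengths)
    # ... and WaveGlow turns the spectrogram into an audio waveform (22050 Hz).
    audio = waveglow.infer(mel)

waveform = audio[0].data.cpu().numpy()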
Example #11
 def test_load_from_local_dir(self):
     local_dir = hub._get_cache_or_reload('ailzhang/torchhub_example',
                                          force_reload=False)
     hub_model = hub.load(local_dir,
                          'mnist',
                          source='local',
                          pretrained=True,
                          verbose=False)
     self.assertEqual(sum_of_state_dict(hub_model.state_dict()),
                      SUM_OF_HUB_EXAMPLE)
Example #12
 def test_set_dir(self):
     temp_dir = tempfile.gettempdir()
     hub.set_dir(temp_dir)
     hub_model = hub.load('ailzhang/torchhub_example',
                          'mnist',
                          pretrained=True,
                          verbose=False)
     self.assertEqual(sum_of_state_dict(hub_model.state_dict()),
                      SUM_OF_HUB_EXAMPLE)
     assert os.path.exists(temp_dir + '/ailzhang_torchhub_example_master')
     shutil.rmtree(temp_dir + '/ailzhang_torchhub_example_master')
Example #13
 def test_set_dir(self):
     temp_dir = tempfile.gettempdir()
     hub.set_dir(temp_dir)
     hub_model = hub.load('pytorch/vision',
                          'resnet18',
                          pretrained=True,
                          progress=False)
     assert sum_of_model_parameters(hub_model).item() == pytest.approx(
         SUM_OF_PRETRAINED_RESNET18_PARAMS)
     assert os.path.exists(temp_dir + '/pytorch_vision_master')
     shutil.rmtree(temp_dir + '/pytorch_vision_master')
Example #14
 def test_set_dir(self):
     temp_dir = tempfile.gettempdir()
     hub.set_dir(temp_dir)
     hub_model = hub.load("pytorch/vision",
                          "resnet18",
                          weights="DEFAULT",
                          progress=False)
     assert sum_of_model_parameters(hub_model).item() == pytest.approx(
         SUM_OF_PRETRAINED_RESNET18_PARAMS)
     assert os.path.exists(temp_dir + "/pytorch_vision_master")
     shutil.rmtree(temp_dir + "/pytorch_vision_master")
Example #15
 def test_load_legacy_zip_checkpoint(self):
     with warnings.catch_warnings(record=True) as ws:
         warnings.simplefilter("always")
         hub_model = hub.load('ailzhang/torchhub_example',
                              'mnist_zip',
                              pretrained=True,
                              verbose=False)
         self.assertEqual(sum_of_state_dict(hub_model.state_dict()),
                          SUM_OF_HUB_EXAMPLE)
         assert any(
             "will be deprecated in favor of default zipfile" in str(w)
             for w in ws)
Example #16
def main():
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size',
                        type=int,
                        default=64,
                        metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size',
                        type=int,
                        default=1000,
                        metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed',
                        type=int,
                        default=1,
                        metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument(
        '--log-interval',
        type=int,
        default=10,
        metavar='N',
        help='how many batches to wait before logging training status')
    args = parser.parse_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")

    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
    test_loader = torch.utils.data.DataLoader(datasets.MNIST(
        '/home/john/Data/mnist',
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])),
                                              batch_size=args.test_batch_size,
                                              shuffle=True,
                                              **kwargs)

    model = hub.load("johnhany/torchhub:master",
                     "cnn",
                     force_reload=True,
                     pretrained=True).to(device)

    test(args, model, device, test_loader)
Example #17
 def __init__(self, num_classes):
     super(SelfDefinedModel, self).__init__()
     self.classifier = load(repo_or_dir='huawei-noah/ghostnet',
                            model='ghostnet_1x',
                            pretrained=True,
                            progress=True)
     self.classifier.conv_stem = nn.Conv2d(
         in_channels=1,
         out_channels=self.classifier.conv_stem.out_channels,
         kernel_size=self.classifier.conv_stem.kernel_size,
         stride=self.classifier.conv_stem.stride,
         padding=self.classifier.conv_stem.padding,
         bias=self.classifier.conv_stem.bias)
     self.classifier.classifier = nn.Linear(
         in_features=self.classifier.classifier.in_features,
         out_features=num_classes)
Example #18
 def _load_model(self, path: str, bpe: str, bpe_filename:str) -> RobertaHubInterface:
     if path == "xlmr.large" or path == "xlmr.base":
         return hub.load("pytorch/fairseq", path, force_reload=True)
     else:
         checkpoint_file = "model.pt" if os.path.exists(os.path.join(path, "model.pt")) else "checkpoint_best.pt"
         loaded = hub_utils.from_pretrained(
             model_name_or_path=path,
             checkpoint_file=checkpoint_file,
             data_name_or_path=path,
             bpe=bpe,
             sentencepiece_vocab=os.path.join(path, bpe_filename),
             sentencepiece_model=os.path.join(path, bpe_filename),
             load_checkpoint_heads=True,
             archive_map=RobertaModel.hub_models(),
             cpu=False
         )
         return RobertaHubInterface(loaded['args'], loaded['task'], loaded['models'][0])
Example #19
def test_hub_johnson_alahi_li_2016_transformer(subtests, github):
    def configs():
        file = path.join(pystiche_papers.__path__[0], "johnson_alahi_li_2016",
                         "model_urls.csv")
        with open(file, "r") as fh:
            for row in csv.DictReader(fh):
                row["impl_params"] = str_to_bool(row["impl_params"])
                row["instance_norm"] = str_to_bool(row["instance_norm"])
                del row["url"]
                yield row

    for config in configs():
        with subtests.test(**config):
            assert hub.load(github, "johnson_alahi_li_2016_transformer",
                            **config)
Example #20
 def __init__(self):
     super().__init__()
     resnet101 = hub.load('pytorch/vision:v0.6.0', 'resnet101', pretrained=True)
     resnet101 = list(resnet101.children())[:-3]
     self.resnet101 = nn.Sequential(
         *resnet101,
     )
     self.classifier = nn.Sequential(
         nn.Conv2d(1024, 2048, 3, stride=2, padding=1),
         nn.ReLU(),
         nn.Conv2d(2048, 1024, 3),
         nn.ReLU(),
         nn.Conv2d(1024, 1024, 3),
         nn.ReLU(),
         nn.Conv2d(1024, 1024, 3),
         nn.ReLU(),
         nn.Flatten(),
         nn.Linear(1024, 196),
     )
Example #21
    def test_get_set_dir(self):
        previous_hub_dir = torch.hub.get_dir()
        with tempfile.TemporaryDirectory('hub_dir') as tmpdir:
            torch.hub.set_dir(tmpdir)
            self.assertEqual(torch.hub.get_dir(), tmpdir)
            self.assertNotEqual(previous_hub_dir, tmpdir)

            hub_model = hub.load('ailzhang/torchhub_example',
                                 'mnist',
                                 pretrained=True,
                                 verbose=False)
            self.assertEqual(sum_of_state_dict(hub_model.state_dict()),
                             SUM_OF_HUB_EXAMPLE)
            assert os.path.exists(
                os.path.join(tmpdir, 'ailzhang_torchhub_example_master'))

        # Test that set_dir properly calls expanduser()
        # non-regression test for https://github.com/pytorch/pytorch/issues/69761
        new_dir = os.path.join("~", "hub")
        torch.hub.set_dir(new_dir)
        self.assertEqual(torch.hub.get_dir(), os.path.expanduser(new_dir))
Example #22
 def test_load_from_github(self):
     hub_model = hub.load('pytorch/vision', 'resnet18', pretrained=True)
     self.assertEqual(self.resnet18_pretrained, hub_model.state_dict())
Example #23
    torch.cuda.empty_cache()

    basedir = '../data/kitti_raw'
    date = '2011_09_26'
    drive = '0005'

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    dataset = pykitti.raw(basedir, date, drive)
    idx = 16
    file_name = "{}_{}_{}.png".format(
        date, drive,
        os.path.basename(dataset.cam2_files[idx])[:-4])

    model = hub.load('TheCodez/pytorch-GoogLeNet-FCN',
                     'googlenet_fcn',
                     pretrained='cityscapes')
    model = model.to(device)
    model.eval()

    img = dataset.get_cam2(idx)
    pc_velo = dataset.get_velo(idx)

    print("Inference")
    pred = autolabel.semantic_segmentation(model, img, device)

    pc_velo = autolabel.get_points_in_fov_90(pc_velo)

    print("Transferring labels")
    pc_labels = autolabel.transfer_labels(pc_velo, pred,
                                          dataset.calib.T_cam0_velo,
Example #24
    def _get_resnet_partially_trained(cls,
                                      num_classes=None,
                                      pretrained=True,
                                      freeze=6,
                                      net_version=18,
                                      to_grayscale=False):
        '''
        Obtains the pretrained resnet18 model from the Web
        if not cached. Then:
           o Freezes the leftmost freeze layers
             so that they are unaffected by subsequent training.
             
           o Modifies the number of output classes from resnet's
             default 1000 to num_classes.
             
           o Modifies the input layer to expect grayscale,
             i.e. only one channel, instead of three. The
             weights are retained from the pretrained model.
              
        If freeze is zero, all layers of the pretrained model
        are allowed to change during training.
        
        If running under Distributed Data Parallel (DDP)
        protocol, only the master node will download, and
        then share with the others.
        
        :param num_classes: number of target classes
        :type num_classes: int
        :param pretrained: if true, the pre-trained version
            is obtained. Else initial weights are undefined
        :type pretrained: bool
        :param freeze: how many layers to
            freeze, protecting them from training.
        :type freeze: int
        :param net_version: which Resnet to return: 18, 34, or 50
        :type net_version: int
        :return: a fresh model
        :rtype: torch.nn.Module
        '''

        if num_classes is None:
            raise ValueError("Num_classes argument is required")

        net_name = 'resnet'
        available_versions = (18, 34, 50)

        if net_version not in available_versions:
            raise ValueError(
                f"{net_name} version must be one of {available_versions}")

        model = hub.load('pytorch/vision:v0.6.0',
                         f'{net_name}{net_version}',
                         pretrained=pretrained)

        if to_grayscale:
            model = cls._first_layer_to_in_channel1(model, net_name)

        # Keep as many layers as requested
        # in the config file protected against
        # further learning:
        cls.freeze_model_layers(model, freeze)

        num_in_features = model.fc.in_features

        model.fc = nn.Linear(num_in_features, num_classes)

        # Create a property on the model that
        # returns the number of output classes:
        model.num_classes = model.fc.out_features
        return model
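A hedged usage sketch for the classmethod above. `NetUtils` is a hypothetical stand-in for the (unnamed) class that defines `_get_resnet_partially_trained`; the tensor shapes just illustrate the single-channel input and the resized classification head:

import torch

# Hypothetical host class; the real class name is not shown in this example.
model = NetUtils._get_resnet_partially_trained(num_classes=10,
                                                net_version=18,
                                                freeze=6,
                                                to_grayscale=True)

batch = torch.randn(4, 1, 224, 224)   # one input channel because to_grayscale=True
logits = model(batch)                 # shape (4, 10): one score per target class
assert model.num_classes == 10        # attribute set at the end of the classmethod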
Example #25
 def test_load_from_github(self):
     hub_model = hub.load('pytorch/vision', 'resnet18', pretrained=True)
     self.assertEqual(sum_of_model_parameters(hub_model),
                      SUM_OF_PRETRAINED_RESNET18_PARAMS)
Example #26
def test_hub_load_smoke(github, model):
    assert isinstance(hub.load(github, model), nn.Module)
Example #27
def main_worker(gpu, ngpus_per_node, args):
    global best_acc1
    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)
    # create model
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
        gpus = list(range(len(args.gpu.split(','))))
    else:
        gpus = [0]  # [1,2]

    # if args.arch not in model_names:
    #     raise NotImplementedError('Other optimizer is not implemented')
    # # elif args.arch == 'DPN26' or 'DPN92':
    #     # from model import MultiModalNet
    #     # model = MultiModalNet("se_resnext101_32x4d","dpn26",0.5)
    # else:
    #     Net = getattr(modelZoo, args.arch)
    #     model = Net(num_classes=args.num_classes)

    # model = torch.nn.DataParallel(model, device_ids=gpus).cuda()

    if args.pretrained:
        if args.arch == 'se_resnext101_32x4d':
            print('=> creating model {}'.format(args.arch))
            Net = getattr(modelZoo, args.arch)
            model = Net(
                args.arch, 0.5,
                args.num_classes)  # here only use 'se_resnext101_32x4d'
        elif args.arch == 'pnasnet5large' or args.arch == 'se-resnext152':
            print('=> creating model {}'.format(args.arch))
            model = pretrainedmodels.__dict__[args.arch](num_classes=1000,
                                                         pretrained='imagenet')
            model.last_linear = nn.Linear(model.last_linear.in_features,
                                          args.num_classes)
        elif args.arch in resnext101_groups:
            print('=> creating model {}'.format(args.arch))
            model = hub.load('facebookresearch/WSL-Images', args.arch)
            for param in model.parameters():
                param.requires_grad = False
            model.fc = nn.Sequential(nn.Dropout(p=0.5), nn.Linear(2048, 1024),
                                     nn.LeakyReLU(inplace=True),
                                     nn.Linear(1024, 54))
        elif args.arch in efficientnet_groups:
            print('=> creating model {}'.format(args.arch))
            # model = modelZoo.EfficientNet_CBAM(args.arch)

            Net = getattr(modelZoo, 'MultiNet')
            model = Net(args.arch)

            # Net = getattr(modelZoo, 'EfficientNet')
            # model = Net.from_pretrained(args.arch)
            # model._fc = nn.Linear(model._fc.in_features, args.num_classes)

            # model._fc = nn.Sequential(
            #     nn.BatchNorm1d(model._fc.in_features),
            #     nn.Linear(model._fc.in_features, 256),
            #     nn.LeakyReLU(inplace=True),
            #     nn.Dropout(0.4),
            #     nn.Linear(256, args.num_classes)
            # )
        else:
            print('=> creating model {}'.format(args.arch))
            Net = getattr(models, args.arch)
            model = Net(pretrained=True)
    else:
        print("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch]()
        model.fc = nn.Linear(model.fc.in_features, args.num_classes)

    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(
                (args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)
    # criterion = FocalLoss_v2(num_class=args.num_classes).cuda(args.gpu)
    # criterion = LabelSmoothSoftmaxCEV2(lb_smooth=0.1, lb_ignore=-100)
    # criterion.cuda()

    # optimizer = torch.optim.SGD(model.parameters(), args.lr,
    #                             momentum=args.momentum,
    #                             weight_decay=args.weight_decay)
    optimizer = RangerVA(model.parameters(),
                         lr=args.lr,
                         betas=(0.95, 0.999),
                         eps=1e-6)
    # optimizer = torch.optim.Adam(params=model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        # if os.path.isfile(args.resume):
        if os.path.exists(args.resume) and (not os.path.isdir(args.resume)):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            if args.resume.startswith('/cache/tmp/'):
                os.remove(args.resume)

            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            if args.gpu is not None:
                # best_acc1 may be from a checkpoint from a different GPU
                best_acc1 = best_acc1.to(args.gpu)
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    traindir = os.path.join(args.data_local, args.train_data)
    valdir = os.path.join(args.data_local, args.val_data)
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    # The ImageFolder class maps each sub-directory name under traindir to a label id and
    # then uses that id as the label during model training.
    # For example, if the sub-directories under traindir are named 0~53, ImageFolder treats
    # those directory names as class_names and builds a class_to_idx mapping such as
    # {"0": 0, "1": 1, "10": 2, "11": 3, ..., "19": 11, "2": 12, ...},
    # where the key is the class_name and the value is the idx used as the training label.
    # We therefore save this idx-to-class_name mapping together with the trained model, so
    # that at inference time the predicted idx can be mapped back to the correct class_name.
    # (A short illustrative sketch follows this example.)

    # if args.eval_pth != '':
    #     if mox.file.exists(args.eval_pth) and (not mox.file.is_directory(args.eval_pth)):
    #         if args.eval_pth.startswith('s3://'):
    #             model_name = args.eval_pth.rsplit('/', 1)[1]
    #             mox.file.copy(args.eval_pth, '/cache/tmp/' + model_name)
    #             args.eval_pth = '/cache/tmp/' + model_name
    #         print("=> loading checkpoint '{}'".format(args.eval_pth))
    #         if args.gpu is None:
    #             checkpoint = torch.load(args.eval_pth)
    #         else:
    #             # Map model to be loaded to specified single gpu.
    #             loc = 'cuda:{}'.format(args.gpu)
    #             checkpoint = torch.load(args.eval_pth, map_location=loc)
    #         if args.eval_pth.startswith('/cache/tmp/'):
    #             os.remove(args.eval_pth)
    #
    #         args.start_epoch = checkpoint['epoch']
    #         best_acc1 = checkpoint['best_acc1']
    #         if args.gpu is not None:
    #             # best_acc1 may be from a checkpoint from a different GPU
    #             best_acc1 = best_acc1.to(args.gpu)
    #         model.load_state_dict(checkpoint['state_dict'])
    #         optimizer.load_state_dict(checkpoint['optimizer'])
    #         print("=> loaded checkpoint '{}' (epoch {})"
    #               .format(args.eval_pth, checkpoint['epoch']))
    #     else:
    #         print("=> no checkpoint found at '{}'".format(args.eval_pth))
    #
    #     validate(val_loader, model, criterion, args)
    #     return

    if args.visdom:
        vis_title = 'PyTorch on DCNN'
        vis_legend = ['Train Loss', 'Val Loss']
        vis_legend_1 = ['Acc-Top1', 'Acc-Top5']
        iter_plot = create_vis_plot('Iteration', 'Loss', vis_title, vis_legend)
        epoch_plot = create_vis_plot('Epoch', 'Loss', vis_title, vis_legend)
        acc_plot = create_vis_plot('Iteration', 'Acc', vis_title, vis_legend_1)
        plots = [iter_plot, epoch_plot, acc_plot]

    for epoch in range(args.start_epoch, args.epochs):
        # load data ========================================================
        switch_freq = 10
        if (epoch // switch_freq) % 3 == 0:
            input_size = 256  # bz=18
            args.batch_size = 18
        elif (epoch // switch_freq) % 3 == 1:
            input_size = 380  # bz=8
            args.batch_size = 8
        elif (epoch // switch_freq) % 3 == 2:
            input_size = 456  # bz=6
            args.batch_size = 6
        print('input_size, bz:', input_size, args.batch_size)

        train_dataset = datasets.ImageFolder(
            traindir,
            transforms.Compose([
                transforms.RandomResizedCrop(input_size),
                # PowerPIL(),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ]))
        idx_to_class = OrderedDict()
        for key, value in train_dataset.class_to_idx.items():
            idx_to_class[value] = key

        if args.distributed:
            train_sampler = torch.utils.data.distributed.DistributedSampler(
                train_dataset)
        else:
            train_sampler = None

        if args.distributed:
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch, args)

        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=args.batch_size,
            shuffle=(train_sampler is None),
            num_workers=args.workers,
            pin_memory=True,
            sampler=train_sampler)

        val_loader = torch.utils.data.DataLoader(datasets.ImageFolder(
            valdir,
            transforms.Compose([
                transforms.Resize(input_size),
                transforms.CenterCrop(input_size),
                transforms.ToTensor(),
                normalize,
            ])),
                                                 batch_size=args.batch_size,
                                                 shuffle=False,
                                                 num_workers=args.workers,
                                                 pin_memory=True)

        # ===========================================================================
        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, args, plots)

        # evaluate on validation set
        if (epoch + 1) % args.print_freq == 0:
            acc1 = validate(val_loader, model, criterion, args)

            # remember best acc@1 and save checkpoint
            is_best = False
            best_acc1 = max(acc1.item(), best_acc1)
            pth_file_name = os.path.join(
                args.train_local, 'epoch_%s_%s.pth' %
                (str(epoch + 1), str(round(acc1.item(), 3))))
            if not args.multiprocessing_distributed or (
                    args.multiprocessing_distributed
                    and args.rank % ngpus_per_node == 0):
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'arch': args.arch,
                        'state_dict': model.state_dict(),
                        'best_acc1': best_acc1,
                        'optimizer': optimizer.state_dict(),
                        'idx_to_class': idx_to_class
                    }, is_best, pth_file_name, args)

    if args.epochs >= args.print_freq:
        save_best_checkpoint(best_acc1, args)
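The comments in the example above describe how ImageFolder derives integer labels from sub-directory names. A short sketch of that mapping and the inverse lookup that gets saved with the checkpoint, assuming a hypothetical directory tree such as data/train/<class_name>/...:

from collections import OrderedDict

from torchvision import datasets

# Hypothetical path; any tree with one sub-directory per class works.
train_dataset = datasets.ImageFolder('data/train')

# Sub-directory names are sorted as strings, so "10" comes before "2", e.g.
# {"0": 0, "1": 1, "10": 2, "11": 3, ..., "19": 11, "2": 12, ...}
print(train_dataset.class_to_idx)

# Invert the mapping so a predicted idx can be decoded back to its class_name.
idx_to_class = OrderedDict(
    (idx, name) for name, idx in train_dataset.class_to_idx.items())
print(idx_to_class[2])  # -> "10" in the 0~53 scenario described above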
Example #28
        raise ValueError("model_repo or model_name not provided")
    chk_point_files = None
    model_repo_str = "_".join(model_repo.split("/"))
    if ":" in model_repo_str:
        model_repo_str = "_".join(model_repo_str.split(":"))
    else:
        model_repo_str = f"{model_repo_str}_master"
    model_dir = isdir(os.path.join(cache_dir, model_repo_str))
    chk_point_dir = join(cache_dir, "checkpoints")
    try:
        chk_point_files = any([
            f.startswith(model_name) for f in listdir(chk_point_dir)
            if isfile(join(chk_point_dir, f))
        ])
    except FileNotFoundError as e:
        logger.error(e)
    if model_dir and chk_point_files:
        return True
    else:
        return False


if __name__ == "__main__":
    from torch import hub
    hub.load("pytorch/vision:v0.6.0", "resnet18", pretrained=True)
    two = is_model_cached("pytorch/vision:v0.6.0", "resnet18")
    print("checking if resnet18 v0.6.0 is cached: ", two)
    hub.load("pytorch/vision", "resnet18", pretrained=True)
    two = is_model_cached("pytorch/vision", "resnet18")
    print("checking if resnet18 master is cached: ", two)
Example #29
    def train_model(
        self,
        train_csv,
        train_data_dir,
        val_csv,
        val_data_dir,
        num_epochs=2,
        batch_size=64,
        save_graph=True,
        graph_fname="train-val-loss.png",
        save_model=True,
        model_fname="resnest_model",
    ):
        train_dataset = ImageDataset(train_csv, train_data_dir,
                                     self.transform.training)
        validation_dataset = ImageDataset(val_csv, val_data_dir,
                                          self.transform.validation)
        batch_size = batch_size

        train_loader = DataLoader(train_dataset,
                                  batch_size=batch_size,
                                  shuffle=True,
                                  drop_last=True)

        validation_loader = DataLoader(
            validation_dataset,
            batch_size=batch_size,
            shuffle=False,
            drop_last=False,
        )

        # get list of models
        hub.list("zhanghang1989/ResNeSt", force_reload=True)
        # load pretrained models, using ResNeSt-50 as an example
        resnest = hub.load("zhanghang1989/ResNeSt",
                           "resnest50",
                           pretrained=True)
        # Freeze layers
        for param in resnest.parameters():
            param.requires_grad = False

        # Define the last layers to be retrained
        num_ftrs = resnest.fc.in_features
        # Redefine the last layer of the model
        resnest.fc = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(num_ftrs, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Linear(256, 17),
        )
        resnest = resnest.to(self.device)
        optimizer = optim.SGD(resnest.fc.parameters(),
                              lr=0.01,
                              momentum=0.9,
                              weight_decay=0.01)

        # Weights for imbalanced data
        pos_weights = DataStats.pos_weights(train_csv)
        pos_weights = log(as_tensor(pos_weights, dtype=float))
        pos_weights = pos_weights.to(self.device)

        training_losses, validation_losses = Trainer.train(
            resnest,
            optimizer,
            train_loader,
            validation_loader,
            num_epochs=num_epochs,
            # scheduler=scheduler,
            verbose=True,
            pos_weights=pos_weights,
            device=self.device,
        )

        if save_graph:
            PlotUtils.plot_losses(training_losses, validation_losses,
                                  graph_fname)

        if save_model:
            self._save_model(resnest, "", model_fname)

        if self.device == "cuda":
            cuda.empty_cache()
Example #30
    def get_model_ddp(
        cls,
        rank,
        local_leader_rank,
        log,
        net_version,
        pretrained,
        freeze,
    ):  # @DontTrace
        '''
        Determine whether this process is the
        master node. If so, obtain the pretrained
        resnet18 model. Then distribute the model
        to the other nodes. 
        
        :param rank: this process' rank
            in the distributed data processing sense
        :type rank: int
        :param local_leader_rank: the lowest rank on this machine
        :type local_leader_rank: int
        :param log: logging service to log to
        :type log: LoggingService
        :param net_version: which resnet version to obtain
        :type net_version: int
        :param pretrained: if true, the pre-trained version
            is obtained. Else initial weights are undefined
        :type pretrained: bool
        :param freeze: how many layers to
            freeze, protecting them from training.
        :type freeze: int
        '''

        if net_version not in (18, 50):
            raise ValueError("Resnet version must be 18 or 50")

        hostname = socket.gethostname()
        # Let the local leader download
        # the model from the Internet,
        # in case it is not already cached
        # locally:

        # Case 1: not on a GPU machine:
        dev = device('cuda' if cuda.is_available() else 'cpu')
        if dev == device('cpu'):
            model = hub.load('pytorch/vision:v0.6.0',
                             'resnet18' if net_version == 18 else 'resnet50',
                             pretrained=pretrained)

        # Case 2a: GPU machine, and this is this machine's
        #          leader process, so it is responsible for
        #          downloading the model if it is not cached:
        elif rank == local_leader_rank:
            log.info(f"Procss with rank {rank} on {hostname} loading model")
            model = hub.load('pytorch/vision:v0.6.0',
                             'resnet18' if net_version == 18 else 'resnet50',
                             pretrained=pretrained)

            # Allow the others on this machine
            # to load the model (guaranteed to
            # be locally cached now):
            log.info(
                f"Procss with rank {rank} on {hostname} waiting for others to laod model"
            )
            dist.barrier()
        # Case 2b: GPU machine, but not the local leader. Just
        #          wait for the local leader to be done downloading:
        else:
            # Wait for leader to download the
            # model for everyone on this machine:
            log.info(
                f"Process with rank {rank} on {hostname} waiting for leader to laod model"
            )
            dist.barrier()
            # Get the cached version:
            log.info(f"Procss with rank {rank} on {hostname} laoding model")
            model = hub.load('pytorch/vision:v0.6.0',
                             'resnet18' if net_version == 18 else 'resnet50',
                             pretrained=pretrained)

        model = cls.freeze_model_layers(model, freeze)

        return model
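The barrier-based pattern in the method above can be summarised in isolation. A minimal sketch, assuming the default process group has already been initialised and using resnet18 purely as a placeholder model:

import torch.distributed as dist
from torch import hub

def load_with_local_leader(rank, local_leader_rank=0):
    """Only the local leader downloads; the other ranks wait, then read the hub cache."""
    if rank == local_leader_rank:
        model = hub.load('pytorch/vision:v0.6.0', 'resnet18', pretrained=True)
        dist.barrier()   # release the ranks waiting below
    else:
        dist.barrier()   # wait until the leader has populated the cache
        model = hub.load('pytorch/vision:v0.6.0', 'resnet18', pretrained=True)
    return model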