Code Example #1
File: mnist_conv.py  Project: cicicici/deeptensor
    def build_data(self):
        dt.trace(dt.DC.MODEL, "[{}] ({}) build data".format(self.tag, type(self).__name__))
        args = self._ctx.args
        data = dt.data.Mnist(batch_size=args.batch_size, valid_size=args.valid_size,
                             num_workers=1, pin_memory=self.use_cuda)
        data.init_data()
        data.load_data()
        self._data = data
        return True
Code Example #2
File: mnist_conv.py  Project: cicicici/deeptensor
    def build_model(self):
        dt.trace(dt.DC.MODEL, "[{}] ({}) build model".format(self.tag, type(self).__name__))

        self._model = MnistNet()

        #model = torchvision.models.resnet50(False)
        # Have ResNet model take in grayscale rather than RGB
        #model.conv1 = torch.nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)

        return True
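
The commented-out lines above hint at swapping MnistNet for a torchvision ResNet adapted to single-channel input. A minimal, self-contained sketch of that idea (pure torch/torchvision API, not part of deeptensor; the pretrained flag follows the older torchvision interface used in this snippet):

import torch
import torchvision

# Hypothetical sketch: build a ResNet-50 and replace its first convolution so it
# accepts 1-channel grayscale images instead of 3-channel RGB, as the comments
# in build_model() above suggest.
model = torchvision.models.resnet50(pretrained=False)
model.conv1 = torch.nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)

# Quick shape check with a fake grayscale batch (N, C, H, W).
dummy = torch.randn(2, 1, 224, 224)
print(model(dummy).shape)  # torch.Size([2, 1000])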
Code Example #3
File: estimator.py  Project: cicicici/deeptensor
    def __init__(self, ctx):
        self.tag = "EST::BASE"
        dt.trace(dt.DC.MODEL,
                 "[{}] ({}) __init__".format(self.tag,
                                             type(self).__name__))
        self._ctx = ctx
        self._trainer = None

        self._data = None
        self._model = None
        self._criterion = None
        self._optimizer = None
        self._train_hooks = []
        self._valid_hooks = []
Code Example #4
    def load_data(self):
        dt.trace(dt.DC.DATA, "[{}] load data".format(self.tag))

        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
        ])

        transform_test = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
        ])

        kwargs = {'num_workers': self._num_workers, 'pin_memory': True} if self._pin_memory else {}
        self.train.dataset = datasets.CIFAR10(self._data_dir, train=True, download=True, transform=transform_train)
        if dt.train.is_mp():
            # Horovod: use DistributedSampler to partition the training data.
            self.train.sampler = torch.utils.data.distributed.DistributedSampler(
                self.train.dataset, num_replicas=hvd.size(), rank=hvd.rank(), shuffle=self._shuffle)
            self.train.loader = torch.utils.data.DataLoader(self.train.dataset,
                batch_size=self._batch_size, shuffle=False, sampler=self.train.sampler, **kwargs)
        else:
            self.train.loader = torch.utils.data.DataLoader(self.train.dataset,
                batch_size=self._batch_size, shuffle=self._shuffle, **kwargs)

        self.valid.dataset = datasets.CIFAR10(self._data_dir, train=False, transform=transform_test)
        if dt.train.is_mp():
            # Horovod: use DistributedSampler to partition the validation data.
            self.valid.sampler = torch.utils.data.distributed.DistributedSampler(
                self.valid.dataset, num_replicas=hvd.size(), rank=hvd.rank(), shuffle=False)
            self.valid.loader = torch.utils.data.DataLoader(self.valid.dataset,
                batch_size=self._valid_size, shuffle=False, sampler=self.valid.sampler, **kwargs)
        else:
            self.valid.loader = torch.utils.data.DataLoader(self.valid.dataset,
                batch_size=self._valid_size, shuffle=False, **kwargs)

        self.test.dataset = datasets.CIFAR10(self._data_dir, train=False, transform=transform_test)
        if dt.train.is_mp():
            # Horovod: use DistributedSampler to partition the test data.
            self.test.sampler = torch.utils.data.distributed.DistributedSampler(
                self.test.dataset, num_replicas=hvd.size(), rank=hvd.rank(), shuffle=False)
            self.test.loader = torch.utils.data.DataLoader(self.test.dataset,
                batch_size=self._test_size, shuffle=False, sampler=self.test.sampler, **kwargs)
        else:
            self.test.loader = torch.utils.data.DataLoader(self.test.dataset,
                batch_size=self._test_size, shuffle=False, **kwargs)

        return self
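
One detail the CIFAR-10 loaders above leave to the caller: when a DistributedSampler is created with shuffling enabled, PyTorch expects sampler.set_epoch(epoch) at the start of every epoch so each worker draws a fresh permutation. A hedged sketch of that calling side (run_epochs and its arguments are illustrative, not part of this project):

def run_epochs(data, num_epochs):
    # `data` is assumed to be the object returned by load_data() above.
    for epoch in range(num_epochs):
        sampler = getattr(data.train, 'sampler', None)
        if sampler is not None:
            # Re-seed the distributed shuffle so each epoch sees a new order.
            sampler.set_epoch(epoch)
        for images, labels in data.train.loader:
            pass  # forward/backward pass would go here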
Code Example #5
    def bind_estimator(self, est):

        # Estimator
        self._est = est
        est.bind_trainer(self)
        est.build_train()

        # Load checkpoint
        sync_params = {
            'epoch_done': self._ctx.epoch_done,
            'global_step': self.global_step
        }
        if is_chief():
            model_params = dt.model.load(est.model, self._saver.model_latest)
            optimizer_params = dt.optimizer.load(est.optimizer,
                                                 self._saver.optimizer_latest)
            if optimizer_params:
                sync_params['epoch_done'] = optimizer_params.epoch
                sync_params['global_step'] = optimizer_params.step
                #self._ctx.stats = optimizer_params.stats

        sync_params = mp_broadcast(sync_params)
        self._ctx.epoch_done = int(sync_params['epoch_done'])
        self.set_global_step(int(sync_params['global_step']))
        set_mono_step(self.global_step)
        dt.trace(
            dt.DC.TRAIN,
            '[CHECKPOINT] epoch_done {}, global_step {}, mono_step {}'.format(
                self._ctx.epoch_done, self.global_step, mono_step()))

        if self.use_cuda:
            # Move model to GPU.
            est.model.cuda()

        # Make sure learning rate is up to date
        self.update_learning_rate(est.optimizer)

        # Horovod: broadcast parameters & optimizer state.
        hvd.broadcast_parameters(est.model.state_dict(),
                                 root_rank=chief_rank())
        hvd.broadcast_optimizer_state(est.optimizer, root_rank=chief_rank())

        # Horovod: (optional) compression algorithm.
        compression = hvd.Compression.fp16 if self._ctx.fp16_allreduce else hvd.Compression.none

        # Horovod: wrap optimizer with DistributedOptimizer.
        est.optimizer = hvd.DistributedOptimizer(
            est.optimizer,
            named_parameters=est.model.named_parameters(),
            compression=compression)
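
The Horovod calls in bind_estimator() (broadcast_parameters, broadcast_optimizer_state, DistributedOptimizer with optional fp16 compression) follow Horovod's standard PyTorch recipe. Stripped of the deeptensor wrappers, the same steps look roughly like this; the Linear model and SGD optimizer are placeholders, and root_rank=0 stands in for the project's chief_rank():

import torch
import horovod.torch as hvd

hvd.init()
if torch.cuda.is_available():
    torch.cuda.set_device(hvd.local_rank())

model = torch.nn.Linear(10, 2)                            # placeholder model
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)  # placeholder optimizer

# Start every worker from the root rank's parameters and optimizer state.
hvd.broadcast_parameters(model.state_dict(), root_rank=0)
hvd.broadcast_optimizer_state(optimizer, root_rank=0)

# Optionally compress the allreduced gradients to fp16, as above.
compression = hvd.Compression.fp16
optimizer = hvd.DistributedOptimizer(
    optimizer,
    named_parameters=model.named_parameters(),
    compression=compression)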
Code Example #6
File: mnist.py  Project: cicicici/deeptensor
    def load_data(self):
        dt.trace(dt.DC.DATA, "[{}] load data".format(self.tag))

        kwargs = {
            'num_workers': 1,
            'pin_memory': True
        } if self._pin_memory else {}
        self.train.dataset = datasets.MNIST(self._data_dir,
                                            train=True,
                                            download=True,
                                            transform=transforms.Compose([
                                                transforms.ToTensor(),
                                                transforms.Normalize(
                                                    (0.1307, ), (0.3081, ))
                                            ]))
        self.train.loader = torch.utils.data.DataLoader(
            self.train.dataset,
            batch_size=self._batch_size,
            shuffle=self._shuffle,
            **kwargs)

        self.valid.dataset = datasets.MNIST(self._data_dir,
                                            train=False,
                                            transform=transforms.Compose([
                                                transforms.ToTensor(),
                                                transforms.Normalize(
                                                    (0.1307, ), (0.3081, ))
                                            ]))
        self.valid.loader = torch.utils.data.DataLoader(
            self.valid.dataset,
            batch_size=self._valid_size,
            shuffle=False,
            **kwargs)

        self.test.dataset = datasets.MNIST(self._data_dir,
                                           train=False,
                                           transform=transforms.Compose([
                                               transforms.ToTensor(),
                                               transforms.Normalize((0.1307, ),
                                                                    (0.3081, ))
                                           ]))
        self.test.loader = torch.utils.data.DataLoader(
            self.test.dataset,
            batch_size=self._test_size,
            shuffle=False,
            **kwargs)

        return self
Code Example #7
    def init_data(self):
        dt.trace(dt.DC.DATA, "[{}] init data".format(self.tag))

        self.train, self.valid, self.test = dt.Opt(), dt.Opt(), dt.Opt()

        self.train.batch_size = self._batch_size
        self.valid.batch_size = self._valid_size
        self.test.batch_size = self._test_size

        self.train.num_total = ImageNet.TRAIN_NUM_PER_EPOCH
        self.valid.num_total = ImageNet.VALID_NUM_PER_EPOCH
        self.test.num_total = ImageNet.TEST_NUM_PER_EPOCH

        self.train.num_batch = int(math.ceil(ImageNet.TRAIN_NUM_PER_EPOCH / self._batch_size / hvd.size()))
        self.valid.num_batch = int(math.ceil(ImageNet.VALID_NUM_PER_EPOCH / self._valid_size / hvd.size()))
        self.test.num_batch = int(math.ceil(ImageNet.TEST_NUM_PER_EPOCH / self._test_size / hvd.size()))

        return self
Code Example #8
    def init_data(self):
        dt.trace(dt.DC.DATA, "[{}] init data".format(self.tag))

        self.train, self.valid, self.test = dt.Opt(), dt.Opt(), dt.Opt()

        self.train.batch_size = self._batch_size
        self.valid.batch_size = self._valid_size
        self.test.batch_size = self._test_size

        self.train.num_total = Cifar10.TRAIN_NUM_PER_EPOCH
        self.valid.num_total = Cifar10.VALID_NUM_PER_EPOCH
        self.test.num_total = Cifar10.TEST_NUM_PER_EPOCH

        self.train.num_batch = int(math.ceil(Cifar10.TRAIN_NUM_PER_EPOCH / self._batch_size / hvd.size()))
        self.valid.num_batch = int(math.ceil(Cifar10.VALID_NUM_PER_EPOCH / self._valid_size / hvd.size()))
        self.test.num_batch = int(math.ceil(Cifar10.TEST_NUM_PER_EPOCH / self._test_size / hvd.size()))

        self.classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

        return self
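
The num_batch arithmetic above rounds up after splitting each epoch across Horovod workers, so every worker runs the same number of steps. A quick worked example, assuming TRAIN_NUM_PER_EPOCH is the usual 50,000 CIFAR-10 training images and a made-up worker count of 4:

import math

train_total = 50000    # CIFAR-10 training images per epoch
batch_size = 128       # illustrative
workers = 4            # illustrative hvd.size()

num_batch = int(math.ceil(train_total / batch_size / workers))
print(num_batch)       # 98 steps per worker per epoch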
Code Example #9
File: cifar10_conv.py  Project: cicicici/deeptensor
    def build_model(self):
        dt.trace(dt.DC.MODEL,
                 "[{}] ({}) build model".format(self.tag,
                                                type(self).__name__))

        #self._model = dt.model.cifar.VGG('VGG19')        # target 92.64%
        self._model = dt.model.cifar.ResNet18()  # target 93.02%
        #self._model = dt.model.cifar.ResNet50()          # target 93.62%
        #self._model = dt.model.cifar.ResNet101()         # target 93.75%
        #self._model = dt.model.cifar.ResNet152()         # 8-gpu  94.2+%
        #self._model = dt.model.cifar.PreActResNet18()    # target 95.11%, NAN
        #self._model = dt.model.cifar.GoogLeNet()
        #self._model = dt.model.cifar.DenseNet121()       # target 95.04%
        #self._model = dt.model.cifar.ResNeXt29_32x4d()   # target 94.73%
        #self._model = dt.model.cifar.ResNeXt29_2x64d()   # target 94.82%
        #self._model = dt.model.cifar.MobileNet()
        #self._model = dt.model.cifar.MobileNetV2()       # target 94.43%
        #self._model = dt.model.cifar.DPN92()             # target 95.16%
        #self._model = dt.model.cifar.ShuffleNetG2()
        #self._model = dt.model.cifar.SENet18()
        #self._model = dt.model.cifar.ShuffleNetV2(1)
        #self._model = dt.model.cifar.EfficientNetB0()

        return True
Code Example #10
    def build_model(self):
        dt.trace(dt.DC.MODEL, "[{}] ({}) build model".format(self.tag, type(self).__name__))
        args = self._ctx.args
        pretrained = (args.pretrained > 0)

        if args.model_name == 'efficientnet':
            if args.model_type == 'b0':
                self._model = dt.model.efficientnet.efficientnet_b0(pretrained=pretrained)
            elif args.model_type == 'b1':
                self._model = dt.model.efficientnet.efficientnet_b1(pretrained=pretrained)
            elif args.model_type == 'b2':
                self._model = dt.model.efficientnet.efficientnet_b2(pretrained=pretrained)
            elif args.model_type == 'b3':
                self._model = dt.model.efficientnet.efficientnet_b3(pretrained=pretrained)
            elif args.model_type == 'b4':
                self._model = dt.model.efficientnet.efficientnet_b4(pretrained=pretrained)
            elif args.model_type == 'b5':
                self._model = dt.model.efficientnet.efficientnet_b5(pretrained=pretrained)
            elif args.model_type == 'b6':
                self._model = dt.model.efficientnet.efficientnet_b6(pretrained=pretrained)
            elif args.model_type == 'b7':
                self._model = dt.model.efficientnet.efficientnet_b7(pretrained=pretrained)
        elif args.model_name == 'efficientnet_lm':
            if args.model_type in ('b0', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'b7'):
                model_arch = "efficientnet-{}".format(args.model_type)
                if pretrained:
                    self._model = dt.model.efficientnet.EfficientNetLM.from_pretrained(model_arch)
                else:
                    self._model = dt.model.efficientnet.EfficientNetLM.from_name(model_arch)
        elif args.model_name == 'efficientnet_rw':
            if args.model_type in ('b0', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'b7'):
                model_arch = "efficientnet_{}".format(args.model_type)
                self._model = dt.model.timm.create_model(model_arch, pretrained=pretrained)
        elif args.model_name == 'fairnas':
            if args.model_type == 'a':
                self._model = dt.model.fairnas.FairNasA()         # 8-gpu
        elif args.model_name == 'resnet_rw':
            #if dt.train.is_chief():
            #    dt.print_pp(dt.model.timm.list_models())
            if args.model_type == '34':
                self._model = dt.model.timm.create_model('resnet34', pretrained=pretrained)
            elif args.model_type == '50':
                self._model = dt.model.timm.create_model('resnet50', pretrained=pretrained)
        else:
            #if dt.train.is_chief():
            #    dt.print_pp(torchvision.models.__dict__)
            self._model = torchvision.models.__dict__[args.model_name](pretrained=pretrained)

        dt.info(dt.DC.TRAIN, "model {}, type {}, pretrained {}".format(args.model_name, args.model_type, args.pretrained))

        return True
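
The final else branch resolves the model by name through torchvision.models.__dict__, the same lookup trick torchvision's reference training scripts use. In isolation it amounts to the following (the model name is only an example, and the pretrained flag follows the older torchvision interface used here):

import torchvision

model_name = 'resnet50'    # illustrative; any torchvision classification model name works
constructor = torchvision.models.__dict__[model_name]
model = constructor(pretrained=False)
print(type(model).__name__)    # ResNet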
Code Example #11
    def __init__(self, ctx):
        super(ImageNetEstimator, self).__init__(ctx)
        self.tag = "EST::IMAGENET"
        dt.trace(dt.DC.MODEL, "[{}] ({}) __init__".format(self.tag, type(self).__name__))
Code Example #12
    def __init__(self, ctx):
        super(ClassEstimator, self).__init__(ctx)
        self.tag = "EST::CLASS"
        dt.trace(dt.DC.MODEL,
                 "[{}] ({}) __init__".format(self.tag,
                                             type(self).__name__))
Code Example #13
    def __init__(self):
        self.tag = "DATA::BASE"
        dt.trace(dt.DC.DATA,
                 "[{}] ({}) __init__".format(self.tag,
                                             type(self).__name__))
Code Example #14
File: mnist_conv.py  Project: cicicici/deeptensor
    def __init__(self, opt, cfg):
        super(MnistEstimator, self).__init__(opt, cfg)
        self.tag = "EST::MNIST"
        dt.trace(dt.DC.MODEL, "[{}] ({}) __init__".format(self.tag, type(self).__name__))
Code Example #15
    def load_data(self):
        dt.trace(dt.DC.DATA, "[{}] load data".format(self.tag))

        transform_train = transforms.Compose([
            transforms.RandomResizedCrop(self._out_size, interpolation=PIL.Image.BICUBIC),
            transforms.RandomHorizontalFlip(),
            transforms.ColorJitter(
                brightness=0.4,
                contrast=0.4,
                saturation=0.4,
            ),
            transforms.ToTensor(),
            transforms.Normalize(mean=ImageNet.MEAN_RGB, std=ImageNet.VAR_RGB),
        ])

        transform_test = transforms.Compose([
            transforms.Resize(self._out_size + ImageNet.CROP_PAD, interpolation=PIL.Image.BICUBIC),
            transforms.CenterCrop(self._out_size),
            transforms.ToTensor(),
            transforms.Normalize(mean=ImageNet.MEAN_RGB, std=ImageNet.VAR_RGB),
        ])

        kwargs = {'num_workers': self._num_workers, 'pin_memory': True} if self._pin_memory else {}

        train_dataset_root = os.path.join(self._data_dir, ImageNet.TRAIN_DIR)
        self.train.dataset = datasets.ImageFolder(root=train_dataset_root, transform=transform_train)
        self.train.sampler = None
        if dt.train.is_mp():
            # Horovod: use DistributedSampler to partition the training data.
            self.train.sampler = torch.utils.data.distributed.DistributedSampler(
                self.train.dataset, num_replicas=hvd.size(), rank=hvd.rank(), shuffle=self._shuffle)
            self.train.loader = torch.utils.data.DataLoader(self.train.dataset,
                batch_size=self._batch_size, shuffle=False, sampler=self.train.sampler, **kwargs)
        else:
            self.train.loader = torch.utils.data.DataLoader(self.train.dataset,
                batch_size=self._batch_size, shuffle=self._shuffle, **kwargs)

        valid_dataset_root = os.path.join(self._data_dir, ImageNet.VALIDATION_DIR)
        self.valid.dataset = datasets.ImageFolder(root=valid_dataset_root, transform=transform_test)
        self.valid.sampler = None
        if dt.train.is_mp():
            # Horovod: use DistributedSampler to partition the validation data.
            self.valid.sampler = torch.utils.data.distributed.DistributedSampler(
                self.valid.dataset, num_replicas=hvd.size(), rank=hvd.rank(), shuffle=False)
            self.valid.loader = torch.utils.data.DataLoader(self.valid.dataset,
                batch_size=self._valid_size, shuffle=False, sampler=self.valid.sampler, **kwargs)
        else:
            self.valid.loader = torch.utils.data.DataLoader(self.valid.dataset,
                batch_size=self._valid_size, shuffle=False, **kwargs)

        test_dataset_root = os.path.join(self._data_dir, ImageNet.TEST_DIR)
        self.test.dataset = datasets.ImageFolder(root=test_dataset_root, transform=transform_test)
        self.test.sampler = None
        if dt.train.is_mp():
            # Horovod: use DistributedSampler to partition the test data.
            self.test.sampler = torch.utils.data.distributed.DistributedSampler(
                self.test.dataset, num_replicas=hvd.size(), rank=hvd.rank(), shuffle=False)
            self.test.loader = torch.utils.data.DataLoader(self.test.dataset,
                batch_size=self._test_size, shuffle=False, sampler=self.test.sampler, **kwargs)
        else:
            self.test.loader = torch.utils.data.DataLoader(self.test.dataset,
                batch_size=self._test_size, shuffle=False, **kwargs)

        dt.trace(dt.DC.DATA, "[{}] loaded, train {}, valid {}, test {}".format(
            self.tag, len(self.train.dataset), len(self.valid.dataset), len(self.test.dataset)))

        return self
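
The ImageNet loader above hands everything to datasets.ImageFolder, which derives class labels from the directory layout under the train/validation/test roots: one sub-directory per class, with indices assigned from the sorted directory names. A hedged standalone sketch (the path is a placeholder):

import os
from torchvision import datasets, transforms

# ImageFolder expects a layout such as:
#   /path/to/imagenet/train/n01440764/xxx.JPEG
#   /path/to/imagenet/train/n01443537/yyy.JPEG
data_dir = '/path/to/imagenet'    # placeholder
dataset = datasets.ImageFolder(
    root=os.path.join(data_dir, 'train'),
    transform=transforms.ToTensor())
print(len(dataset.classes), dataset.class_to_idx)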
Code Example #16
File: cifar10_conv.py  Project: cicicici/deeptensor
    def __init__(self, ctx):
        super(Cifar10Estimator, self).__init__(ctx)
        self.tag = "EST::CIFAR10"
        dt.trace(dt.DC.MODEL,
                 "[{}] ({}) __init__".format(self.tag,
                                             type(self).__name__))