예제 #1
0
    def learn_one_iter(self, high_res: Tensor):
        """Run one super-resolution training step on a batch of HR images.

        The low-resolution input is synthesized by downsampling the target,
        callbacks may transform both tensors, and the optimizer step / batch-end
        reporting are gated by the callback handler's hooks.
        """
        # Synthesize the LR input from the HR target and clip to valid range.
        downsampled = F.interpolate(high_res,
                                    scale_factor=self.scale_factor,
                                    mode=self.downsampling_mode)
        low_res = torch.clamp(downsampled, 0.0, 1.0)

        # Give callbacks a chance to transform the batch before the loss.
        batch = self._cb_handler.on_batch_begin(
            {'high_res': high_res, 'low_res': low_res}, True)
        high_res = batch['high_res']
        low_res = batch['low_res']

        raw_loss = self.compute_loss(low_res, high_res, True)
        loss = self._cb_handler.after_losses({"loss": raw_loss}, True)["loss"]

        if self._cb_handler.on_backward_begin():
            loss.backward()
        if self._cb_handler.after_backward():
            self._optimizer.step()
            if self._cb_handler.after_step():
                self._optimizer.zero_grad()

            # Report the loss (plus allocated CUDA memory when on GPU).
            device = get_device()
            if device.type == 'cuda':
                self._cb_handler.on_batch_end({
                    "loss": loss.cpu(),
                    "allocated_memory": torch.cuda.memory_allocated(device)
                })
            else:
                self._cb_handler.on_batch_end({"loss": loss})
예제 #2
0
    def learn_one_iter(self, high_res: Tensor):
        """Run one training step on a multi-scale pyramid built from ``high_res``.

        A clamped, downsampled copy is produced for every configured scale
        except 1.0; the original image is appended as the final level.
        """
        pyramid = []
        for scale in self.scale_factors:
            if scale == 1.0:
                continue
            resized = F.interpolate(high_res,
                                    scale_factor=scale,
                                    mode=self.downsampling_mode)
            pyramid.append(torch.clamp(resized, 0.0, 1.0))
        pyramid.append(high_res)

        # Callbacks may transform each pyramid level before the loss is computed.
        keys = ['pyramid_' + str(i) for i in range(len(pyramid))]
        data = self._cb_handler.on_batch_begin(dict(zip(keys, pyramid)), True)
        pyramid = [data[key] for key in keys]

        raw_loss = self.compute_loss(pyramid, True)
        loss = self._cb_handler.after_losses({"loss": raw_loss}, True)["loss"]

        if self._cb_handler.on_backward_begin():
            loss.backward()
        if self._cb_handler.after_backward():
            self._optimizer.step()
            if self._cb_handler.after_step():
                self._optimizer.zero_grad()

            # Report the loss (plus allocated CUDA memory when on GPU).
            device = get_device()
            if device.type == 'cuda':
                self._cb_handler.on_batch_end({
                    "loss": loss.cpu(),
                    "allocated_memory": torch.cuda.memory_allocated(device)
                })
            else:
                self._cb_handler.on_batch_end({"loss": loss})
예제 #3
0
파일: learner.py 프로젝트: nhatsmrt/erc
 def __init__(
         self, train_data: DataLoader, val_data: DataLoader, model: Module,
         criterion: Module, optimizer: Optimizer, device=get_device(), mixup: bool = False, mixup_alpha: float = 0.4
 ):
     """Supervised learner with optional mixup data augmentation.

     :param train_data: training-set loader
     :param val_data: validation-set loader
     :param model: network to train
     :param criterion: loss module
     :param optimizer: optimizer for the model's parameters
     :param device: compute device; NOTE(review): the ``get_device()`` default
         is evaluated once at function-definition time, not per call — confirm
         that is intended
     :param mixup: whether to enable mixup augmentation
     :param mixup_alpha: Beta-distribution parameter for mixup interpolation
     """
     super().__init__(train_data, val_data, model, criterion, optimizer)
     self._device = device
     self._mixup = mixup
     # The mixup transformer is only created (and the attribute only exists)
     # when mixup is enabled.
     if mixup:
         self._mixup_transformer = MixupTransformer(alpha=mixup_alpha)
예제 #4
0
def evaluate_fn(parameterization: Dict[str, Any], model: nn.Module,
                run: ExperimentRun) -> float:
    """Train ``model`` on CIFAR-10 at the learning rate in ``parameterization``
    and return its final validation accuracy.

    :param parameterization: hyperparameter dict; only the "lr" key is read
    :param model: the network to train (trained in place)
    :param run: experiment-tracking run handed to ModelDBCB for checkpointing
    :return: the learner's final 'accuracy' metric after 20 epochs
    """
    lr = parameterization["lr"]
    print("Evaluate at learning rate %f" % lr)

    # Set up train and validation data.
    #
    # BUG FIX: the original code called random_split and then assigned the
    # augmentation transform to train_dataset.dataset.transform. Both split
    # subsets share the SAME underlying dataset object, so the validation
    # split silently received the training augmentation too. Instead, build
    # two CIFAR10 views of the same files with separate transforms and split
    # them with one shared index permutation.
    train_transform = Compose([
        RandomHorizontalFlip(),
        RandomResizedCrop(size=32, scale=(0.95, 1.0)),
        ToTensor()
    ])
    augmented_data = CIFAR10('data/', train=True, download=True,
                             transform=train_transform)
    plain_data = CIFAR10('data/', train=True, download=True,
                         transform=ToTensor())
    train_size = int(0.8 * len(augmented_data))
    indices = torch.randperm(len(augmented_data)).tolist()
    train_dataset = torch.utils.data.Subset(augmented_data,
                                            indices[:train_size])
    val_dataset = torch.utils.data.Subset(plain_data, indices[train_size:])

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=128,
                                               shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=128,
                                             shuffle=False)
    print("Number of batches per epoch " + str(len(train_loader)))

    optimizer = SGD(model.parameters(),
                    weight_decay=0.0001,
                    lr=lr,
                    momentum=0.9)
    learner = SupervisedImageLearner(train_data=train_loader,
                                     val_data=val_loader,
                                     model=model,
                                     criterion=SmoothedCrossEntropy().to(
                                         get_device()),
                                     optimizer=optimizer,
                                     mixup=True)

    metrics = {"accuracy": Accuracy(), "loss": Loss()}

    callbacks = [
        ToDeviceCallback(),
        # Cosine-annealed LR down to eta_min over T_max scheduler steps.
        LRSchedulerCB(CosineAnnealingLR(optimizer, eta_min=0.024, T_max=405)),
        LossLogger(),
        # Persist the best-accuracy weights and log them to the tracking run.
        ModelDBCB(run=run,
                  filepath="weights/model.pt",
                  metrics=metrics,
                  monitor='accuracy',
                  mode='max')
    ]

    return learner.learn(n_epoch=20,
                         callbacks=callbacks,
                         metrics=metrics,
                         final_metric='accuracy')
예제 #5
0
 def __init__(self,
              train_iterator: Iterator,
              val_iterator: Iterator,
              model: nn.Module,
              criterion: nn.Module,
              optimizer: Optimizer,
              device=get_device()):
     """Learner over pre-built train/validation iterators.

     Moves the model and criterion onto ``device`` and stores the rest.

     NOTE(review): the ``get_device()`` default is evaluated once at
     function-definition time, not per call — confirm that is intended.
     """
     # The training iterator is stored under both names; _train_data matches
     # the naming used by sibling learners.
     self._train_data = self._train_iterator = train_iterator
     self._val_iterator = val_iterator
     self._model = model.to(device)
     self._optimizer = optimizer
     self._criterion = criterion.to(device)
     self._device = device
예제 #6
0
def model_fn(parameterization: Dict[str, Any]) -> nn.Module:
    """Build a small pre-activation residual CNN for 10-class classification.

    ``parameterization`` is accepted for interface compatibility with the
    tuning harness but is not read here.
    """
    # Two conv + residual stages (3->16, 16->32), then a feedforward head.
    layers = []
    for width_in, width_out in ((3, 16), (16, 32)):
        layers.append(
            ConvolutionalLayer(in_channels=width_in,
                               out_channels=width_out,
                               kernel_size=3,
                               activation=nn.ReLU))
        layers.append(
            ResidualBlockPreActivation(in_channels=width_out,
                                       activation=nn.ReLU))
    layers.append(
        FeedforwardBlock(in_channels=32,
                         out_features=10,
                         pool_output_size=2,
                         hidden_layer_sizes=(64, 32)))

    return Sequential(*layers).to(get_device())
예제 #7
0
def run_classifier_test():
    """Train and evaluate an SE-ResNeXt shake-shake classifier on Imagenette-160.

    Builds train/val/test loaders, defines the model blocks, trains with a
    long callback pipeline (progressive resizing, cosine LR, SWA, checkpoint),
    then evaluates both the checkpointed and the SWA-averaged model with TTA.
    """
    print("Starting classifier test")
    # progress_bar_test()
    torch.backends.cudnn.benchmark = True

    # data = CIFAR10('data/', train=True, download=True, transform=ToTensor())
    # train_size = int(0.8 * len(data))
    # val_size = len(data) - train_size
    # train_dataset, val_dataset = torch.utils.data.random_split(data, [train_size, val_size])
    # train_dataset.dataset.transform = Compose(
    #     [
    #         RandomHorizontalFlip(),
    #         RandomResizedCrop(size=32, scale=(0.95, 1.0)),
    #         # Cutout(length=16, n_holes=1),
    #         ToTensor()
    #     ]
    # )
    #
    # test_dataset = torchvision.datasets.CIFAR10('data/', train=False, download=True, transform=ToTensor())
    # kernel = partial(PolynomialKernel, dp=3, cp=2.0)


    train_val_dataset = ImageFolder(
        'data/imagenette-160/train',
        transform=Compose([
            Resize((128, 128)),
            ToTensor()
        ])
    )

    test_dataset = ImageFolder(
        'data/imagenette-160/val',
        transform=Compose([
            Resize((128, 128)),
            ToTensor()
        ])
    )

    # 80/20 train/validation split of the training folder.
    train_size = int(0.8 * len(train_val_dataset))
    val_size = len(train_val_dataset) - train_size

    train_dataset, val_dataset = random_split(train_val_dataset, [train_size, val_size])

    # NOTE(review): both subsets returned by random_split share the same
    # underlying train_val_dataset, so this assignment applies the augmented
    # transform to the validation subset as well — confirm this is intended.
    train_dataset.dataset.transform = Compose(
        [
            RandomHorizontalFlip(),
            RandomResizedCrop(size=(128, 128), scale=(0.95, 1.0)),
            # Cutout(length=16, n_holes=1),
            ToTensor()
        ]
    )
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=128, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=128, shuffle=False)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=128, shuffle=False)

    class SEResNeXtShakeShake(ResNeXtBlock):
        """ResNeXt block whose branches are bottleneck convs + SE, combined
        with shake-shake regularization."""
        def __init__(self, in_channels, reduction_ratio=16, cardinality=2, activation=nn.ReLU,
                     normalization=nn.BatchNorm2d):
            super(SEResNeXtShakeShake, self).__init__(
                branches=nn.ModuleList(
                    [
                        nn.Sequential(
                            ConvolutionalLayer(
                                in_channels, in_channels // 4, kernel_size=1, padding=0,
                                activation=activation, normalization=normalization
                            ),
                            ConvolutionalLayer(
                                in_channels // 4, in_channels, kernel_size=3, padding=1,
                                activation=activation, normalization=normalization
                            ),
                            # ConvolutionalLayer(
                            #     in_channels // 4, in_channels, kernel_size=1, padding=0,
                            #     activation=activation, normalization=normalization
                            # ),
                            SEBlock(in_channels, reduction_ratio)
                        ) for _ in range(cardinality)
                        ]
                ),
                use_shake_shake=True
            )

    class StandAloneMultiheadAttentionLayer(nn.Sequential):
        """Stand-alone multihead-attention 'conv' followed by activation and
        normalization, packaged as one sequential layer."""
        def __init__(
                self, num_heads, in_channels, out_channels, kernel_size, stride=1, padding=3,
                activation=nn.ReLU, normalization=nn.BatchNorm2d
        ):
            layers = [
                StandAloneMultiheadAttention(
                    num_heads=num_heads,
                    in_channels=in_channels,
                    out_channels=out_channels,
                    kernel_size=kernel_size,
                    stride=stride,
                    padding=padding,
                    bias=False
                ),
                activation(),
                normalization(num_features=out_channels),
            ]
            super(StandAloneMultiheadAttentionLayer, self).__init__(*layers)

    class SEResNeXtShakeShakeAttention(ResNeXtBlock):
        """Variant of SEResNeXtShakeShake whose middle 3x3 conv is replaced by
        a stand-alone multihead-attention layer."""
        def __init__(self, num_heads, in_channels, reduction_ratio=16, cardinality=2, activation=nn.ReLU,
                     normalization=nn.BatchNorm2d):
            super(SEResNeXtShakeShakeAttention, self).__init__(
                branches=nn.ModuleList(
                    [
                        nn.Sequential(
                            ConvolutionalLayer(
                                in_channels=in_channels,
                                out_channels=in_channels // 2,
                                kernel_size=1,
                                activation=activation,
                                normalization=normalization
                            ),
                            StandAloneMultiheadAttentionLayer(
                                num_heads=num_heads,
                                in_channels=in_channels // 2,
                                out_channels=in_channels // 2,
                                kernel_size=3,
                                activation=activation,
                                normalization=normalization
                            ),
                            ConvolutionalLayer(
                                in_channels=in_channels // 2,
                                out_channels=in_channels,
                                kernel_size=1,
                                activation=activation,
                                normalization=normalization
                            ),
                            SEBlock(in_channels, reduction_ratio)
                        ) for _ in range(cardinality)
                        ]
                ),
                use_shake_shake=True
            )

    # layer_1 = ManifoldMixupModule(ConvolutionalLayer(in_channels=3, out_channels=16, kernel_size=3, activation=nn.ReLU))
    # block_1 = ManifoldMixupModule(SEResNeXtShakeShake(in_channels=16, activation=nn.ReLU))

    # Backbone: alternating stride-2 downsampling convs and shake-shake
    # blocks, widening 16 -> 512, then a feedforward classification head.
    model = Sequential(
        ConvolutionalLayer(in_channels=3, out_channels=16, kernel_size=3, activation=nn.ReLU),
        SEResNeXtShakeShake(in_channels=16, activation=nn.ReLU),
        # layer_1,
        # block_1,
        ConvolutionalLayer(
            in_channels=16, out_channels=32,
            activation=nn.ReLU,
            kernel_size=2, stride=2
        ),
        SEResNeXtShakeShake(in_channels=32),
        ConvolutionalLayer(
            in_channels=32, out_channels=64,
            kernel_size=2, stride=2
        ),
        SEResNeXtShakeShake(in_channels=64),
        ConvolutionalLayer(
            in_channels=64, out_channels=128,
            kernel_size=2, stride=2
        ),
        SEResNeXtShakeShake(in_channels=128),
        ConvolutionalLayer(
            in_channels=128, out_channels=256,
            kernel_size=2, stride=2
        ),
        SEResNeXtShakeShake(in_channels=256),
        ConvolutionalLayer(
            in_channels=256, out_channels=512,
            kernel_size=2, stride=2
        ),
        SEResNeXtShakeShake(in_channels=512),
        # SEResNeXtShakeShakeAttention(num_heads=8, in_channels=512),
        FeedforwardBlock(
            in_channels=512,
            out_features=10,
            pool_output_size=2,
            hidden_layer_sizes=(256, 128)
        )
    ).to(get_device())

    # lsuv_init(module=model, input=get_first_batch(train_loader, callbacks = [ToDeviceCallback()])[0])

    # print(count_trainable_parameters(model)) # 14437816 3075928

    optimizer = SGD(model.parameters(), weight_decay=0.0001, lr=0.30, momentum=0.9)
    learner = SupervisedImageLearner(
        train_data=train_loader,
        val_data=val_loader,
        model=model,
        criterion=SmoothedCrossEntropy().to(get_device()),
        optimizer=optimizer,
        mixup=True
    )

    # lr_finder = LRFinder(
    #     model=model,
    #     train_data=train_loader,
    #     criterion=SmoothedCrossEntropy(),
    #     optimizer=partial(SGD, lr=0.074, weight_decay=0.0001, momentum=0.9),
    #     device=get_device()
    # )
    # lr_finder.find_lr(warmup=100, callbacks=[ToDeviceCallback()])

    # SWA: begin averaging weights after iteration 5025, updating every 670.
    swa = StochasticWeightAveraging(learner, average_after=5025, update_every=670)
    callbacks = [
        # ManifoldMixupCallback(learner=learner, modules=[layer_1, block_1]),
        ToDeviceCallback(),
        InputProgressiveResizing(initial_size=80, max_size=160, upscale_every=10, upscale_factor=math.sqrt(2)),
        # MixedPrecisionV2(),
        Tensorboard(),
        NaNWarner(),
        # ReduceLROnPlateauCB(optimizer, monitor='accuracy', mode='max', patience=10),
        LRSchedulerCB(CosineAnnealingLR(optimizer, eta_min=0.10, T_max=335)),
        swa,
        LossLogger(),
        ModelCheckpoint(learner=learner, filepath="weights/model.pt", monitor='accuracy', mode='max'),
        ProgressBarCB()
    ]

    metrics = {
        "accuracy": Accuracy(),
        "loss": Loss()
    }

    final = learner.learn(
        n_epoch=500,
        callbacks=callbacks,
        metrics=metrics,
        final_metric='accuracy'
    )

    print(final)
    # Evaluate the best checkpoint with test-time augmentation.
    load_model(model=model, path="weights/model.pt")
    classifier = ImageClassifier(model, tta_transform=Compose([
        ToPILImage(),
        RandomHorizontalFlip(),
        RandomResizedCrop(size=(128, 128), scale=(0.95, 1.0)),
        ToTensor()
    ]))
    print(classifier.evaluate(test_loader))

    # Evaluate the SWA-averaged weights the same way.
    print("Test SWA:")
    model = swa.get_averaged_model()
    classifier = ImageClassifier(model, tta_transform=Compose([
        ToPILImage(),
        RandomHorizontalFlip(),
        RandomResizedCrop(size=(128, 128), scale=(0.95, 1.0)),
        ToTensor()
    ]))
    print(classifier.evaluate(test_loader))
예제 #8
0
    SEResNeXtShakeShake(in_channels=128),
    ConvolutionalLayer(in_channels=128,
                       out_channels=256,
                       kernel_size=2,
                       stride=2),
    SEResNeXtShakeShake(in_channels=256),
    ConvolutionalLayer(in_channels=256,
                       out_channels=512,
                       kernel_size=2,
                       stride=2),
    SEResNeXtShakeShake(in_channels=512),
    # SEResNeXtShakeShakeAttention(num_heads=8, in_channels=512),
    FeedforwardBlock(in_channels=512,
                     out_features=10,
                     pool_output_size=2,
                     hidden_layer_sizes=(256, 128))).to(get_device())

# lr_finder = LRFinder(
#     model=model,
#     train_data=train_loader,
#     criterion=SmoothedCrossEntropy(),
#     optimizer=partial(LAMB, lr=0.074, weight_decay=0.01),
#     device=get_device()
# )
# lr_finder.find_lr(warmup=100, callbacks=[ToDeviceCallback()])

# lsuv_init(module=model, input=get_first_batch(train_loader, callbacks = [ToDeviceCallback()])[0])

# print(count_trainable_parameters(model)) # 14437816 3075928

# optimizer = LARS(model.parameters(), weight_decay=0.0001, lr=0.10, momentum=0.9)
예제 #9
0
def run_test(encoder=None,
             style_weight=1e5,
             content_weight=1.0,
             total_variation_weight=1e-4,
             n_epoch=100,
             print_every=100,
             eval_every=1,
             batch_size=4,
             style_layers={0, 7, 14, 27, 40},
             content_layers={30},
             train_ratio=0.95,
             img_dim=(128, 128),
             style_path="mouse.png",
             save_path="weights/model.pt"):
    """Train a single-style transfer U-Net on MiniCOCO content images.

    If no encoder is supplied, a five-stage strided convolutional encoder
    (3 -> 16 -> 32 -> 64 -> 128 -> 256, instance-normalized) is built.
    """
    # ImageNet channel statistics for the VGG feature extractor.
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    dataset = UnlabelledImageDataset("MiniCOCO/128/", img_dim=img_dim)
    n_train = int(train_ratio * len(dataset))
    train_dataset, val_dataset = torch.utils.data.random_split(
        dataset, [n_train, len(dataset) - n_train])

    style = pil_to_tensor(Image.open(style_path).convert("RGB"))
    dataloader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
    dataloader_val = DataLoader(val_dataset,
                                shuffle=True,
                                batch_size=batch_size)

    feature_extractor = FeatureExtractor(model=vgg19_bn,
                                         fine_tune=False,
                                         mean=mean,
                                         std=std,
                                         device=get_device())

    if encoder is None:
        # Five stride-2 downsampling convs; an SE pre-activation residual
        # block follows every stage except the last.
        widths = (3, 16, 32, 64, 128, 256)
        stages = []
        for width_in, width_out in zip(widths[:-1], widths[1:]):
            stages.append(
                ConvolutionalLayer(width_in,
                                   width_out,
                                   padding=1,
                                   stride=2,
                                   normalization=InstanceNorm2d))
            if width_out != widths[-1]:
                stages.append(
                    SEResidualBlockPreActivation(width_out,
                                                 normalization=InstanceNorm2d))
        encoder = Sequential(*stages)

    model = CustomDynamicUnet(encoder=encoder,
                              normalization=InstanceNorm2d,
                              n_classes=3,
                              y_range=(0, 1),
                              blur=True)
    print(model)

    learner = StyleTransferLearner(
        dataloader,
        dataloader_val,
        style,
        model,
        feature_extractor,
        style_layers=style_layers,
        content_layers=content_layers,
        style_weight=style_weight,
        content_weight=content_weight,
        total_variation_weight=total_variation_weight,
        device=get_device())
    learner.learn(n_epoch=n_epoch,
                  print_every=print_every,
                  eval_every=eval_every,
                  draw=True,
                  save_path=save_path)
예제 #10
0
def run_test_multiple(style_weight=10.0,
                      content_weight=1.0,
                      total_variation_weight=0.1,
                      n_epoch=100,
                      batch_size=8,
                      style_path="./data/train_9/"):
    """Train a multiple-style transfer network on COCO content images and
    several hard-coded style folders, with checkpointing and TensorBoard logs.

    NOTE(review): ``style_path`` is accepted but never read (style folders are
    hard-coded below), and ``batch_size`` only affects the per-epoch iteration
    count — the DataLoaders and samplers hard-code 8. Confirm this is intended.
    """
    from nntoolbox.vision.learner import MultipleStylesTransferLearner
    from nntoolbox.vision.utils import UnlabelledImageDataset, PairedDataset, UnlabelledImageListDataset
    from nntoolbox.utils import get_device
    from nntoolbox.callbacks import Tensorboard, MultipleMetricLogger,\
        ModelCheckpoint, ToDeviceCallback, ProgressBarCB, MixedPrecisionV2, LRSchedulerCB
    # from nntoolbox.optim.lr_scheduler import FunctionalLR
    from torch.optim.lr_scheduler import LambdaLR
    from src.models import GenericDecoder, MultipleStyleTransferNetwork, \
        PixelShuffleDecoder, PixelShuffleDecoderV2, MultipleStyleUNet, SimpleDecoder
    from torchvision.models import vgg19
    from torch.utils.data import DataLoader
    from torchvision.transforms import Compose, Resize, RandomCrop
    from torch.optim import Adam

    # ImageNet channel statistics used to normalize VGG inputs.
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    print("Begin creating dataset")
    # Styles: folders 1-7 for training, 8-9 held out for validation.
    style_paths_train = ["./data/train_" + str(i) + "/" for i in range(1, 8)]
    style_paths_val = ["./data/train_8/", "./data/train_9/"]

    content_images = UnlabelledImageListDataset(
        "data/train2014/",
        transform=Compose([Resize(512), RandomCrop((256, 256))]))
    train_style = UnlabelledImageListDataset(
        style_paths_train,
        transform=Compose([Resize(512), RandomCrop((256, 256))]))
    val_style = UnlabelledImageListDataset(
        style_paths_val,
        transform=Compose([Resize(512), RandomCrop((256, 256))]))

    # img_dim = (128, 128)
    # # content_images = UnlabelledImageDataset("MiniCOCO/128/", img_dim=img_dim)
    # # style_images = UnlabelledImageDataset(style_path, img_dim=img_dim)
    #
    #
    # content_images = UnlabelledImageListDataset("data/", img_dim=img_dim)
    # style_images = UnlabelledImageListDataset("data/train_9/", img_dim=img_dim)

    print("Begin splitting data")
    train_size = int(0.80 * len(content_images))
    val_size = len(content_images) - train_size
    train_content, val_content = torch.utils.data.random_split(
        content_images, [train_size, val_size])

    # Pair each content image with a style image for the learner.
    train_dataset = PairedDataset(train_content, train_style)
    val_dataset = PairedDataset(val_content, val_style)

    # train_sampler = BatchSampler(RandomSampler(train_dataset), batch_size=8, drop_last=True)
    # Sampling with replacement yields 8 random pairs per epoch pass.
    train_sampler = RandomSampler(train_dataset,
                                  replacement=True,
                                  num_samples=8)
    val_sampler = RandomSampler(val_dataset, replacement=True, num_samples=8)

    print("Begin creating data dataloaders")
    dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=8)
    dataloader_val = DataLoader(val_dataset, sampler=val_sampler, batch_size=8)
    # print(len(dataloader))

    print("Creating models")
    # NOTE(review): vgg19(True) passes the pretrained flag positionally.
    feature_extractor = FeatureExtractor(model=vgg19(True),
                                         fine_tune=False,
                                         mean=mean,
                                         std=std,
                                         device=get_device(),
                                         last_layer=20)
    print("Finish creating feature extractor")

    decoder = PixelShuffleDecoderV2()
    # decoder = SimpleDecoder()
    print("Finish creating decoder")
    model = MultipleStyleTransferNetwork(encoder=FeatureExtractor(
        model=vgg19(True),
        fine_tune=False,
        mean=mean,
        std=std,
        device=get_device(),
        last_layer=20),
                                         decoder=decoder,
                                         extracted_feature=20)
    # model = MultipleStyleUNet(
    #     encoder=FeatureExtractorSequential(
    #         model=vgg19(True), fine_tune=False,
    #         mean=mean, std=std, last_layer=20
    #     ),
    #     extracted_feature=20
    # )
    # optimizer = Adam(model.parameters())
    optimizer = Adam(model.parameters(), lr=1e-4)
    # Inverse-decay LR schedule 1/(1 + 5e-5 * t), stepped per iteration.
    # NOTE(review): the lambda parameter 'iter' shadows the builtin.
    lr_scheduler = LRSchedulerCB(scheduler=LambdaLR(optimizer,
                                                    lr_lambda=lambda iter: 1 /
                                                    (1.0 + 5e-5 * iter)),
                                 timescale='iter')
    learner = MultipleStylesTransferLearner(
        dataloader,
        dataloader_val,
        model,
        feature_extractor,
        optimizer=optimizer,
        style_layers={1, 6, 11, 20},
        total_variation_weight=total_variation_weight,
        style_weight=style_weight,
        content_weight=content_weight,
        device=get_device())

    # One "epoch" here is a full pass over the training style set.
    every_iter = eval_every = print_every = compute_num_batch(
        len(train_style), batch_size)
    # every_iter = eval_every = print_every = compute_num_batch(len(val_style), batch_size)
    n_iter = every_iter * n_epoch

    callbacks = [
        ToDeviceCallback(),
        # MixedPrecisionV2(),
        Tensorboard(every_iter=every_iter, every_epoch=1),
        MultipleMetricLogger(iter_metrics=[
            "content_loss", "style_loss", "total_variation_loss", "loss"
        ],
                             print_every=print_every),
        lr_scheduler,
        ModelCheckpoint(learner=learner,
                        save_best_only=False,
                        filepath='weights/model.pt'),
        # ProgressBarCB(range(print_every))
    ]
    learner.learn(n_iter=n_iter, callbacks=callbacks, eval_every=eval_every)
예제 #11
0
# Field definitions: tokenize with spaCy, keep sequence lengths (for packed
# sequences), and pad/truncate every example to 500 tokens.
TEXT = data.Field(tokenize='spacy', include_lengths=True, fix_length=500)
LABEL = data.LabelField(dtype=torch.float)
# train_data, val_data, test_data = SST.splits(
#     text_field=TEXT,
#     label_field=LABEL
# )

# IMDB sentiment data: 80/20 split of the official train set for train/val.
train_val_data, test_data = IMDB.splits(TEXT, LABEL)
train_data, val_data = train_val_data.split(split_ratio=0.8)

# BucketIterator batches examples of similar length to minimize padding.
# NOTE(review): BATCH_SIZE is defined elsewhere in this file.
train_iterator, val_iterator, test_iterator = data.BucketIterator.splits(
    (train_data, val_data, test_data),
    batch_size=BATCH_SIZE,
    sort_within_batch=True,
    device=get_device()
)

# Build vocabularies from the training split only; initialize word vectors
# from pretrained 100-d GloVe embeddings.
TEXT.build_vocab(train_data, max_size=MAX_VOCAB_SIZE, vectors="glove.6B.100d")
LABEL.build_vocab(train_data)

# max_length = 0
# for batch in train_iterator:
#     texts, text_lengths = batch.text
#     if len(texts) > max_length:
#         max_length = len(texts)
#
# print(max_length)

INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 100
예제 #12
0
                                         shuffle=False)
# Test data is evaluated in a fixed order; no shuffling.
test_loader = torch.utils.data.DataLoader(test_dataset,
                                          batch_size=128,
                                          shuffle=False)

# print(count_trainable_parameters(model)) # 14437816 3075928

# SGD with momentum and light weight decay over the trainable parameters only.
optimizer = SGD(get_trainable_parameters(model),
                weight_decay=0.0001,
                lr=0.30,
                momentum=0.9)
learner = SupervisedImageLearner(train_data=train_loader,
                                 val_data=val_loader,
                                 model=model,
                                 criterion=SmoothedCrossEntropy().to(
                                     get_device()),
                                 optimizer=optimizer,
                                 mixup=True)

# lr_finder = LRFinder(
#     model=model,
#     train_data=train_loader,
#     criterion=SmoothedCrossEntropy(),
#     optimizer=partial(SGD, lr=0.074, weight_decay=0.0001, momentum=0.9),
#     device=get_device()
# )
# lr_finder.find_lr(warmup=100, callbacks=[ToDeviceCallback()])

# SWA: begin averaging weights after iteration 5025, updating every 670.
swa = StochasticWeightAveraging(learner, average_after=5025, update_every=670)
callbacks = [
    # ManifoldMixupCallback(learner=learner, modules=[layer_1, block_1]),