Example #1
def main():
    epochs = 5
    num_class = 10
    output_path = './output/catalyst'

    # Use if you want to fix seed
    # catalyst.utils.set_global_seed(42)
    # catalyst.utils.prepare_cudnn(deterministic=True)

    model = get_model()
    train_loader, val_loader = get_loaders()
    loaders = {"train": train_loader, "valid": val_loader}

    optimizer, lr_scheduler = get_optimizer(model=model)
    criterion = get_criterion()

    runner = SupervisedRunner(device=catalyst.utils.get_device())
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=lr_scheduler,
        loaders=loaders,
        logdir=output_path,
        callbacks=[AccuracyCallback(num_classes=num_class, accuracy_args=[1])],
        num_epochs=epochs,
        main_metric="accuracy01",
        minimize_metric=False,
        fp16=None,
        verbose=True
    )
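The snippet above relies on project-local helpers that are not shown. A minimal, self-contained sketch of the imports and stub factories it assumes might look like the following; the helper implementations are hypothetical stand-ins, and the import paths follow the pre-21 Catalyst API used throughout these examples.

# Sketch of the pieces Example #1 assumes (Catalyst 20.x-style API).
# get_model / get_loaders / get_optimizer / get_criterion are hypothetical stand-ins.
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

import catalyst
from catalyst.dl import SupervisedRunner
from catalyst.dl.callbacks import AccuracyCallback


def get_model():
    # toy 10-class classifier over flattened 28x28 inputs
    return nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))


def get_loaders(batch_size=32):
    # random tensors standing in for a real dataset
    x = torch.randn(256, 1, 28, 28)
    y = torch.randint(0, 10, (256,))
    ds = TensorDataset(x, y)
    train_loader = DataLoader(ds, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(ds, batch_size=batch_size)
    return train_loader, val_loader


def get_optimizer(model):
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[3], gamma=0.3)
    return optimizer, lr_scheduler


def get_criterion():
    return nn.CrossEntropyLoss()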
Example #2
def train(
    in_csv: str,
    in_dir: str,
    model: str = 'resnet18',
    fold: int = None,
    n_epochs: int = 30,
    image_size: int = 224,
    augmentation: str = 'medium',
    learning_rate: float = 3e-3,
    n_milestones: int = 5,
    batch_size: int = 256,
    n_workers: int = 4,
    fast: bool = False,
    logdir: str = '.',
    verbose: bool = False
):
    model = get_model(model=model)
    loss = criterion.FocalLossMultiClass()  # CrossEntropyLoss
    lr_scaled = learning_rate * (batch_size / 256)  # lr linear scaling
    optimizer = torch.optim.Adam(model.parameters(), lr=lr_scaled)
    scheduler = schedulers.MultiStepLR(
        optimizer,
        milestones=[5, 10, 20, 30, 40],
        gamma=0.3
    )

    runner = SupervisedRunner()
    runner.train(
        model=model,
        criterion=loss,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=get_dataloaders(
            in_csv=in_csv,
            in_dir=in_dir,
            stages=['train', 'valid'],
            fold=fold,
            batch_size=batch_size,
            n_workers=n_workers,
            image_size=(image_size, image_size),
            augmentation=augmentation,
            fast=fast
        ),
        callbacks=[
            AccuracyCallback(accuracy_args=[1]),
            BinaryAUCCallback()
        ],
        logdir=logdir,
        num_epochs=n_epochs,
        verbose=verbose
    )
Example #3
def get_callbacks(class_names):
    num_classes = len(class_names)
    return [
        AccuracyCallback(num_classes=num_classes),
        AUCCallback(
            num_classes=num_classes,
            input_key="targets_one_hot",
            class_names=class_names
        ),
        F1ScoreCallback(
            input_key="targets_one_hot",
            activation="Softmax"
        )
    ]
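A callbacks list like the one returned by get_callbacks above is simply handed to runner.train. Below is a brief usage sketch, assuming the same pre-21 Catalyst API and hypothetical model/loaders/criterion/optimizer objects; the "auc/_mean" metric name follows the convention visible in Example #18 further down.

# Hypothetical usage of get_callbacks(); model, loaders, criterion and optimizer are placeholders.
runner = SupervisedRunner()
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    loaders=loaders,
    callbacks=get_callbacks(class_names=["cat", "dog", "bird"]),
    logdir="./logdir",
    num_epochs=10,
    main_metric="auc/_mean",  # mean per-class AUC logged by AUCCallback
    minimize_metric=False,
)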
Example #4
def infer(
    config_path,
    log_dir
    ):
    """
        Inference:
            1. loaders
            2. model
    """

    # querying params from experiment config
    batch_size = 116


    test_dataset = LipreadingDataset(
        "test")

    loaders = {
        "infer": DataLoader(
            test_dataset,
            batch_size=batch_size,
            shuffle=True,
            num_workers=0,
            drop_last=False,)
    }

    model = LipNext()

    device = "cuda" if torch.cuda.is_available() else "cpu"
    runner = SupervisedRunner(device=device)

    runner.infer(
        model=model,
        loaders=loaders,
        callbacks=[
            AccuracyCallback(accuracy_args=[1, 3]),
            InferenceCallback(),
            CheckpointCallbackV2(
                config_path=config_path,
                resume=("/home/dmitry.klimenkov/Documents/projects/visper_pytorch/logdir"
                    "/Mobi-VSR-5W-mixed_aligned_patience5_sometests/checkpoints/train.0.35.8553.pth"))
            # NegativeMiningCallback()
        ],
        state_kwargs={
            "log_dir": log_dir
        },
        check=True
    )
Example #5
def main(args):
    logdir = "./logdir"
    num_epochs = 42

    # detect gpu
    device = utils.get_device()
    print(f"device: {device}")

    # dataset
    trainset = ImageNetK(
        '/run/media/mooziisp/仓库/datasets/Kaggle-ILSVRC/ILSVRC',
        split='train',
        transform=transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.RandomVerticalFlip(),
            transforms.ToTensor()
        ]))
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=64,
                                              shuffle=True,
                                              num_workers=2,
                                              pin_memory=True)

    loaders = {"train": trainloader}

    # define net
    net = models.resnet18(pretrained=False, num_classes=1000)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=1e-4)

    # trainer
    runner = SupervisedRunner(device=device)
    runner.train(model=net,
                 criterion=criterion,
                 optimizer=optimizer,
                 loaders=loaders,
                 logdir=logdir,
                 callbacks=[AccuracyCallback(num_classes=1000)],
                 num_epochs=num_epochs,
                 verbose=True)
Example #6
def get_callbacks(self):
    callbacks_list = [
        PrecisionRecallF1ScoreCallback(num_classes=4),  # DiceCallback(),
        EarlyStoppingCallback(**self.cb_params["earlystop"]),
        AccuracyCallback(**self.cb_params["accuracy"]),
    ]
    ckpoint_params = self.cb_params["checkpoint_params"]
    if ckpoint_params["checkpoint_path"] is not None:  # a checkpoint_path of None means: no checkpoint callback
        mode = ckpoint_params["mode"].lower()
        if mode == "full":
            print("Stateful loading...")
            ckpoint_p = Path(ckpoint_params["checkpoint_path"])
            fname = ckpoint_p.name
            # everything in the path besides the base file name
            resume_dir = str(ckpoint_p.parents[0])
            print(f"Loading {fname} from {resume_dir}."
                  f"\nCheckpoints will also be saved in {resume_dir}.")
            # adding the checkpoint callback
            callbacks_list = callbacks_list + [
                CheckpointCallback(resume=fname, resume_dir=resume_dir),
            ]
        elif mode == "model_only":
            print("Loading weights into model...")
            self.model = load_weights_train(ckpoint_params["checkpoint_path"], self.model)
    return callbacks_list
Example #7
    loaders = OrderedDict()
    loaders["train"] = train_loader
    loaders["valid"] = val_loader

    runner = dl.SupervisedRunner(device=tu.device,
                                 input_key="image",
                                 input_target_key="label",
                                 output_key="logits")

    callbacks = [
        CriterionCallback(input_key="label",
                          output_key="logits",
                          prefix="loss"),
        AccuracyCallback(input_key="label",
                         output_key="logits",
                         prefix="acc",
                         activation="Sigmoid"),
        OptimizerCallback(accumulation_steps=2),
        #MixupCallback(alpha=0.3, input_key="label", output_key="logits", fields=("image", ))
    ]
    if TRAINING:
        runner.train(model=model,
                     criterion=nn.CrossEntropyLoss(),
                     optimizer=optimizer,
                     scheduler=scheduler,
                     loaders=loaders,
                     logdir=LOGDIR,
                     num_epochs=EPOCHS,
                     fp16=tu.fp16_params,
                     callbacks=callbacks,
                     verbose=True,
Example #8
                              shuffle=True,
                              drop_last=True)

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    model = GenderModel()
    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

    runner = SupervisedRunner(device=device)
    loaders = {'train': train_loader}
    logdir = str(
        DIR_DATA_LOGS /
        'audio') + '/gender/' + datetime.now().strftime("%Y%m%d-%H%M%S")

    callbacks = [
        AccuracyCallback(),
        F1ScoreCallback(),
        ConfusionMatrixCallback(num_classes=2, class_names=['female', 'male'])
    ]

    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        callbacks=callbacks,
        logdir=logdir,
        num_epochs=epochs,
        # verbose=True
    )
Example #9
train_iter, val_iter, test_iter = data.BucketIterator.splits(
    (train, val, test),
    batch_sizes=(64, 64, 64),
    device=device,
    repeat=False,
    sort=False)

train_loader = BucketIteratorWrapper(train_iter)
valid_loader = BucketIteratorWrapper(val_iter)
loaders = {"train": train_loader, "valid": valid_loader}

TEXT.build_vocab(train, min_freq=2)
LABELS.build_vocab(train)

model = RNN(len(TEXT.vocab.stoi) + 1, num_layers=2, output_size=4)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

runner = SupervisedRunner()

runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    loaders=loaders,
    logdir="./logdir",
    callbacks=[AccuracyCallback(num_classes=4, accuracy_args=[1])],
    num_epochs=10,
    verbose=True,
)
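BucketIteratorWrapper is not defined in the snippet above. Catalyst's SupervisedRunner can consume loaders that yield plain (features, targets) batches, so a plausible minimal wrapper around a legacy torchtext BucketIterator could look like the sketch below; the Text and Label field names are assumptions about how the dataset was built.

# Minimal sketch of a wrapper that makes a legacy torchtext BucketIterator behave like
# a DataLoader yielding (features, targets) tuples. Field names are assumptions.
class BucketIteratorWrapper:
    def __init__(self, iterator):
        self.iterator = iterator

    def __iter__(self):
        for batch in self.iterator:
            yield batch.Text, batch.Label  # (features, targets), as SupervisedRunner expects

    def __len__(self):
        return len(self.iterator)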
Example #10
def main():
    # setup config
    cfg = config()
    cfg['device'] = torch.device(
        "cuda" if torch.cuda.is_available() else "cpu")
    timestr = time.strftime("%Y%m%d-%H%M%S")
    cfg['logdir'] += f"{cfg['arch']}_"
    cfg['logdir'] += f"{cfg['exp_idx']}_"
    cfg['logdir'] += f"{cfg['input_size']}_"
    cfg['logdir'] += f"{cfg['criterion']}_"
    cfg['logdir'] += f"{cfg['optimizer']}_"
    cfg['logdir'] += f"split{cfg['data_split']}_"
    cfg['logdir'] += timestr
    set_global_seed(cfg['random_state'])
    pprint(cfg)

    # load data
    train_df = pd.read_csv(cfg['train_csv_path'])
    test_df = pd.read_csv(cfg['test_csv_path'])
    print(len(train_df), len(test_df))
    train_img_weights = compute_dataset_weights(train_df)

    train_transforms, test_transforms = get_transforms(cfg['input_size'])
    train_dataset = LeafDataset(
        img_root=cfg['img_root'],
        df=train_df,
        img_transforms=train_transforms,
        is_train=True,
    )

    test_dataset = LeafDataset(
        img_root=cfg['img_root'],
        df=test_df,
        img_transforms=test_transforms,
        is_train=False,
    )
    print(
        f"Training set size:{len(train_dataset)}, Test set size:{len(test_dataset)}")

    # prepare train and test loader
    if cfg['sampling'] == 'weighted':
        # image weight based on statistics
        train_img_weights = compute_dataset_weights(train_df)
        # weighted sampler
        weighted_sampler = WeightedRandomSampler(
            weights=train_img_weights, num_samples=len(train_img_weights), replacement=False)
        # batch sampler from weighted sampler
        batch_sampler = BatchSampler(
            weighted_sampler, batch_size=cfg['batch_size'], drop_last=True)
        # train loader
        train_loader = DataLoader(
            train_dataset, batch_sampler=batch_sampler, num_workers=4)
    elif cfg['sampling'] == 'normal':
        train_loader = DataLoader(
            train_dataset, cfg['batch_size'], shuffle=True, num_workers=2)

    test_loader = DataLoader(
        test_dataset, cfg['test_batch_size'], shuffle=False, num_workers=1, drop_last=True)

    loaders = {
        'train': train_loader,
        'valid': test_loader
    }

    # model setup
    model = timm.create_model(model_name=cfg['arch'], num_classes=len(
        cfg['class_names']), drop_rate=0.5, pretrained=True)
    model.train()

    # loss
    if cfg['criterion'] == 'label_smooth':
        criterion = LabelSmoothingCrossEntropy()
    elif cfg['criterion'] == 'cross_entropy':
        criterion = nn.CrossEntropyLoss()

    # optimizer
    if cfg['optimizer'] == 'adam':
        optimizer = torch.optim.Adam(
            model.parameters(), lr=cfg['lr'], weight_decay=cfg['wd'])
    elif cfg['optimizer'] == 'adamw':
        optimizer = AdamW(
            model.parameters(), lr=cfg['lr'], weight_decay=cfg['wd'])
    elif cfg['optimizer'] == 'radam':
        optimizer = RAdam(
            model.parameters(), lr=cfg['lr'], weight_decay=cfg['wd'])

    # learning schedule
    if cfg['lr_schedule'] == 'reduce_plateau':
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, factor=0.5, patience=4)

    # trainer
    runner = SupervisedRunner(device=cfg['device'])
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,

        callbacks=[
            AccuracyCallback(
                num_classes=len(cfg['class_names']),
                threshold=0.5,
                activation="Softmax"
            ),
        ],
        logdir=cfg['logdir'],
        num_epochs=cfg['num_epochs'],
        verbose=cfg['verbose'],
        # set this true to run for 3 epochs only
        check=cfg['check'],
    )
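compute_dataset_weights is project code that is not shown; with cfg['sampling'] == 'weighted' the intent is one weight per training image, inversely proportional to its class frequency. A hedged sketch of such a helper, assuming the dataframe has a "label" column:

# Hypothetical helper along the lines of what Example #10 assumes: per-image sampling
# weights from inverse class frequency. The "label" column name is an assumption.
import numpy as np
import pandas as pd


def compute_dataset_weights(df: pd.DataFrame, label_col: str = "label") -> np.ndarray:
    class_counts = df[label_col].value_counts()
    weights = df[label_col].map(lambda c: 1.0 / class_counts[c])
    return weights.to_numpy(dtype=np.float32)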
Example #11
for param in resnet.layer4.parameters():
    param.requires_grad = True

loss_fn = nn.CrossEntropyLoss()
opt = torch.optim.SGD(resnet.parameters(), lr=0.01, momentum=0.9)
logdir = '/tmp/protein/logs/'
runner = SupervisedRunner()
sched = OneCycleLR(opt,
                   num_steps=epochs * len(loaders['train']),
                   warmup_fraction=0.3,
                   lr_range=(0.1, 0.0001))

runner.train(model=resnet,
             criterion=loss_fn,
             optimizer=opt,
             loaders=loaders,
             logdir=logdir,
             num_epochs=epochs,
             scheduler=sched,
             callbacks=[
                 AccuracyCallback(num_classes=num_classes),
                 F1ScoreCallback(input_key="targets_one_hot",
                                 activation="Softmax")
             ],
             verbose=True)

print('Saving the trained model')
basedir = os.path.expanduser('~/data/protein/tmp/models')
os.makedirs(basedir, exist_ok=True)
torch.save(resnet, os.path.join(basedir, 'resnet50_simple.pth'))
Example #12
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-acc",
                        "--accumulation-steps",
                        type=int,
                        default=1,
                        help="Number of batches to process")
    parser.add_argument("--seed", type=int, default=42, help="Random seed")
    parser.add_argument("-v", "--verbose", action="store_true")
    parser.add_argument("--fast", action="store_true")
    parser.add_argument("-dd",
                        "--data-dir",
                        type=str,
                        required=True,
                        help="Data directory for INRIA sattelite dataset")
    parser.add_argument("-m",
                        "--model",
                        type=str,
                        default="resnet34_fpncat128",
                        help="")
    parser.add_argument("-b",
                        "--batch-size",
                        type=int,
                        default=8,
                        help="Batch Size during training, e.g. -b 64")
    parser.add_argument("-e",
                        "--epochs",
                        type=int,
                        default=100,
                        help="Epoch to run")
    # parser.add_argument('-es', '--early-stopping', type=int, default=None, help='Maximum number of epochs without improvement')
    # parser.add_argument('-fe', '--freeze-encoder', type=int, default=0, help='Freeze encoder parameters for N epochs')
    # parser.add_argument('-ft', '--fine-tune', action='store_true')
    parser.add_argument("-lr",
                        "--learning-rate",
                        type=float,
                        default=1e-3,
                        help="Initial learning rate")
    parser.add_argument(
        "--disaster-type-loss",
        type=str,
        default=None,  # [["ce", 1.0]],
        action="append",
        nargs="+",
        help="Criterion for classifying disaster type",
    )
    parser.add_argument(
        "--damage-type-loss",
        type=str,
        default=None,  # [["bce", 1.0]],
        action="append",
        nargs="+",
        help=
        "Criterion for classifying presence of building with particular damage type",
    )

    parser.add_argument("-l",
                        "--criterion",
                        type=str,
                        default=None,
                        action="append",
                        nargs="+",
                        help="Criterion")
    parser.add_argument("--mask4",
                        type=str,
                        default=None,
                        action="append",
                        nargs="+",
                        help="Criterion for mask with stride 4")
    parser.add_argument("--mask8",
                        type=str,
                        default=None,
                        action="append",
                        nargs="+",
                        help="Criterion for mask with stride 8")
    parser.add_argument("--mask16",
                        type=str,
                        default=None,
                        action="append",
                        nargs="+",
                        help="Criterion for mask with stride 16")
    parser.add_argument("--mask32",
                        type=str,
                        default=None,
                        action="append",
                        nargs="+",
                        help="Criterion for mask with stride 32")
    parser.add_argument("--embedding", type=str, default=None)

    parser.add_argument("-o",
                        "--optimizer",
                        default="RAdam",
                        help="Name of the optimizer")
    parser.add_argument(
        "-c",
        "--checkpoint",
        type=str,
        default=None,
        help="Checkpoint filename to use as initial model weights")
    parser.add_argument("-w",
                        "--workers",
                        default=8,
                        type=int,
                        help="Num workers")
    parser.add_argument("-a",
                        "--augmentations",
                        default="safe",
                        type=str,
                        help="Level of image augmentations")
    parser.add_argument("--transfer", default=None, type=str, help="")
    parser.add_argument("--fp16", action="store_true")
    parser.add_argument("--size", default=512, type=int)
    parser.add_argument("--fold", default=0, type=int)
    parser.add_argument("-s",
                        "--scheduler",
                        default="multistep",
                        type=str,
                        help="")
    parser.add_argument("-x", "--experiment", default=None, type=str, help="")
    parser.add_argument("-d",
                        "--dropout",
                        default=0.0,
                        type=float,
                        help="Dropout before head layer")
    parser.add_argument("-pl", "--pseudolabeling", type=str, required=True)
    parser.add_argument("-wd",
                        "--weight-decay",
                        default=0,
                        type=float,
                        help="L2 weight decay")
    parser.add_argument("--show", action="store_true")
    parser.add_argument("--dsv", action="store_true")
    parser.add_argument("--balance", action="store_true")
    parser.add_argument("--only-buildings", action="store_true")
    parser.add_argument("--freeze-bn", action="store_true")
    parser.add_argument("--crops",
                        action="store_true",
                        help="Train on random crops")
    parser.add_argument("--post-transform", action="store_true")

    args = parser.parse_args()
    set_manual_seed(args.seed)

    data_dir = args.data_dir
    num_workers = args.workers
    num_epochs = args.epochs
    learning_rate = args.learning_rate
    model_name = args.model
    optimizer_name = args.optimizer
    image_size = args.size, args.size
    fast = args.fast
    augmentations = args.augmentations
    fp16 = args.fp16
    scheduler_name = args.scheduler
    experiment = args.experiment
    dropout = args.dropout
    segmentation_losses = args.criterion
    verbose = args.verbose
    show = args.show
    accumulation_steps = args.accumulation_steps
    weight_decay = args.weight_decay
    fold = args.fold
    balance = args.balance
    only_buildings = args.only_buildings
    freeze_bn = args.freeze_bn
    train_on_crops = args.crops
    enable_post_image_transform = args.post_transform
    disaster_type_loss = args.disaster_type_loss
    train_batch_size = args.batch_size
    embedding_criterion = args.embedding
    damage_type_loss = args.damage_type_loss
    pseudolabels_dir = args.pseudolabeling

    # Compute batch size for validation
    if train_on_crops:
        valid_batch_size = max(1,
                               (train_batch_size *
                                (image_size[0] * image_size[1])) // (1024**2))
    else:
        valid_batch_size = train_batch_size

    run_train = num_epochs > 0

    model: nn.Module = get_model(model_name, dropout=dropout).cuda()

    if args.transfer:
        transfer_checkpoint = fs.auto_file(args.transfer)
        print("Transfering weights from model checkpoint", transfer_checkpoint)
        checkpoint = load_checkpoint(transfer_checkpoint)
        pretrained_dict = checkpoint["model_state_dict"]

        transfer_weights(model, pretrained_dict)

    if args.checkpoint:
        checkpoint = load_checkpoint(fs.auto_file(args.checkpoint))
        unpack_checkpoint(checkpoint, model=model)

        print("Loaded model weights from:", args.checkpoint)
        report_checkpoint(checkpoint)

    if freeze_bn:
        torch_utils.freeze_bn(model)
        print("Freezing bn params")

    runner = SupervisedRunner(input_key=INPUT_IMAGE_KEY, output_key=None)
    main_metric = "weighted_f1"
    cmd_args = vars(args)

    current_time = datetime.now().strftime("%b%d_%H_%M")
    checkpoint_prefix = f"{current_time}_{args.model}_{args.size}_fold{fold}"

    if fp16:
        checkpoint_prefix += "_fp16"

    if fast:
        checkpoint_prefix += "_fast"

    if pseudolabels_dir:
        checkpoint_prefix += "_pseudo"

    if train_on_crops:
        checkpoint_prefix += "_crops"

    if experiment is not None:
        checkpoint_prefix = experiment

    log_dir = os.path.join("runs", checkpoint_prefix)
    os.makedirs(log_dir, exist_ok=False)

    config_fname = os.path.join(log_dir, f"{checkpoint_prefix}.json")
    with open(config_fname, "w") as f:
        train_session_args = vars(args)
        f.write(json.dumps(train_session_args, indent=2))

    default_callbacks = [
        CompetitionMetricCallback(input_key=INPUT_MASK_KEY,
                                  output_key=OUTPUT_MASK_KEY,
                                  prefix="weighted_f1"),
        ConfusionMatrixCallback(
            input_key=INPUT_MASK_KEY,
            output_key=OUTPUT_MASK_KEY,
            class_names=[
                "land", "no_damage", "minor_damage", "major_damage",
                "destroyed"
            ],
            ignore_index=UNLABELED_SAMPLE,
        ),
    ]

    if show:
        default_callbacks += [
            ShowPolarBatchesCallback(draw_predictions,
                                     metric=main_metric + "_batch",
                                     minimize=False)
        ]

    train_ds, valid_ds, train_sampler = get_datasets(
        data_dir=data_dir,
        image_size=image_size,
        augmentation=augmentations,
        fast=fast,
        fold=fold,
        balance=balance,
        only_buildings=only_buildings,
        train_on_crops=train_on_crops,
        crops_multiplication_factor=1,
        enable_post_image_transform=enable_post_image_transform,
    )

    if run_train:
        loaders = collections.OrderedDict()
        callbacks = default_callbacks.copy()
        criterions_dict = {}
        losses = []

        unlabeled_train = get_pseudolabeling_dataset(
            data_dir,
            include_masks=True,
            image_size=image_size,
            augmentation="medium_nmd",
            train_on_crops=train_on_crops,
            enable_post_image_transform=enable_post_image_transform,
            pseudolabels_dir=pseudolabels_dir,
        )

        train_ds = train_ds + unlabeled_train

        print("Using online pseudolabeling with ", len(unlabeled_train),
              "samples")

        loaders["train"] = DataLoader(
            train_ds,
            batch_size=train_batch_size,
            num_workers=num_workers,
            pin_memory=True,
            drop_last=True,
            shuffle=True,
        )

        loaders["valid"] = DataLoader(valid_ds,
                                      batch_size=valid_batch_size,
                                      num_workers=num_workers,
                                      pin_memory=True)

        # Create losses
        for criterion in segmentation_losses:
            if isinstance(criterion, (list, tuple)) and len(criterion) == 2:
                loss_name, loss_weight = criterion
            else:
                loss_name, loss_weight = criterion[0], 1.0

            cd, criterion, criterion_name = get_criterion_callback(
                loss_name,
                prefix="segmentation",
                input_key=INPUT_MASK_KEY,
                output_key=OUTPUT_MASK_KEY,
                loss_weight=float(loss_weight),
            )
            criterions_dict.update(cd)
            callbacks.append(criterion)
            losses.append(criterion_name)
            print(INPUT_MASK_KEY, "Using loss", loss_name, loss_weight)

        if args.mask4 is not None:
            for criterion in args.mask4:
                if isinstance(criterion, (list, tuple)):
                    loss_name, loss_weight = criterion
                else:
                    loss_name, loss_weight = criterion, 1.0

                cd, criterion, criterion_name = get_criterion_callback(
                    loss_name,
                    prefix="mask4",
                    input_key=INPUT_MASK_KEY,
                    output_key=OUTPUT_MASK_4_KEY,
                    loss_weight=float(loss_weight),
                )
                criterions_dict.update(cd)
                callbacks.append(criterion)
                losses.append(criterion_name)
                print(OUTPUT_MASK_4_KEY, "Using loss", loss_name, loss_weight)

        if args.mask8 is not None:
            for criterion in args.mask8:
                if isinstance(criterion, (list, tuple)):
                    loss_name, loss_weight = criterion
                else:
                    loss_name, loss_weight = criterion, 1.0

                cd, criterion, criterion_name = get_criterion_callback(
                    loss_name,
                    prefix="mask8",
                    input_key=INPUT_MASK_KEY,
                    output_key=OUTPUT_MASK_8_KEY,
                    loss_weight=float(loss_weight),
                )
                criterions_dict.update(cd)
                callbacks.append(criterion)
                losses.append(criterion_name)
                print(OUTPUT_MASK_8_KEY, "Using loss", loss_name, loss_weight)

        if args.mask16 is not None:
            for criterion in args.mask16:
                if isinstance(criterion, (list, tuple)):
                    loss_name, loss_weight = criterion
                else:
                    loss_name, loss_weight = criterion, 1.0

                cd, criterion, criterion_name = get_criterion_callback(
                    loss_name,
                    prefix="mask16",
                    input_key=INPUT_MASK_KEY,
                    output_key=OUTPUT_MASK_16_KEY,
                    loss_weight=float(loss_weight),
                )
                criterions_dict.update(cd)
                callbacks.append(criterion)
                losses.append(criterion_name)
                print(OUTPUT_MASK_16_KEY, "Using loss", loss_name, loss_weight)

        if args.mask32 is not None:
            for criterion in args.mask32:
                if isinstance(criterion, (list, tuple)):
                    loss_name, loss_weight = criterion
                else:
                    loss_name, loss_weight = criterion, 1.0

                cd, criterion, criterion_name = get_criterion_callback(
                    loss_name,
                    prefix="mask32",
                    input_key=INPUT_MASK_KEY,
                    output_key=OUTPUT_MASK_32_KEY,
                    loss_weight=float(loss_weight),
                )
                criterions_dict.update(cd)
                callbacks.append(criterion)
                losses.append(criterion_name)
                print(OUTPUT_MASK_32_KEY, "Using loss", loss_name, loss_weight)

        if disaster_type_loss is not None:
            callbacks += [
                ConfusionMatrixCallback(
                    input_key=DISASTER_TYPE_KEY,
                    output_key=DISASTER_TYPE_KEY,
                    class_names=DISASTER_TYPES,
                    ignore_index=UNKNOWN_DISASTER_TYPE_CLASS,
                    prefix=f"{DISASTER_TYPE_KEY}/confusion_matrix",
                ),
                AccuracyCallback(
                    input_key=DISASTER_TYPE_KEY,
                    output_key=DISASTER_TYPE_KEY,
                    prefix=f"{DISASTER_TYPE_KEY}/accuracy",
                    activation="Softmax",
                ),
            ]

            for criterion in disaster_type_loss:
                if isinstance(criterion, (list, tuple)):
                    loss_name, loss_weight = criterion
                else:
                    loss_name, loss_weight = criterion, 1.0

                cd, criterion, criterion_name = get_criterion_callback(
                    loss_name,
                    prefix=DISASTER_TYPE_KEY,
                    input_key=DISASTER_TYPE_KEY,
                    output_key=DISASTER_TYPE_KEY,
                    loss_weight=float(loss_weight),
                    ignore_index=UNKNOWN_DISASTER_TYPE_CLASS,
                )
                criterions_dict.update(cd)
                callbacks.append(criterion)
                losses.append(criterion_name)
                print(DISASTER_TYPE_KEY, "Using loss", loss_name, loss_weight)

        if damage_type_loss is not None:
            callbacks += [
                # MultilabelConfusionMatrixCallback(
                #     input_key=DAMAGE_TYPE_KEY,
                #     output_key=DAMAGE_TYPE_KEY,
                #     class_names=DAMAGE_TYPES,
                #     prefix=f"{DAMAGE_TYPE_KEY}/confusion_matrix",
                # ),
                AccuracyCallback(
                    input_key=DAMAGE_TYPE_KEY,
                    output_key=DAMAGE_TYPE_KEY,
                    prefix=f"{DAMAGE_TYPE_KEY}/accuracy",
                    activation="Sigmoid",
                    threshold=0.5,
                )
            ]

            for criterion in damage_type_loss:
                if isinstance(criterion, (list, tuple)):
                    loss_name, loss_weight = criterion
                else:
                    loss_name, loss_weight = criterion, 1.0

                cd, criterion, criterion_name = get_criterion_callback(
                    loss_name,
                    prefix=DAMAGE_TYPE_KEY,
                    input_key=DAMAGE_TYPE_KEY,
                    output_key=DAMAGE_TYPE_KEY,
                    loss_weight=float(loss_weight),
                )
                criterions_dict.update(cd)
                callbacks.append(criterion)
                losses.append(criterion_name)
                print(DAMAGE_TYPE_KEY, "Using loss", loss_name, loss_weight)

        if embedding_criterion is not None:
            cd, criterion, criterion_name = get_criterion_callback(
                embedding_criterion,
                prefix="embedding",
                input_key=INPUT_MASK_KEY,
                output_key=OUTPUT_EMBEDDING_KEY,
                loss_weight=1.0,
            )
            criterions_dict.update(cd)
            callbacks.append(criterion)
            losses.append(criterion_name)
            print(OUTPUT_EMBEDDING_KEY, "Using loss", embedding_criterion)

        callbacks += [
            CriterionAggregatorCallback(prefix="loss", loss_keys=losses),
            OptimizerCallback(accumulation_steps=accumulation_steps,
                              decouple_weight_decay=False),
        ]

        optimizer = get_optimizer(optimizer_name,
                                  get_optimizable_parameters(model),
                                  learning_rate,
                                  weight_decay=weight_decay)
        scheduler = get_scheduler(scheduler_name,
                                  optimizer,
                                  lr=learning_rate,
                                  num_epochs=num_epochs,
                                  batches_in_epoch=len(loaders["train"]))
        if isinstance(scheduler, CyclicLR):
            callbacks += [SchedulerCallback(mode="batch")]

        print("Train session    :", checkpoint_prefix)
        print("  FP16 mode      :", fp16)
        print("  Fast mode      :", args.fast)
        print("  Epochs         :", num_epochs)
        print("  Workers        :", num_workers)
        print("  Data dir       :", data_dir)
        print("  Log dir        :", log_dir)
        print("Data             ")
        print("  Augmentations  :", augmentations)
        print("  Train size     :", len(loaders["train"]), len(train_ds))
        print("  Valid size     :", len(loaders["valid"]), len(valid_ds))
        print("  Image size     :", image_size)
        print("  Train on crops :", train_on_crops)
        print("  Balance        :", balance)
        print("  Buildings only :", only_buildings)
        print("  Post transform :", enable_post_image_transform)
        print("  Pseudolabels   :", pseudolabels_dir)
        print("Model            :", model_name)
        print("  Parameters     :", count_parameters(model))
        print("  Dropout        :", dropout)
        print("Optimizer        :", optimizer_name)
        print("  Learning rate  :", learning_rate)
        print("  Weight decay   :", weight_decay)
        print("  Scheduler      :", scheduler_name)
        print("  Batch sizes    :", train_batch_size, valid_batch_size)
        print("  Criterion      :", segmentation_losses)
        print("  Damage type    :", damage_type_loss)
        print("  Disaster type  :", disaster_type_loss)
        print(" Embedding      :", embedding_criterion)

        # model training
        runner.train(
            fp16=fp16,
            model=model,
            criterion=criterions_dict,
            optimizer=optimizer,
            scheduler=scheduler,
            callbacks=callbacks,
            loaders=loaders,
            logdir=os.path.join(log_dir, "opl"),
            num_epochs=num_epochs,
            verbose=verbose,
            main_metric=main_metric,
            minimize_metric=False,
            checkpoint_data={"cmd_args": cmd_args},
        )

        # Training is finished. Let's run predictions using best checkpoint weights
        best_checkpoint = os.path.join(log_dir, "main", "checkpoints",
                                       "best.pth")

        model_checkpoint = os.path.join(log_dir, "main", "checkpoints",
                                        f"{checkpoint_prefix}.pth")
        clean_checkpoint(best_checkpoint, model_checkpoint)

        del optimizer, loaders
Example #13
def main(
    config_path,
    log_dir=None,
    experiment_name=None,
    dataset_dir=None,
    num_epochs=50,
    num_labels=20,
    batch_size=38,
    num_workers=4,
    val_batch_size_multiplier=2,
    lr=1e-3,
    scheduler=None,
    optimizer=None,
    weight_decay=0,
    class_weight=None,
    check=False,
    verbose=True,
    cudnn_benchmark=True):

    if torch.cuda.is_available() and cudnn_benchmark:
        torch.backends.cudnn.benchmark = True
    
    # experiment setup
    logdir = log_dir + experiment_name
    num_epochs = num_epochs

    # data
    train_dataset = LipreadingDataset(
        phase = "train")

    valid_dataset = LipreadingDataset(
        phase = "val")

    loaders = {
        "train": DataLoader(
            train_dataset,
            batch_size=batch_size,
            shuffle=True,
            num_workers=num_workers,
            drop_last=True),

        "valid": DataLoader(
            valid_dataset,
            batch_size=val_batch_size_multiplier*batch_size,
            shuffle=True,
            num_workers=num_workers,
            drop_last=False)
    }

    # model, criterion, optimizer
    model = LipNext()

    criterion = torch.nn.CrossEntropyLoss(weight=class_weight)
    optimizer = optimizer(model.parameters(), lr=lr, weight_decay=weight_decay)
    if scheduler:
        scheduler = scheduler(optimizer)
    else:
        scheduler = scheduler
    
    # model runner
    device = torch.device("cpu")
    if torch.cuda.is_available():
        device = torch.device("cuda")

    runner = SupervisedRunner(device=device)

    # callbacks
    acc_callback = AccuracyCallback(accuracy_args=[1, 3])
    ckpt_callback = CheckpointCallbackV2(config_path=config_path)
    neg_mining_callback = NegativeMiningCallback()
    callbacks = [acc_callback, ckpt_callback, neg_mining_callback]


    # model training
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        callbacks=callbacks,
        loaders=loaders,
        logdir=logdir,
        main_metric="accuracy01",
        minimize_metric=False,
        num_epochs=num_epochs,
        verbose=verbose,
        check=check
    )
Example #14
logdir = "./logs/effnet-b0"
fp16_params = None  # dict(opt_level="O1")
runner = SupervisedRunner(device='cuda')


runner.train(
    model=model,
    criterion=criterion,
    scheduler=scheduler,
    optimizer=optimizer,
    loaders=loaders,
    callbacks=[
        # wAUC(),
        F1ScoreCallback(),
        AUCCallback(num_classes=4),
        AccuracyCallback(prefix='ACC'),
        OptimizerCallback(accumulation_steps=args.acc)],
    logdir=logdir,
    num_epochs=num_epochs,
    fp16=fp16_params,
    verbose=True
)
if args.test > 0:
    test_preds_proba: Union[List, Iterable, np.ndarray] = []
    model.eval()
    progress_bar_test = tqdm(test_dataset)
    with torch.no_grad():
        for i, im in enumerate(progress_bar_test):
            inputs = im.to('cuda')
            # flip horizontal
            im = kornia.augmentation.F.hflip(inputs)
Example #15
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                 milestones=[3, 8],
                                                 gamma=0.3)

# model runner
runner = SupervisedRunner()

# model training
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    callbacks=[
        AccuracyCallback(accuracy_args=[1, 3, 5]),
        EarlyStoppingCallback(patience=2, min_delta=0.01),
    ],
    logdir=logdir,
    num_epochs=num_epochs,
    check=True,
)

# In[ ]:

# utils.plot_metrics(
#     logdir=logdir,
#     metrics=["loss", "accuracy01", "accuracy03", "_base/lr"])

# # Setup 5 - training with 1cycle
Example #16
scheduler = torch.optim.lr_scheduler.MultiStepLR(
    optimizer, milestones=[3, 8], gamma=0.3
)

# model runner
runner = SupervisedRunner()

# model training
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    callbacks=[
        AccuracyCallback(accuracy_args=[1, 3, 5]),
        EarlyStoppingCallback(patience=2, min_delta=0.01),
    ],
    logdir=logdir,
    num_epochs=num_epochs,
    check=True,
)

# In[ ]:

# utils.plot_metrics(
#     logdir=logdir,
#     metrics=["loss", "accuracy01", "accuracy03", "_base/lr"])

# # Setup 5 - training with 1cycle
Example #17
    loaders = OrderedDict()
    loaders["train"] = train_dl
    loaders["valid"] = valid_dl

    # model
    model = AttentionModel(INPUT_DIM, HID_DIM, OUTPUT_DIM, RECURRENT_Layers,
                           DROPOUT).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [20, 60])
    criterion = torch.nn.CrossEntropyLoss()

    # model training
    runner = SupervisedRunner()
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        logdir=logdir,
        num_epochs=EPOCHS,
        verbose=True,
        callbacks=[
            AccuracyCallback(num_classes=5, topk_args=[1, 2]),
            EarlyStoppingCallback(metric='accuracy01',
                                  minimize=False,
                                  patience=10)
        ],
    )
Example #18
def main():
    args = get_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus
    SEED = 42
    utils.set_global_seed(SEED)
    utils.prepare_cudnn(deterministic=True)
    num_classes = 14

    # define datasets
    train_dataset = ChestXrayDataSet(
        data_dir=args.path_to_images,
        image_list_file=args.train_list,
        transform=transforms_train,
    )

    val_dataset = ChestXrayDataSet(
        data_dir=args.path_to_images,
        image_list_file=args.val_list,
        transform=transforms_val,
    )

    loaders = {
        'train':
        DataLoader(train_dataset,
                   batch_size=args.batch_size,
                   shuffle=True,
                   num_workers=args.num_workers),
        'valid':
        DataLoader(val_dataset,
                   batch_size=2,
                   shuffle=False,
                   num_workers=args.num_workers)
    }

    logdir = args.log_dir  # where model weights and logs are stored

    # define model
    model = DenseNet121(num_classes)
    if len(args.gpus) > 1:
        model = nn.DataParallel(model)
    device = utils.get_device()
    runner = SupervisedRunner(device=device)

    optimizer = RAdam(model.parameters(), lr=args.lr, weight_decay=0.0003)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     factor=0.25,
                                                     patience=2)

    weights = torch.Tensor(
        [10, 100, 30, 8, 40, 40, 330, 140, 35, 155, 110, 250, 155,
         200]).to(device)
    criterion = BCEWithLogitsLoss(pos_weight=weights)

    class_names = [
        'Atelectasis', 'Cardiomegaly', 'Effusion', 'Infiltration', 'Mass',
        'Nodule', 'Pneumonia', 'Pneumothorax', 'Consolidation', 'Edema',
        'Emphysema', 'Fibrosis', 'Pleural_Thickening', 'Hernia'
    ]

    runner.train(
        model=model,
        logdir=logdir,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        num_epochs=args.epochs,

        # We can specify the callbacks list for the experiment;
        # For this task, we will check AUC and accuracy
        callbacks=[
            AUCCallback(
                input_key="targets",
                output_key='logits',
                prefix='auc',
                class_names=class_names,
                num_classes=num_classes,
                activation='Sigmoid',
            ),
            AccuracyCallback(
                input_key="targets",
                output_key="logits",
                prefix="accuracy",
                accuracy_args=[1],
                num_classes=14,
                threshold=0.5,
                activation='Sigmoid',
            ),
        ],
        main_metric='auc/_mean',
        minimize_metric=False,
        verbose=True,
    )
Example #19
def train_model(
    df_train,
    df_valid,
    model_class,
    model_params,
    vectorizer,
    general_params,
):
    vectorizer = copy.deepcopy(vectorizer)
    vectorizer.fit(df_train["text"])

    df_train = make_df(df_train, vectorizer)
    train_ds = GeneralDataset(
        df_train["tokens"].values,
        labels=df_train["label"].values,
        max_sentence_len=general_params["max_sentence_len"],
    )
    trainloader = DataLoader(
        dataset=train_ds,
        batch_size=general_params["batch_size"],
        shuffle=True,
        num_workers=general_params["num_workers"],
    )

    df_valid = make_df(df_valid, vectorizer)
    valid_ds = GeneralDataset(
        df_valid["tokens"].values,
        labels=df_valid["label"].values,
        max_sentence_len=general_params["max_sentence_len"],
    )
    validloader = DataLoader(
        dataset=valid_ds,
        batch_size=general_params["batch_size"],
        shuffle=False,
        num_workers=general_params["num_workers"],
    )

    loaders = collections.OrderedDict()
    loaders["train"] = trainloader
    loaders["valid"] = validloader

    model_params = copy.deepcopy(model_params)
    model_params.update({"vocab_size": len(vectorizer.vocabulary_)})
    model = model_class(**model_params).float()

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), general_params["lr"])

    runner = SupervisedRunner()
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        callbacks=[
            AccuracyCallback(),
            EarlyStoppingCallback(patience=general_params["patience"],
                                  metric="accuracy01",
                                  minimize=False),
        ],
        logdir=general_params["logdir"],
        num_epochs=general_params["num_epochs"],
        main_metric="accuracy01",
        minimize_metric=False,
        load_best_on_end=True,
        verbose=False,
    )

    with open(os.path.join(general_params["logdir"], "vectorizer.pickle"),
              "wb") as output_file:
        pickle.dump(vectorizer, output_file)
Example #20
def main():
    cifar_train = CIFAR10('.',
                          train=True,
                          transform=transforms.Compose([
                              transforms.Resize((224, 224)),
                              transforms.ToTensor()
                          ]),
                          download=True)
    cifar_test = CIFAR10('.',
                         train=False,
                         transform=transforms.Compose([
                             transforms.Resize((224, 224)),
                             transforms.ToTensor()
                         ]),
                         download=True)

    dl_train = DataLoader(cifar_train, batch_size=16)
    dl_test = DataLoader(cifar_test, batch_size=16)

    logdir = "./logdir/Adam"
    num_epochs = 10

    loaders = {'train': dl_train, 'valid': dl_test}

    model = resnet34()
    for name, param in model.named_parameters():
        param.requires_grad = True

    model.train()
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters())
    runner = dl.SupervisedRunner()

    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        num_epochs=num_epochs,
        verbose=True,
        logdir=logdir,
        callbacks=[
            logger.TensorboardLogger(),
            AccuracyCallback(num_classes=10)
        ],
    )

    logdir = "./logdir/AdamW"

    model.apply(init_weights)
    optimizer = AdamW(model.parameters())
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        num_epochs=num_epochs,
        verbose=True,
        logdir=logdir,
        callbacks=[
            logger.TensorboardLogger(),
            AccuracyCallback(num_classes=10)
        ],
    )

    logdir = "./logdir/RAdam"

    model.apply(init_weights)
    optimizer = RAdam(model.parameters())
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        num_epochs=num_epochs,
        verbose=True,
        logdir=logdir,
        callbacks=[
            logger.TensorboardLogger(),
            AccuracyCallback(num_classes=10)
        ],
    )
Example #21
valid_dl = BaseDataLoader(valid_dataset,
                          batch_size=BATCH_SIZE,
                          shuffle=False,
                          num_workers=4)

# experiment setup
num_epochs = EPOCHS
logdir = LOGDIR
loaders = {"train": train_dl, "valid": valid_dl}
criterion = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=LR)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                 milestones=[15, 19, 22])

callbacks = [
    AccuracyCallback(num_classes=2, activation='Sigmoid', threshold=0.5),
]
runner = SupervisedRunner()

# Train
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    callbacks=callbacks,
    loaders=loaders,
    logdir=logdir,
    num_epochs=num_epochs,
    verbose=1,
    scheduler=scheduler,
    # main_metric='f1_score',
Example #22
scheduler = torch.optim.lr_scheduler.MultiStepLR(
    optimizer, milestones=[3, 8], gamma=0.3
)

# model runner
runner = SupervisedRunner()

# model training
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    callbacks=[
        AccuracyCallback(topk_args=[1, 3, 5]),
        EarlyStoppingCallback(patience=2, min_delta=0.01),
    ],
    logdir=logdir,
    num_epochs=num_epochs,
    check=True,
)

# In[ ]:

# utils.plot_metrics(
#     logdir=logdir,
#     metrics=["loss", "accuracy01", "accuracy03", "_base/lr"])

# # Setup 5 - training with 1cycle
    "train": train_dl,
    "valid": valid_dl
}
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD([
    {'params': model.layer1.parameters(), 'lr': LR / 10},
    {'params': model.layer2.parameters(), 'lr': LR / 5},
    {'params': model.layer3.parameters(), 'lr': LR / 2},
    {'params': model.layer4.parameters(), 'lr': LR / 1},
], lr=LR)
# scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=[LR / 10, LR / 5, LR / 2, LR / 1], total_steps=100)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10, eta_min=1e-7)
# scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.5, cooldown=2, min_lr=1e-7)

callbacks = [
    AccuracyCallback(num_classes=5, threshold=0.5, activation='Softmax'),
    F1ScoreCallback(input_key="targets_one_hot", activation='Softmax', threshold=0.5),
]
runner = SupervisedRunner()

## Step 1.

runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    callbacks=callbacks,
    loaders=loaders,
    logdir=logdir,
    num_epochs=num_epochs,
    verbose=1,
Example #24
def main():
    train_dataset = dataset.SentimentDataset(
        texts=df_train['sentences'].values.tolist(),
        labels=df_train['labels'].values,
        max_seq_length=config.MAX_SEQ_LENGTH,
        model_name=config.MODEL_NAME)

    valid_dataset = dataset.SentimentDataset(
        texts=df_valid['sentences'].values.tolist(),
        labels=df_valid['labels'].values,
        max_seq_length=config.MAX_SEQ_LENGTH,
        model_name=config.MODEL_NAME)

    train_val_loaders = {
        "train":
        DataLoader(dataset=train_dataset,
                   batch_size=config.BATCH_SIZE,
                   shuffle=True,
                   num_workers=2,
                   pin_memory=True),
        "valid":
        DataLoader(dataset=valid_dataset,
                   batch_size=config.BATCH_SIZE,
                   shuffle=False,
                   num_workers=2,
                   pin_memory=True)
    }

    dBert = model.DistilBert()

    param_optim = list(dBert.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']

    criterion = nn.CrossEntropyLoss()

    base_optimizer = RAdam([{
        'params':
        [p for n, p in param_optim if not any(nd in n for nd in no_decay)],
        'weight_decay':
        config.WEIGHT_DECAY
    }, {
        'params':
        [p for n, p in param_optim if any(nd in n for nd in no_decay)],
        'weight_decay':
        0.0
    }])
    optimizer = Lookahead(base_optimizer)
    scheduler = OneCycleLRWithWarmup(
        optimizer,
        num_steps=config.NUM_EPOCHS,
        lr_range=(config.LEARNING_RATE, 1e-8),
        init_lr=config.LEARNING_RATE,
        warmup_steps=0,
    )
    runner = SupervisedRunner(input_key=("input_ids", "attention_mask"))
    # model training
    runner.train(model=dBert,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=train_val_loaders,
                 callbacks=[
                     AccuracyCallback(num_classes=2),
                     OptimizerCallback(accumulation_steps=config.ACCUM_STEPS),
                 ],
                 fp16=config.FP_16,
                 logdir=config.LOG_DIR,
                 num_epochs=config.NUM_EPOCHS,
                 verbose=True)
Example #25
def get_callbacks(num_classes):
    callbacks = [
        AccuracyCallback(num_classes=num_classes),
        F1ScoreCallback(input_key="targets_one_hot", activation="Softmax")
    ]
    return callbacks
Example #26
def objective(trial):
    logdir = "/clusterdata/uqyzha77/Log/vic/"
    num_epochs = 100
    INPUT_DIM = 1
    OUTPUT_DIM = 5
    BATCH_SIZE = 64  # change here for multi gpu training 16*4=64
    num_classes = 5
    num_gpu = 1

    lr = trial.suggest_loguniform("lr", 1e-3, 1e-1)

    # generate dataloader
    data_path = '/afm02/Q2/Q2067/MoDS/Dabang_Sheng/Data/VIC_ready2use150000_yz_filtered80210.csv'
    df_all = pd.read_csv(data_path)

    labels = df_all.iloc[:, 4].copy()
    columns_name = list(range(0, 276))
    df2 = pd.DataFrame(df_all['VI_values'].str.slice(
        1, -1).str.split().values.tolist(),
                       columns=columns_name,
                       dtype=float)
    X = df2
    y = labels

    le = LabelEncoder()
    le.fit(y)
    print(le.classes_)
    class_names = le.classes_
    y = le.transform(y)

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=SEED,
                                                        stratify=y)

    X_train_resampled, y_train_resampled = X_train, y_train

    unique_elements, counts_elements = np.unique(y_train, return_counts=True)
    weights = [1 / i for i in counts_elements]
    weights[2] = weights[2] / 15
    print(np.asarray((unique_elements, counts_elements)))
    print(weights)
    samples_weight = np.array([weights[t] for t in y_train])
    samples_weights = torch.FloatTensor(samples_weight).to(device)
    class_weights = torch.FloatTensor(weights).to(device)
    sampler = torch.utils.data.sampler.WeightedRandomSampler(
        samples_weights, len(X_train_resampled), replacement=True)

    # prepare PyTorch Datasets
    X_train_tensor = numpy_to_tensor(X_train_resampled.to_numpy(),
                                     torch.FloatTensor)
    y_train_tensor = numpy_to_tensor(y_train_resampled, torch.long)
    X_test_tensor = numpy_to_tensor(X_test.to_numpy(), torch.FloatTensor)
    y_test_tensor = numpy_to_tensor(y_test, torch.long)

    X_train_tensor = torch.unsqueeze(X_train_tensor, 2)
    X_test_tensor = torch.unsqueeze(X_test_tensor, 2)

    train_ds = TensorDataset(X_train_tensor, y_train_tensor)
    valid_ds = TensorDataset(X_test_tensor, y_test_tensor)

    train_dl = DataLoader(train_ds,
                          batch_size=BATCH_SIZE,
                          sampler=sampler,
                          drop_last=True,
                          num_workers=0)
    valid_dl = DataLoader(valid_ds,
                          batch_size=BATCH_SIZE,
                          shuffle=False,
                          drop_last=True,
                          num_workers=0)

    # Catalyst loader:
    loaders = OrderedDict()
    loaders["train"] = train_dl
    loaders["valid"] = valid_dl

    # model
    model = AttentionModel(trial, BATCH_SIZE // num_gpu, INPUT_DIM,
                           OUTPUT_DIM).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [20, 40, 60])
    criterion = torch.nn.CrossEntropyLoss()

    # model training
    runner = SupervisedRunner()
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        logdir=logdir,
        num_epochs=num_epochs,
        verbose=True,
        callbacks=[
            AccuracyCallback(num_classes=num_classes),
            CatalystPruningCallback(
                trial,
                metric="accuracy01"),  # top-1 accuracy as metric for pruning
        ],
    )

    return runner.state.valid_metrics["accuracy01"]
Example #27
        fp16_params = dict(opt_level="O1")  # params for FP16
    else:
        fp16_params = None

    runner = SupervisedRunner(device=device)

    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        # We can specify the callbacks list for the experiment;
        # For this task, we will check accuracy, AUC and F1 metrics
        callbacks=[
            AccuracyCallback(num_classes=config.num_classes),
            AUCCallback(
                num_classes=config.num_classes,
                input_key="targets_one_hot",
                class_names=config.class_names
            ),
            F1ScoreCallback(
                input_key="targets_one_hot",
                activation="Softmax"
            ),
            CheckpointCallback(
                save_n_best=1,
                #             resume_dir="./models/classification",
                metrics_filename="metrics.json"
            ),
            EarlyStoppingCallback(
Example #28
set_global_seed(params["general"]["seed"])
prepare_cudnn(deterministic=True)

# here we specify that the runner passes both "features" and "attention_mask" to the model,
# i.e. the model's forward method will be called with these arguments.
runner = SupervisedRunner(input_key=("features", "attention_mask"))

# finally, training the model with Catalyst
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=train_val_loaders,
    callbacks=[
        AccuracyCallback(num_classes=int(params["model"]["num_classes"])),
        OptimizerCallback(
            accumulation_steps=int(params["training"]["accum_steps"])),
    ],
    logdir=params["training"]["log_dir"],
    num_epochs=int(params["training"]["num_epochs"]),
    verbose=True,
)

# and running inference
torch.cuda.empty_cache()
runner.infer(
    model=model,
    loaders=test_loaders,
    callbacks=[
        CheckpointCallback(
Example #29
    def fit(self,
            train_df, dev_df,
            batch_size=16, max_seq_length=256, learning_rate=5e-5,
            epochs=1, log_dir=None, verbose=False):

            start = time.time()
            config = {
                "model_name": self.model_name,
                "batch_size": batch_size,
                "max_seq_length": max_seq_length,
                "learning_rate": learning_rate,
                "epochs": epochs,
                "log_dir": log_dir
            }

            train_y = train_df[0]
            train_X = train_df[1]
            label2id = dict(
                zip(sorted(set(train_y)), range(len(set(train_y))))
            )
            self.id2label = {v: k for k, v in label2id.items()}
            num_labels = len(label2id)

            self.train_data = ClassificationDataset(
                tokenizer=self.tokenizer,
                label2id=label2id,
                max_seq_length=max_seq_length,
                texts=train_X,
                labels=train_y
            )

            dev_y = dev_df[0]
            dev_X = dev_df[1]

            self.dev_data = ClassificationDataset(
                tokenizer=self.tokenizer,
                label2id=label2id,
                max_seq_length=max_seq_length,
                texts=dev_X,
                labels=dev_y
            )

            train_dev_loaders = {
                "train": DataLoader(
                    dataset=self.train_data,
                    batch_size=batch_size,
                    shuffle=True
                ),
                "valid": DataLoader(
                    dataset=self.dev_data,
                    batch_size=batch_size,
                    shuffle=False
                )
            }

            model = BERTBaseJapaneseModel(self.model_name, num_labels)
            criterion = torch.nn.CrossEntropyLoss()
            optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
            scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer)

            self.runner.train(
                model=model,
                criterion=criterion,
                optimizer=optimizer,
                scheduler=scheduler,
                loaders=train_dev_loaders,
                callbacks=[
                    AccuracyCallback(num_classes=num_labels),
                ],
                fp16=None,
                logdir=log_dir,
                num_epochs=epochs,
                verbose=verbose
            )

            self.elapsed_time = time.time() - start
            config["elapsed_time"] = self.elapsed_time

            if os.path.exists(f"{log_dir}/checkpoints"):
                filename = f"{log_dir}/checkpoints/config.pkl"
                with open(filename, "wb") as f:
                    pickle.dump([label2id, config], f)
Example #30
        momentum_range=(0.85, 0.95),
    )
else:
    step = len(range(0, args.num_epochs, 4))
    milestones = [step * i for i in range(1, 4)]
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=milestones,
                                                     gamma=0.1)

runner = SupervisedRunner(input_key='features',
                          output_key=['embeddings', 'logits'])

callbacks = [
    AccuracyCallback(
        num_classes=args.num_classes,
        accuracy_args=[1],
        activation="Softmax",
    ),
    CriterionCallback(input_key="targets", prefix="loss", criterion_key="ce"),
]

if args.triplet_loss:
    callbacks.extend([
        CriterionCallback(input_key="targets",
                          output_key="embeddings",
                          prefix="loss",
                          criterion_key="htl"),
        CriterionAggregatorCallback(prefix="loss",
                                    loss_keys=["ce", "htl"],
                                    loss_aggregate_fn="sum")
    ])
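The snippet above is cut off before the call to runner.train. With criterion_key-based CriterionCallbacks like these, the runner receives a dict of criteria keyed by the same names; below is a hedged sketch of how the training call could continue, where HardTripletLoss and the remaining variables are assumed placeholders rather than code from the original script.

# Hypothetical continuation: pass a criterion dict whose keys match the
# criterion_key values used by the callbacks above ("ce", "htl").
criterion = {
    "ce": nn.CrossEntropyLoss(),
    "htl": HardTripletLoss(),  # assumed project-specific hard triplet loss
}
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    callbacks=callbacks,
    logdir=args.logdir,
    num_epochs=args.num_epochs,
    verbose=args.verbose,
)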