Example #1
 def get_callbacks(self, stage: str):
     return {
         "criterion":
         dl.CriterionCallback(input_key="logits",
                              target_key="labels",
                              metric_key="loss"),
         "optimizer":
         dl.OptimizerCallback(metric_key="loss"),
         "scheduler":
         dl.SchedulerCallback(loader_key="valid",
                              metric_key="loss",
                              mode="batch"),
         "accuracy":
         dl.AccuracyCallback(input_key="logits",
                             target_key="labels",
                             topk_args=(1, )),
         "checkpoint":
         dl.CheckpointCallback(
             self._logdir,
             loader_key="valid",
             metric_key="accuracy",
             minimize=False,
             save_n_best=1,
         ),
         # "tqdm": dl.TqdmCallback(),
     }
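
Note: these get_callbacks snippets are methods overridden on a custom catalyst runner; the enclosing class is not shown in the examples. A minimal sketch of where such an override lives (the class name and the _logdir attribute are illustrative assumptions, not taken from the source):

from catalyst import dl

class CustomIRunner(dl.IRunner):
    # Sketch only: a complete IRunner subclass also overrides get_engine,
    # get_model, get_criterion, get_optimizer, get_loaders, and so on.
    def __init__(self, logdir: str):
        super().__init__()
        self._logdir = logdir  # consumed by the CheckpointCallback above

    def get_callbacks(self, stage: str):
        ...  # e.g. the dict from Example #1
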
Example #2
 def get_callbacks(self, stage: str):
     return {
         "optimizer": dl.OptimizerCallback(metric_key="loss"),
         "checkpoint": dl.CheckpointCallback(
             self._logdir, loader_key="valid", metric_key="loss", minimize=True, save_n_best=3
         ),
     }
Example #3
 def load_weights(self, callbacks_list):
     """
     Loads model weights and appends the CheckpointCallback if doing
     stateful model loading. This doesn't add the CheckpointCallback if
     it's 'model_only' loading bc SupervisedRunner adds it by default.
     """
     ckpoint_params = self.cb_params["checkpoint_params"]
     # checkpoint_params["checkpoint_path"] set to None signals that no
     # checkpoint callback should be added
     if ckpoint_params["checkpoint_path"] is not None:
         mode = ckpoint_params["mode"].lower()
         if mode == "full":
             print("Stateful loading...")
             ckpoint_p = Path(ckpoint_params["checkpoint_path"])
             fname = ckpoint_p.name
             # everything in the path besides the base file name
             resume_dir = str(ckpoint_p.parents[0])
             print(f"Loading {fname} from {resume_dir}. \
                   \nCheckpoints will also be saved in {resume_dir}.")
             # adding the checkpoint callback
             ckpoint = [
                 callbacks.CheckpointCallback(resume=fname,
                                              resume_dir=resume_dir)
             ]
             callbacks_list = callbacks_list + ckpoint
         elif mode == "model_only":
             print("Loading weights into model...")
             self.model = load_weights_train(
                 ckpoint_params["checkpoint_path"], self.model)
     return callbacks_list
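
load_weights_train is an external helper that is not part of this snippet. A plausible minimal implementation in plain PyTorch (an assumption, not the original helper):

import torch

def load_weights_train(checkpoint_path, model):
    """Hypothetical helper: load weights from a checkpoint for 'model_only' loading."""
    checkpoint = torch.load(checkpoint_path, map_location="cpu")
    # catalyst checkpoints typically nest weights under "model_state_dict";
    # otherwise treat the file as a raw state dict
    state_dict = checkpoint.get("model_state_dict", checkpoint)
    model.load_state_dict(state_dict)
    return model
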
Example #4
def train_experiment(device, engine=None):
    with TemporaryDirectory() as logdir:
        teacher = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
        student = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
        model = {"teacher": teacher, "student": student}
        criterion = {"cls": nn.CrossEntropyLoss(), "kl": nn.KLDivLoss(reduction="batchmean")}
        optimizer = optim.Adam(student.parameters(), lr=0.02)

        loaders = {
            "train": DataLoader(
                MNIST(os.getcwd(), train=True, download=True, transform=ToTensor()), batch_size=32
            ),
            "valid": DataLoader(
                MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()), batch_size=32
            ),
        }

        runner = DistilRunner()
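        # DistilRunner is defined in Example #21 below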
        # model training
        runner.train(
            engine=engine or dl.DeviceEngine(device),
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            loaders=loaders,
            num_epochs=1,
            logdir=logdir,
            verbose=False,
            callbacks=[
                dl.AccuracyCallback(
                    input_key="t_logits", target_key="targets", num_classes=2, prefix="teacher_"
                ),
                dl.AccuracyCallback(
                    input_key="s_logits", target_key="targets", num_classes=2, prefix="student_"
                ),
                dl.CriterionCallback(
                    input_key="s_logits",
                    target_key="targets",
                    metric_key="cls_loss",
                    criterion_key="cls",
                ),
                dl.CriterionCallback(
                    input_key="s_logprobs",
                    target_key="t_probs",
                    metric_key="kl_div_loss",
                    criterion_key="kl",
                ),
                dl.MetricAggregationCallback(
                    metric_key="loss", metrics=["kl_div_loss", "cls_loss"], mode="mean"
                ),
                dl.OptimizerCallback(metric_key="loss", model_key="student"),
                dl.CheckpointCallback(
                    logdir=logdir,
                    loader_key="valid",
                    metric_key="loss",
                    minimize=True,
                    save_n_best=3,
                ),
            ],
        )
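
After training, the distilled student can be sanity-checked with a plain PyTorch evaluation loop; a short sketch reusing the student model and loaders defined above (move tensors to the proper device first if training ran on GPU):

student.eval()
correct, total = 0, 0
with torch.no_grad():
    for x, y in loaders["valid"]:
        preds = student(x).argmax(dim=-1)
        correct += (preds == y).sum().item()
        total += y.numel()
print(f"student accuracy: {correct / total:.4f}")
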
Example #5
 def get_callbacks(self, stage: str):
     return {
         "criterion":
         dl.CriterionCallback(metric_key="loss",
                              input_key="logits",
                              target_key="targets"),
         "optimizer":
         dl.OptimizerCallback(metric_key="loss"),
         # "scheduler": dl.SchedulerCallback(loader_key="valid", metric_key="loss"),
         "accuracy":
         dl.AccuracyCallback(input_key="logits",
                             target_key="targets",
                             topk_args=(1, 3, 5)),
         "classification":
         dl.PrecisionRecallF1SupportCallback(input_key="logits",
                                             target_key="targets",
                                             num_classes=10),
         "confusion_matrix":
         dl.ConfusionMatrixCallback(input_key="logits",
                                    target_key="targets",
                                    num_classes=10),
         "checkpoint":
         dl.CheckpointCallback(self._logdir,
                               loader_key="valid",
                               metric_key="loss",
                               minimize=True,
                               save_n_best=3),
     }
Example #6
 def get_callbacks(self):
     return {
         "criterion":
         dl.CriterionCallback(metric_key="loss",
                              input_key="logits",
                              target_key="targets"),
         "backward":
         dl.BackwardCallback(metric_key="loss"),
         "optimizer":
         dl.OptimizerCallback(metric_key="loss"),
         "scheduler":
         dl.SchedulerCallback(loader_key="valid", metric_key="loss"),
         "accuracy":
         dl.AccuracyCallback(input_key="logits",
                             target_key="targets",
                             topk=(1, 3, 5)),
         "checkpoint":
         dl.CheckpointCallback(
             self._logdir,
             loader_key="valid",
             metric_key="accuracy01",
             minimize=False,
             topk=1,
         ),
         "tqdm":
         dl.TqdmCallback(),
     }
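
This example targets the newer catalyst API (topk= instead of topk_args=, BackwardCallback, CheckpointCallback(..., topk=1)). Judging by the file names asserted in Example #16, the best checkpoint could afterwards be restored roughly as follows; the "model.best.pth" name and the state-dict layout are assumptions based on that example:

import os
import torch

ckpt = torch.load(os.path.join(logdir, "model.best.pth"), map_location="cpu")
# older catalyst versions nest the weights under "model_state_dict"
state_dict = ckpt.get("model_state_dict", ckpt)
model.load_state_dict(state_dict)
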
Example #7
 def get_callbacks(self, stage: str):
     callbacks = {
         "criterion":
         dl.CriterionCallback(metric_key="loss",
                              input_key="logits",
                              target_key="targets"),
         "optimizer":
         dl.OptimizerCallback(
             metric_key="loss",
             grad_clip_fn=nn.utils.clip_grad_norm_,
             grad_clip_params={"max_norm": 1.0},
         ),
         # "scheduler": dl.SchedulerCallback(loader_key="valid", metric_key="loss"),
         "accuracy":
         dl.AccuracyCallback(input_key="logits",
                             target_key="targets",
                             topk_args=(1, 3, 5)),
         "classification":
         dl.PrecisionRecallF1SupportCallback(input_key="logits",
                                             target_key="targets",
                                             num_classes=10),
         "checkpoint":
         dl.CheckpointCallback(self._logdir,
                               loader_key="valid",
                               metric_key="loss",
                               minimize=True,
                               save_n_best=3),
     }
     if SETTINGS.ml_required:
         callbacks["confusion_matrix"] = dl.ConfusionMatrixCallback(
             input_key="logits", target_key="targets", num_classes=10)
     return callbacks
Example #8
def test_resume_with_missing_file():
    old_stdout = sys.stdout
    sys.stdout = str_stdout = StringIO()

    # experiment_setup
    logdir = "./logs/checkpoint_callback"
    checkpoint = logdir + "/checkpoints"
    logfile = checkpoint + "/_metrics.json"
    num_epochs = 5

    # data
    num_samples, num_features = int(1e4), int(1e1)
    X = torch.rand(num_samples, num_features)
    y = torch.randint(0, 5, size=[num_samples])
    dataset = TensorDataset(X, y)
    loader = DataLoader(dataset, batch_size=32, num_workers=1)
    loaders = {"train": loader, "valid": loader}

    # model, criterion, optimizer, scheduler
    model = torch.nn.Linear(num_features, 5)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters())
    runner = dl.SupervisedRunner()

    with pytest.raises(FileNotFoundError):
        runner.train(
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            loaders=loaders,
            logdir=logdir,
            num_epochs=num_epochs,
            verbose=False,
            valid_loader="valid",
            valid_metric="loss",
            minimize_valid_metric=True,
            callbacks=[
                dl.CheckpointCallback(
                    logdir=logdir,
                    loader_key="valid",
                    metric_key="loss",
                    minimize=True,
                    save_n_best=2,
                    load_on_stage_end={
                        "model": "best",
                        "criterion": "best",
                        "optimizer": "last"
                    },
                    resume="not_existing_file.pth",
                ),
                dl.CheckRunCallback(num_epoch_steps=num_epochs),
            ],
        )

    sys.stdout = old_stdout
    exp_output = str_stdout.getvalue()

    shutil.rmtree(logdir, ignore_errors=True)
Example #9
def test_load_best_on_stage_end():
    old_stdout = sys.stdout
    sys.stdout = str_stdout = StringIO()

    # experiment_setup
    logdir = "./logs/checkpoint_callback"
    checkpoint = logdir + "/checkpoints"
    logfile = checkpoint + "/_metrics.json"

    # data
    num_samples, num_features = int(1e4), int(1e1)
    X = torch.rand(num_samples, num_features)
    y = torch.randint(0, 5, size=[num_samples])
    dataset = TensorDataset(X, y)
    loader = DataLoader(dataset, batch_size=32, num_workers=1)
    loaders = {"train": loader, "valid": loader}

    # model, criterion, optimizer, scheduler
    model = torch.nn.Linear(num_features, 5)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters())
    runner = dl.SupervisedRunner()

    n_epochs = 5
    # first stage
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        logdir=logdir,
        num_epochs=n_epochs,
        verbose=False,
        callbacks=[
            dl.CheckpointCallback(save_n_best=2, load_on_stage_end="best"),
            dl.CheckRunCallback(num_epoch_steps=n_epochs),
        ],
    )

    sys.stdout = old_stdout
    exp_output = str_stdout.getvalue()

    assert len(re.findall(r"=> Loading", exp_output)) == 1
    assert len(re.findall(r"=> Loading .*best\.pth", exp_output)) == 1

    assert os.path.isfile(logfile)
    assert os.path.isfile(checkpoint + "/train.4.pth")
    assert os.path.isfile(checkpoint + "/train.4_full.pth")
    assert os.path.isfile(checkpoint + "/train.5.pth")
    assert os.path.isfile(checkpoint + "/train.5_full.pth")
    assert os.path.isfile(checkpoint + "/best.pth")
    assert os.path.isfile(checkpoint + "/best_full.pth")
    assert os.path.isfile(checkpoint + "/last.pth")
    assert os.path.isfile(checkpoint + "/last_full.pth")

    shutil.rmtree(logdir, ignore_errors=True)
Example #10
 def get_callbacks(self, stage: str):
     callbacks = {
         "scores":
         dl.BatchTransformCallback(
             input_key="logits",
             output_key="scores",
             transform=partial(torch.softmax, dim=1),
             scope="on_batch_end",
         ),
         "labels":
         dl.BatchTransformCallback(
             input_key="scores",
             output_key="labels",
             transform=partial(torch.argmax, dim=1),
             scope="on_batch_end",
         ),
         "criterion":
         dl.CriterionCallback(metric_key="loss",
                              input_key="logits",
                              target_key="targets"),
         "optimizer":
         dl.OptimizerCallback(
             metric_key="loss",
             grad_clip_fn=nn.utils.clip_grad_norm_,
             grad_clip_params={"max_norm": 1.0},
         ),
         # "scheduler": dl.SchedulerCallback(loader_key="valid", metric_key="loss"),
         "accuracy":
         dl.AccuracyCallback(input_key="logits",
                             target_key="targets",
                             topk_args=(1, 3, 5)),
         "classification":
         dl.PrecisionRecallF1SupportCallback(input_key="logits",
                                             target_key="targets",
                                             num_classes=10),
         "checkpoint":
         dl.CheckpointCallback(self._logdir,
                               loader_key="valid",
                               metric_key="loss",
                               minimize=True,
                               save_n_best=3),
     }
     if SETTINGS.ml_required:
         callbacks["confusion_matrix"] = dl.ConfusionMatrixCallback(
             input_key="logits", target_key="targets", num_classes=10)
         callbacks["f1_score"] = dl.SklearnBatchCallback(
             keys={
                 "y_pred": "labels",
                 "y_true": "targets"
             },
             metric_fn="f1_score",
             metric_key="sk_f1",
             average="macro",
             zero_division=1,
         )
     return callbacks
Example #11
def train_experiment(device):
    with TemporaryDirectory() as logdir:
        # sample data
        num_users, num_features, num_items = int(1e4), int(1e1), 10
        X = torch.rand(num_users, num_features)
        y = (torch.rand(num_users, num_items) > 0.5).to(torch.float32)

        # pytorch loaders
        dataset = TensorDataset(X, y)
        loader = DataLoader(dataset, batch_size=32, num_workers=1)
        loaders = {"train": loader, "valid": loader}

        # model, criterion, optimizer, scheduler
        model = torch.nn.Linear(num_features, num_items)
        criterion = torch.nn.BCEWithLogitsLoss()
        optimizer = torch.optim.Adam(model.parameters())
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [2])

        class CustomRunner(dl.Runner):
            def handle_batch(self, batch):
                x, y = batch
                logits = self.model(x)
                self.batch = {
                    "features": x,
                    "logits": logits,
                    "scores": torch.sigmoid(logits),
                    "targets": y,
                }

        # model training
        runner = CustomRunner()
        runner.train(
            engine=dl.DeviceEngine(device),
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            scheduler=scheduler,
            loaders=loaders,
            num_epochs=1,
            verbose=False,
            callbacks=[
                dl.CriterionCallback(input_key="logits", target_key="targets", metric_key="loss"),
                dl.AUCCallback(input_key="scores", target_key="targets"),
                dl.HitrateCallback(input_key="scores", target_key="targets", topk_args=(1, 3, 5)),
                dl.MRRCallback(input_key="scores", target_key="targets", topk_args=(1, 3, 5)),
                dl.MAPCallback(input_key="scores", target_key="targets", topk_args=(1, 3, 5)),
                dl.NDCGCallback(input_key="scores", target_key="targets", topk_args=(1, 3, 5)),
                dl.OptimizerCallback(metric_key="loss"),
                dl.SchedulerCallback(),
                dl.CheckpointCallback(
                    logdir=logdir, loader_key="valid", metric_key="map01", minimize=False
                ),
            ],
        )
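
Note: the metric_key="map01" monitored by CheckpointCallback here is produced by MAPCallback with topk_args=(1, 3, 5), which logs map01, map03, and map05; the same metric@k naming convention appears as accuracy01 in Examples #6 and #20.
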
Example #12
 def get_callbacks(self, stage: str) -> Dict[str, dl.Callback]:
     return {
         "criterion": dl.CriterionCallback(
             metric_key="loss", input_key="logits", target_key="targets"
         ),
         "optimizer": dl.OptimizerCallback(metric_key="loss"),
         # "scheduler": dl.SchedulerCallback(loader_key="valid", metric_key="loss"),
         "checkpoint": dl.CheckpointCallback(
             self._logdir, loader_key="valid", metric_key="loss", minimize=True, save_n_best=3
         ),
         "check_freezed": CheckRequiresGrad("layer1", "train_freezed", False),
         "check_unfreezed": CheckRequiresGrad("layer1", "train_unfreezed", True),
     }
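
CheckRequiresGrad is a test helper that is not shown in this snippet. A plausible sketch built on catalyst's Callback API (the event hook, the stage_key attribute, and the name-matching logic are assumptions for illustration):

from catalyst import dl

class CheckRequiresGrad(dl.Callback):
    def __init__(self, layer_name, stage, requires_grad):
        super().__init__(order=dl.CallbackOrder.External)
        self.layer_name = layer_name
        self.stage = stage
        self.requires_grad = requires_grad

    def on_stage_start(self, runner):
        # verify the freeze state only in the stage under test
        if runner.stage_key != self.stage:
            return
        for name, param in runner.model.named_parameters():
            if name.startswith(self.layer_name):
                assert param.requires_grad == self.requires_grad, name
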
Example #13
def train_experiment(device, engine=None):
    with TemporaryDirectory() as logdir:
        # sample data
        num_users, num_features, num_items = int(1e4), int(1e1), 10
        X = torch.rand(num_users, num_features)
        y = (torch.rand(num_users, num_items) > 0.5).to(torch.float32)

        # pytorch loaders
        dataset = TensorDataset(X, y)
        loader = DataLoader(dataset, batch_size=32, num_workers=1)
        loaders = {"train": loader, "valid": loader}

        # model, criterion, optimizer, scheduler
        model = torch.nn.Linear(num_features, num_items)
        criterion = torch.nn.BCEWithLogitsLoss()
        optimizer = torch.optim.Adam(model.parameters())
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [2])

        callbacks = [
            dl.CriterionCallback(input_key="logits", target_key="targets", metric_key="loss"),
            dl.AUCCallback(input_key="scores", target_key="targets"),
            dl.HitrateCallback(input_key="scores", target_key="targets", topk_args=(1, 3, 5)),
            dl.MRRCallback(input_key="scores", target_key="targets", topk_args=(1, 3, 5)),
            dl.MAPCallback(input_key="scores", target_key="targets", topk_args=(1, 3, 5)),
            dl.NDCGCallback(input_key="scores", target_key="targets", topk_args=(1, 3, 5)),
            dl.OptimizerCallback(metric_key="loss"),
            dl.SchedulerCallback(),
            dl.CheckpointCallback(
                logdir=logdir, loader_key="valid", metric_key="map01", minimize=False
            ),
        ]
        if engine is None or not isinstance(
            engine, (dl.AMPEngine, dl.DataParallelAMPEngine, dl.DistributedDataParallelAMPEngine)
        ):
            callbacks.append(dl.AUCCallback(input_key="logits", target_key="targets"))

        # model training
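        # CustomRunner: presumably the runner class defined in Example #11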
        runner = CustomRunner()
        runner.train(
            engine=engine or dl.DeviceEngine(device),
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            scheduler=scheduler,
            loaders=loaders,
            num_epochs=1,
            verbose=False,
            callbacks=callbacks,
        )
Example #14
 def get_callbacks(self):
     return {
         "backward":
         dl.BackwardCallback(metric_key="loss"),
         "optimizer":
         dl.OptimizerCallback(metric_key="loss"),
         "checkpoint":
         dl.CheckpointCallback(
             self._logdir,
             loader_key="valid",
             metric_key="loss",
             minimize=True,
             topk=3,
         ),
     }
Example #15
 def get_callbacks(self, stage: str):
     return {
         "criterion": dl.CriterionCallback(
             metric_key="loss", input_key="logits", target_key="targets"
         ),
         "optimizer": dl.OptimizerCallback(metric_key="loss"),
         "checkpoint": dl.CheckpointCallback(
             self._logdir,
             loader_key="valid",
             metric_key="loss",
             minimize=True,
             save_n_best=3,
             load_on_stage_start="best",
         ),
         "test_model_load": CheckModelStateLoadAfterStages("second", self._logdir, "best.pth"),
     }
Example #16
def test_files_existence(tmpdir):
    logfile = tmpdir + "/model.storage.json"
    n_epochs = 5
    callbacks = [
        dl.CheckpointCallback(
            logdir=tmpdir,
            loader_key="valid",
            metric_key="loss",
            minimize=True,
            topk=2,
        ),
        dl.CheckRunCallback(num_epoch_steps=n_epochs),
    ]
    train_runner(tmpdir, n_epochs, callbacks)

    assert os.path.isfile(logfile)
    assert os.path.isfile(tmpdir + "/model.0004.pth")
    # assert os.path.isfile(tmpdir + "/train.4_full.pth")
    assert os.path.isfile(tmpdir + "/model.0005.pth")
    # assert os.path.isfile(tmpdir + "/train.5_full.pth")
    assert os.path.isfile(tmpdir + "/model.best.pth")
    # assert os.path.isfile(tmpdir + "/best_full.pth")
    assert os.path.isfile(tmpdir + "/model.last.pth")
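
train_runner is a helper that is not included in this snippet. A minimal sketch under the same assumptions as the synthetic checkpoint tests above (Examples #8 and #9):

import torch
from torch.utils.data import DataLoader, TensorDataset
from catalyst import dl

def train_runner(logdir, n_epochs, callbacks):
    # synthetic 5-class data, as in the other checkpoint tests
    X = torch.rand(int(1e4), 10)
    y = torch.randint(0, 5, size=[int(1e4)])
    loader = DataLoader(TensorDataset(X, y), batch_size=32)
    model = torch.nn.Linear(10, 5)
    runner = dl.SupervisedRunner()
    runner.train(
        model=model,
        criterion=torch.nn.CrossEntropyLoss(),
        optimizer=torch.optim.Adam(model.parameters()),
        loaders={"train": loader, "valid": loader},
        logdir=logdir,
        num_epochs=n_epochs,
        verbose=False,
        valid_loader="valid",
        valid_metric="loss",
        minimize_valid_metric=True,
        callbacks=callbacks,
    )
    return runner
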
Example #17
def train_experiment(engine=None):
    with TemporaryDirectory() as logdir:
        # sample data
        num_users, num_features, num_items = int(1e4), int(1e1), 10
        X = torch.rand(num_users, num_features)
        y = (torch.rand(num_users, num_items) > 0.5).to(torch.float32)

        # pytorch loaders
        dataset = TensorDataset(X, y)
        loader = DataLoader(dataset, batch_size=32, num_workers=1)
        loaders = {"train": loader, "valid": loader}

        # model, criterion, optimizer, scheduler
        model = torch.nn.Linear(num_features, num_items)
        criterion = torch.nn.BCEWithLogitsLoss()
        optimizer = torch.optim.Adam(model.parameters())
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [2])

        callbacks = [
            dl.BatchTransformCallback(
                input_key="logits",
                output_key="scores",
                transform=torch.sigmoid,
                scope="on_batch_end",
            ),
            dl.CriterionCallback(input_key="logits",
                                 target_key="targets",
                                 metric_key="loss"),
            dl.HitrateCallback(input_key="scores",
                               target_key="targets",
                               topk=(1, 3, 5)),
            dl.MRRCallback(input_key="scores",
                           target_key="targets",
                           topk=(1, 3, 5)),
            dl.MAPCallback(input_key="scores",
                           target_key="targets",
                           topk=(1, 3, 5)),
            dl.NDCGCallback(input_key="scores",
                            target_key="targets",
                            topk=(1, 3)),
            dl.BackwardCallback(metric_key="loss"),
            dl.OptimizerCallback(metric_key="loss"),
            dl.SchedulerCallback(),
            dl.CheckpointCallback(logdir=logdir,
                                  loader_key="valid",
                                  metric_key="map01",
                                  minimize=False),
        ]
        if isinstance(engine, dl.CPUEngine):
            callbacks.append(
                dl.AUCCallback(input_key="logits", target_key="targets"))

        # model training
        runner = dl.SupervisedRunner(
            input_key="features",
            output_key="logits",
            target_key="targets",
            loss_key="loss",
        )
        runner.train(
            engine=engine,
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            scheduler=scheduler,
            loaders=loaders,
            num_epochs=1,
            verbose=False,
            callbacks=callbacks,
        )
Example #18
def train(dev_dir, logdir, device):
    train = pd.read_csv(f'{dev_dir}/train.csv', index_col=0)
    train['all_utils'] = train['cmd_cleaned'].apply(select_utils)
    train = train.loc[train.all_utils.apply(str.strip).apply(len) > 0]
    train['util'] = train['all_utils'].apply(lambda x: x.split()[0])
    train = train.dropna().reset_index(drop=True)

    spm.SentencePieceTrainer.train(input=f'{dev_dir}/text',
                                   model_prefix=f'{dev_dir}/txt_bpe_clf',
                                   model_type='bpe',
                                   vocab_size=config.src_vocab_size)
    text_tokenizer = spm.SentencePieceProcessor(f'{dev_dir}/txt_bpe_clf.model')

    cmd_le = LabelEncoder()

    train['text_enc'] = train.text_cleaned.progress_apply(
        text_tokenizer.encode)
    train['y'] = cmd_le.fit_transform(train['util'].values)

    tdf = train[train.origin == 'original']
    tdf2 = train[train.origin != 'original']
    train, valid = train_test_split(tdf, test_size=500, random_state=SEED)
    train = pd.concat([train, tdf2]).reset_index(drop=True)

    train_ds = UtilDataset(train.text_enc, train.y, config, bos_id, eos_id,
                           pad_id)
    valid_ds = UtilDataset(valid.text_enc, valid.y, config, bos_id, eos_id,
                           pad_id)

    model = BertClassifier(config, pad_id, len(cmd_le.classes_))
    print('# params',
          sum(p.numel() for p in model.parameters() if p.requires_grad))

    loaders = {
        'train':
        data.DataLoader(train_ds, batch_size=config.batch_size, shuffle=True),
        'valid':
        data.DataLoader(valid_ds, batch_size=config.batch_size),
    }

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=config.optimizer_lr,
                                 weight_decay=config.weight_decay,
                                 amsgrad=True)
    callbacks = [
        dl.CheckpointCallback(config.num_epochs),  # positional save_n_best (older catalyst API)
        dl.AccuracyCallback(num_classes=len(cmd_le.classes_), topk_args=[1, 5])
    ]

    scheduler = None  # guard against config.schedule values without a branch below
    if config.schedule == 'OneCycleLR':
        scheduler = torch.optim.lr_scheduler.OneCycleLR(
            optimizer,
            max_lr=config.optimizer_lr,
            epochs=config.num_epochs,
            steps_per_epoch=len(loaders['train']))
        callbacks.append(dl.SchedulerCallback(mode="batch"))

    elif config.schedule == 'ReduceLROnPlateau':
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer,
            factor=config.plateau_factor,
            patience=5,
            cooldown=3,
            threshold=1e-3,
            min_lr=1e-6)
        callbacks.append(dl.SchedulerCallback(mode="epoch"))

    shutil.rmtree(logdir, ignore_errors=True)
    os.makedirs(logdir, exist_ok=True)

    runner = dl.SupervisedRunner(device=device)
    runner.train(
        model=model,
        loaders=loaders,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler if config.schedule else None,
        num_epochs=config.num_epochs,
        verbose=True,
        logdir=logdir,
        callbacks=callbacks,
    )
    joblib.dump(cmd_le, f'{dev_dir}/cmd_le')
Example #19
def train_experiment(device):
    with TemporaryDirectory() as logdir:
        # sample data
        num_samples, num_features = int(1e4), int(1e1)
        num_classes1, num_classes2 = 4, 10
        X = torch.rand(num_samples, num_features)
        y1 = (torch.rand(num_samples) * num_classes1).to(torch.int64)
        y2 = (torch.rand(num_samples) * num_classes2).to(torch.int64)

        # pytorch loaders
        dataset = TensorDataset(X, y1, y2)
        loader = DataLoader(dataset, batch_size=32, num_workers=1)
        loaders = {"train": loader, "valid": loader}

        class CustomModule(nn.Module):
            def __init__(self, in_features: int, out_features1: int,
                         out_features2: int):
                super().__init__()
                self.shared = nn.Linear(in_features, 128)
                self.head1 = nn.Linear(128, out_features1)
                self.head2 = nn.Linear(128, out_features2)

            def forward(self, x):
                x = self.shared(x)
                y1 = self.head1(x)
                y2 = self.head2(x)
                return y1, y2

        # model, criterion, optimizer, scheduler
        model = CustomModule(num_features, num_classes1, num_classes2)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters())
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer, [2])

        class CustomRunner(dl.Runner):
            def handle_batch(self, batch):
                x, y1, y2 = batch
                y1_hat, y2_hat = self.model(x)
                self.batch = {
                    "features": x,
                    "logits1": y1_hat,
                    "logits2": y2_hat,
                    "targets1": y1,
                    "targets2": y2,
                }

        # model training
        runner = CustomRunner()
        runner.train(
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            scheduler=scheduler,
            loaders=loaders,
            num_epochs=1,
            verbose=False,
            callbacks=[
                dl.CriterionCallback(metric_key="loss1",
                                     input_key="logits1",
                                     target_key="targets1"),
                dl.CriterionCallback(metric_key="loss2",
                                     input_key="logits2",
                                     target_key="targets2"),
                dl.MetricAggregationCallback(prefix="loss",
                                             metrics=["loss1", "loss2"],
                                             mode="mean"),
                dl.OptimizerCallback(metric_key="loss"),
                dl.SchedulerCallback(),
                dl.AccuracyCallback(
                    input_key="logits1",
                    target_key="targets1",
                    num_classes=num_classes1,
                    prefix="one_",
                ),
                dl.AccuracyCallback(
                    input_key="logits2",
                    target_key="targets2",
                    num_classes=num_classes2,
                    prefix="two_",
                ),
                dl.ConfusionMatrixCallback(
                    input_key="logits1",
                    target_key="targets1",
                    num_classes=num_classes1,
                    prefix="one_cm",
                ),
                # catalyst[ml] required
                dl.ConfusionMatrixCallback(
                    input_key="logits2",
                    target_key="targets2",
                    num_classes=num_classes2,
                    prefix="two_cm",
                ),
                # catalyst[ml] required
                dl.CheckpointCallback(
                    "./logs/one",
                    loader_key="valid",
                    metric_key="one_accuracy",
                    minimize=False,
                    save_n_best=1,
                ),
                dl.CheckpointCallback(
                    "./logs/two",
                    loader_key="valid",
                    metric_key="two_accuracy03",
                    minimize=False,
                    save_n_best=3,
                ),
            ],
            loggers={
                "console": dl.ConsoleLogger(),
                "tb": dl.TensorboardLogger("./logs/tb")
            },
        )
Example #20
def train_experiment(engine=None):
    with TemporaryDirectory() as logdir:
        # sample data
        num_samples, num_features = int(1e4), int(1e1)
        num_classes1, num_classes2 = 4, 10
        X = torch.rand(num_samples, num_features)
        y1 = (torch.rand(num_samples) * num_classes1).to(torch.int64)
        y2 = (torch.rand(num_samples) * num_classes2).to(torch.int64)

        # pytorch loaders
        dataset = TensorDataset(X, y1, y2)
        loader = DataLoader(dataset, batch_size=32, num_workers=1)
        loaders = {"train": loader, "valid": loader}

        # model, criterion, optimizer, scheduler
        model = CustomModule(num_features, num_classes1, num_classes2)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters())
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer, [2])

        callbacks = [
            dl.CriterionCallback(metric_key="loss1",
                                 input_key="logits1",
                                 target_key="targets1"),
            dl.CriterionCallback(metric_key="loss2",
                                 input_key="logits2",
                                 target_key="targets2"),
            dl.MetricAggregationCallback(metric_key="loss",
                                         metrics=["loss1", "loss2"],
                                         mode="mean"),
            dl.BackwardCallback(metric_key="loss"),
            dl.OptimizerCallback(metric_key="loss"),
            dl.SchedulerCallback(),
            dl.AccuracyCallback(
                input_key="logits1",
                target_key="targets1",
                num_classes=num_classes1,
                prefix="one_",
            ),
            dl.AccuracyCallback(
                input_key="logits2",
                target_key="targets2",
                num_classes=num_classes2,
                prefix="two_",
            ),
            dl.CheckpointCallback(
                "./logs/one",
                loader_key="valid",
                metric_key="one_accuracy01",
                minimize=False,
                topk=1,
            ),
            dl.CheckpointCallback(
                "./logs/two",
                loader_key="valid",
                metric_key="two_accuracy03",
                minimize=False,
                topk=3,
            ),
        ]
        if SETTINGS.ml_required:
            # catalyst[ml] required
            callbacks.append(
                dl.ConfusionMatrixCallback(
                    input_key="logits1",
                    target_key="targets1",
                    num_classes=num_classes1,
                    prefix="one_cm",
                ))
            # catalyst[ml] required
            callbacks.append(
                dl.ConfusionMatrixCallback(
                    input_key="logits2",
                    target_key="targets2",
                    num_classes=num_classes2,
                    prefix="two_cm",
                ))

        # model training
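        # CustomModule and CustomRunner: as defined in Example #19 above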
        runner = CustomRunner()
        runner.train(
            engine=engine,
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            scheduler=scheduler,
            loaders=loaders,
            num_epochs=1,
            verbose=False,
            callbacks=callbacks,
            loggers={
                "console": dl.ConsoleLogger(),
                "tb": dl.TensorboardLogger("./logs/tb"),
            },
        )
Example #21
def train_experiment(device):
    with TemporaryDirectory() as logdir:
        teacher = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
        student = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
        criterion = {
            "cls": nn.CrossEntropyLoss(),
            "kl": nn.KLDivLoss(reduction="batchmean")
        }
        optimizer = optim.Adam(student.parameters(), lr=0.02)

        loaders = {
            "train":
            DataLoader(MNIST(os.getcwd(),
                             train=True,
                             download=True,
                             transform=ToTensor()),
                       batch_size=32),
            "valid":
            DataLoader(MNIST(os.getcwd(),
                             train=False,
                             download=True,
                             transform=ToTensor()),
                       batch_size=32),
        }

        class DistilRunner(dl.Runner):
            def handle_batch(self, batch):
                x, y = batch

                teacher.eval()  # manually keep the teacher in eval mode
                with torch.no_grad():
                    t_logits = self.model["teacher"](x)

                s_logits = self.model["student"](x)
                self.batch = {
                    "t_logits": t_logits,
                    "s_logits": s_logits,
                    "targets": y,
                    "s_logprobs": F.log_softmax(s_logits, dim=-1),
                    "t_probs": F.softmax(t_logits, dim=-1),
                }

        runner = DistilRunner()
        # model training
        runner.train(
            engine=dl.DeviceEngine(device),
            model={
                "teacher": teacher,
                "student": student
            },
            criterion=criterion,
            optimizer=optimizer,
            loaders=loaders,
            num_epochs=1,
            logdir=logdir,
            verbose=True,
            callbacks=[
                dl.AccuracyCallback(input_key="t_logits",
                                    target_key="targets",
                                    num_classes=2,
                                    prefix="teacher_"),
                dl.AccuracyCallback(input_key="s_logits",
                                    target_key="targets",
                                    num_classes=2,
                                    prefix="student_"),
                dl.CriterionCallback(
                    input_key="s_logits",
                    target_key="targets",
                    metric_key="cls_loss",
                    criterion_key="cls",
                ),
                dl.CriterionCallback(
                    input_key="s_logprobs",
                    target_key="t_probs",
                    metric_key="kl_div_loss",
                    criterion_key="kl",
                ),
                dl.MetricAggregationCallback(
                    prefix="loss",
                    metrics=["kl_div_loss", "cls_loss"],
                    mode="mean"),
                dl.OptimizerCallback(metric_key="loss", model_key="student"),
                dl.CheckpointCallback(
                    logdir=logdir,
                    loader_key="valid",
                    metric_key="loss",
                    minimize=True,
                    save_n_best=3,
                ),
            ],
        )
Example #22
def train(dev_dir, logdir, device):
    if not config.joined_vocab:
        spm.SentencePieceTrainer.train(input=f'{dev_dir}/text',
                                       model_prefix=f'{dev_dir}/txt_bpe_ctx',
                                       model_type='bpe',
                                       vocab_size=config.src_vocab_size)
        spm.SentencePieceTrainer.train(
            input=f'{dev_dir}/cmd',
            model_prefix=f'{dev_dir}/cmd_bpe_ctx',
            model_type='bpe',
            vocab_size=config.tgt_vocab_size,
        )
        text_tokenizer = spm.SentencePieceProcessor(
            f'{dev_dir}/txt_bpe_ctx.model')
        cmd_tokenizer = spm.SentencePieceProcessor(
            f'{dev_dir}/cmd_bpe_ctx.model')

    else:
        spm.SentencePieceTrainer.train(
            input=f'{dev_dir}/all',
            model_prefix=f'{dev_dir}/all_bpe_ctx',
            model_type='bpe',
            vocab_size=config.src_vocab_size,
        )
        text_tokenizer = spm.SentencePieceProcessor(
            f'{dev_dir}/all_bpe_ctx.model')
        cmd_tokenizer = text_tokenizer

    train = pd.read_csv(f'{dev_dir}/train.csv', index_col=0)
    train = train.dropna()
    train['cmd_cleaned'] = train['cmd_cleaned'].apply(
        lambda cmd: cmd.replace('|', ' |'))
    train['util'] = train.cmd_cleaned.apply(
        lambda x: x.strip(' $()').split()[0])
    train = train[train.util != ']']
    train = train.reset_index(drop=True)

    mandf = pd.read_csv(f'{dev_dir}/man.csv', index_col=0)
    mandf['ctx'] = mandf.apply(make_ctx, axis=1)
    mandf = mandf.drop_duplicates(subset='cmd')
    mandf = mandf.set_index('cmd')

    train['ctx'] = train['util'].map(mandf.ctx)
    train.text_cleaned = train.text_cleaned + ' ' + train.ctx.fillna('')

    train['text_enc'] = train.text_cleaned.progress_apply(
        text_tokenizer.encode)
    train['cmd_enc'] = train.cmd_cleaned.progress_apply(cmd_tokenizer.encode)

    tdf = train[train.origin == 'original']
    tdf2 = train[train.origin != 'original']
    train, valid = train_test_split(tdf, test_size=500, random_state=SEED)
    train = pd.concat([train, tdf2]).reset_index(drop=True)

    train_ds = MtDataset(train.text_enc, train.cmd_enc, config, bos_id, eos_id,
                         pad_id)
    valid_ds = MtDataset(valid.text_enc, valid.cmd_enc, config, bos_id, eos_id,
                         pad_id)

    model = Transformer(config, pad_id)
    print('# params',
          sum(p.numel() for p in model.parameters() if p.requires_grad))

    loaders = {
        'train':
        data.DataLoader(train_ds, batch_size=config.batch_size, shuffle=True),
        'valid':
        data.DataLoader(valid_ds, batch_size=config.batch_size),
    }

    criterion = nn.CrossEntropyLoss(ignore_index=pad_id)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=config.optimizer_lr,
                                 weight_decay=config.weight_decay,
                                 amsgrad=True)
    callbacks = [
        dl.CheckpointCallback(config.num_epochs),  # positional save_n_best (older catalyst API)
    ]

    callbacks.append(dl.SchedulerCallback(mode="epoch"))
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        factor=config.plateau_factor,
        patience=3,
        cooldown=2,
        threshold=1e-3,
        min_lr=1e-6)

    shutil.rmtree(logdir, ignore_errors=True)
    os.makedirs(logdir, exist_ok=True)

    runner = dl.SupervisedRunner(device=device)
    runner.train(
        model=model,
        loaders=loaders,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler if config.schedule else None,
        num_epochs=config.num_epochs,
        verbose=True,
        logdir=logdir,
        callbacks=callbacks,
        #     check=True
    )