Example 1
def create_data_loader(data, tokenizer, max_len, batch_size):
    # ReviewDataset here is a hypothetical placeholder for the script's
    # tokenizing Dataset class, which is defined elsewhere in the file.
    ds = ReviewDataset(data, tokenizer=tokenizer, max_len=max_len)

    return DataLoader(ds, batch_size=batch_size)


train_data_loader = create_data_loader(data, tokenizer, args.max_seq_length,
                                       args.batch_size)
test_data_loader = create_data_loader(test_data, tokenizer,
                                      args.max_seq_length, args.batch_size)

model = SentimentClassifier(3)
if args.init_checkpoint and os.path.exists(args.init_checkpoint):
    model.load_state_dict(torch.load(args.init_checkpoint))
model.to(device)

optimizer = AdamW(model.parameters(), lr=args.learning_rate)

total_steps = len(train_data_loader) * args.num_train_epochs

scheduler = get_linear_schedule_with_warmup(optimizer,
                                            num_warmup_steps=0,
                                            num_training_steps=total_steps)

# loss_fn = nn.CrossEntropyLoss().to(device)

best_accuracy = 0
global_step = 0
for epoch_num in range(int(args.num_train_epochs)):
    model.train()
    train_loss, train_accuracy = 0, 0
    nb_train_steps, nb_train_examples = 0, 0
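    # NOTE: the snippet stops here; the rest of the epoch loop below is a
    # hedged sketch. The batch keys ("input_ids", "attention_mask", "targets")
    # and the cross-entropy loss (cf. the commented-out loss_fn above) are
    # assumptions, not preserved code.
    for batch in train_data_loader:
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        targets = batch["targets"].to(device)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        loss = torch.nn.functional.cross_entropy(outputs, targets)

        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()

        train_loss += loss.item()
        train_accuracy += (outputs.argmax(dim=1) == targets).sum().item()
        nb_train_steps += 1
        nb_train_examples += targets.size(0)
        global_step += 1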
Example 2
def train_det(
    model: torch.nn.Module,
    train_file: ty.Union[str, pathlib.Path],
    span_digitizer: ty.Callable[[ty.Mapping[str, ty.Any]],
                                datatools.FeaturefulSpan],
    types_lex: lexicon.Lexicon,
    out_dir: ty.Union[str, pathlib.Path],
    temp_dir: ty.Union[str, pathlib.Path],
    device: ty.Union[str, torch.device],
    epochs: int,
    patience: int,
    mention_boost: ty.Optional[float] = None,
    dev_file: ty.Optional[ty.Union[str, pathlib.Path]] = None,
    test_file: ty.Optional[ty.Union[str, pathlib.Path]] = None,
    train_batch_size: int = 32,
    eval_batch_size: int = 128,
    trainer_cls=runners.SinkTrainer,
    *,
    num_workers: int = 0,
    debug: bool = False,
    config: ty.Optional[ty.Dict[str, ty.Any]] = None,
    **kwargs,
) -> ty.Tuple[ignite.engine.Engine, ty.Iterable, ty.Dict[str, ty.Any]]:
    logger.info("Training mention detection")
    config = defaultdict(lambda: None,
                         config if config is not None else dict())
    device = torch.device(device)  # type: ignore
    model = model.to(device)
    train_set = datatools.SpansDataset.from_json(
        train_file,
        span_digitizer=span_digitizer,
        tags_lexicon=types_lex,
        cache_dir=temp_dir,
        set_name="train_det",
    )
    train_loader = torch.utils.data.DataLoader(
        dataset=train_set,
        sampler=torch.utils.data.BatchSampler(
            torch.utils.data.RandomSampler(train_set),
            batch_size=train_batch_size,
            drop_last=False,
        ),
        collate_fn=lambda x: x[0],
        num_workers=num_workers,
    )
    dev_loader: ty.Optional[torch.utils.data.DataLoader]
    if dev_file is not None:
        dev_set = datatools.SpansDataset.from_json(
            dev_file,
            span_digitizer=span_digitizer,
            tags_lexicon=types_lex,
            cache_dir=temp_dir,
            set_name="dev_det",
        )
        dev_loader = torch.utils.data.DataLoader(
            dataset=dev_set,
            sampler=torch.utils.data.BatchSampler(
                torch.utils.data.SequentialSampler(dev_set),
                batch_size=eval_batch_size,
                drop_last=False,
            ),
            collate_fn=lambda x: x[0],
            num_workers=num_workers,
        )

    else:
        dev_set = None
        dev_loader = None

    if mention_boost is not None:
        class_weight = torch.tensor(
            [1 if c is None else mention_boost for c in types_lex.i2t],
            device=device,
            dtype=torch.float,
        )
        logger.debug(
            f"Training with weights {class_weight} for weighted nll_loss")

        def loss_fun(output, target):
            return libdecofre.averaged_nll_loss(output.to(device=device),
                                                target.to(device=device),
                                                weight=class_weight)

    else:
        logger.debug("Training with unweighted batch-averaged NLL loss")

        def loss_fun(output, target):
            return torch.nn.functional.nll_loss(output.to(device=device),
                                                target.to(device=device),
                                                reduction="mean")

    # TODO: use accuracy instead ?
    def dev_loss(output, target):
        return torch.nn.functional.nll_loss(output.to(device=device),
                                            target.to(device=device),
                                            reduction="mean")

    train_classif = runners.ClassificationMetrics(
        types_lex.i2t,
        output_transform=runners.extract_output,
        aggregates={"mentions": [t for t in types_lex.i2t if t is not None]},
    )
    dev_classif = runners.ClassificationMetrics(
        types_lex.i2t,
        output_transform=runners.extract_output,
        aggregates={"mentions": [t for t in types_lex.i2t if t is not None]},
    )
    optimizer = AdamW(
        filter(lambda x: x.requires_grad, model.parameters()),
        lr=config["lr"],
        weight_decay=config["weight-decay"],
    )
    det_trainer = trainer_cls(
        model,
        checkpointed_models={"det": model},
        loss_fun=loss_fun,
        optimizer=optimizer,
        dev_loss=dev_loss,
        train_metrics={"classif": train_classif},
        dev_metrics={"classif": dev_classif},
        save_path=out_dir,
        debug=debug,
        **kwargs,
    )
    if config["lr-schedule"] == "step":
        logger.debug("Training with 'step' LR schedule, using γ=0.95")
        torch_lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                             len(train_loader),
                                                             gamma=0.95)
        scheduler = ignite.contrib.handlers.create_lr_scheduler_with_warmup(
            torch_lr_scheduler,
            warmup_start_value=0.0,
            warmup_end_value=optimizer.defaults["lr"],
            warmup_duration=1000,
        )
        det_trainer.add_event_handler(ignite.engine.Events.ITERATION_STARTED,
                                      scheduler)

    return (
        det_trainer,
        train_loader,
        {
            "max_epochs": epochs,
            "patience": patience,
            "dev_loader": dev_loader,
            "run_name": "mention_detection",
        },
    )
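For orientation, here is a hedged sketch of how the returned triple might be consumed. `Engine.run` is ignite's standard entry point; the file paths and argument values below are placeholders, and the extra keys in the returned dict (patience, dev_loader, run_name) are presumably interpreted by the project's own runner.

# Hedged usage sketch: paths and argument values below are placeholders.
det_trainer, train_loader, run_config = train_det(
    model=model,
    train_file="train.json",
    span_digitizer=span_digitizer,
    types_lex=types_lex,
    out_dir="out",
    temp_dir="tmp",
    device="cuda",
    epochs=20,
    patience=3,
)
# ignite's Engine.run drives the training loop; early stopping on dev_loader
# with the given patience is presumably wired up by the project's own harness.
det_trainer.run(train_loader, max_epochs=run_config["max_epochs"])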
Example 3
# Optimize for cross-entropy using AdamW
criterion = {
    "CE": CrossentropyND(),
}

learning_rate = 0.001
encoder_learning_rate = 0.0005
encoder_weight_decay = 0.00003
optimizer_weight_decay = 0.0003
optim_factor = 0.25
optim_patience = 2

optimizer = AdamW(
    model.parameters(),
    lr=0.001,
    betas=(0.9, 0.999),
    eps=1e-08,
    weight_decay=0.01,
    amsgrad=False,
)

scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                 factor=optim_factor,
                                                 patience=optim_patience)

num_epochs = 10
device = utils.get_device()

runner = SupervisedRunner(device=device,
                          input_key="image",
                          input_target_key="mask")
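The snippet stops once the Catalyst runner is constructed. A minimal sketch of how a SupervisedRunner of this vintage is typically launched, assuming train/valid loaders and a log directory that are not shown above; the keyword names follow Catalyst's 20.x runner.train API.

# Hedged sketch: `train_loader`, `valid_loader` and `logdir` are assumptions.
loaders = {"train": train_loader, "valid": valid_loader}

runner.train(
    model=model,
    criterion=criterion["CE"],
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    num_epochs=num_epochs,
    logdir="./logs",
    verbose=True,
)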
Example 4
class Recommender(object):
    def __init__(self,
                 train,
                 test,
                 device,
                 args,
                 item_probs=None,
                 doc2vec=None):
        self.args = args
        self.device = device

        self.train = train
        self.test = test

        self.test_sequence = train.test_sequences
        self._num_items = train.num_items
        self._num_users = train.num_users

        self._net = SelCa(self._num_users, self._num_items,
                          args).to(self.device)

        self._optimizer = AdamW(self._net.parameters(),
                                weight_decay=args.l2,
                                lr=args.learning_rate)
        self.scheduler = StepLR(self._optimizer,
                                step_size=args.decay_step,
                                gamma=args.lr_decay)

        train_dataset = SelCaDataset(train, num_neg_samples=args.neg_samples)
        self.train_dataloader = DataLoader(train_dataset,
                                           batch_size=args.batch_size,
                                           shuffle=True,
                                           num_workers=args.n_jobs,
                                           pin_memory=True)

        # initialize category embedding vector
        if doc2vec is not None:
            item_vecs = []
            for i in range(1, self._num_items):
                item_vec = normalize(doc2vec.wv[f'i_{i}'].reshape(1, -1))
                item_vecs.append(item_vec.reshape(-1))
                self._net.item_embeddings.weight.data[i] = torch.FloatTensor(
                    item_vec)

            for i in range(self._num_users):
                user_vec = normalize(doc2vec.docvecs[f'u_{i}'].reshape(1, -1))
                self._net.user_embeddings.weight.data[i] = torch.FloatTensor(
                    user_vec)

            item_vecs = np.stack(item_vecs)
            category_vec = item_probs @ item_vecs
            category_vec = normalize(category_vec)
            self._net.category_embeddings.weight.data = torch.FloatTensor(
                category_vec).to(device)

    def train_one_epoch(self):
        self._net.train()
        avg_loss = 0.0
        for minibatch_num, (user, sequence, prob, neg_samples,
                            target) in enumerate(self.train_dataloader):
            user = user.to(self.device)
            sequence = sequence.to(self.device)
            prob = prob.to(self.device)
            target = target.to(self.device)
            neg_samples = neg_samples.to(self.device)

            target_prediction = self._net(sequence, user, target, prob,
                                          self.device)
            negative_prediction = self._net(sequence,
                                            user,
                                            neg_samples,
                                            prob,
                                            self.device,
                                            use_cache=True)

            self._optimizer.zero_grad()

            positive_loss = -torch.mean(
                torch.log(torch.sigmoid(target_prediction) + 1e-8))
            negative_loss = -torch.mean(
                torch.log(1 - torch.sigmoid(negative_prediction) + 1e-8))
            loss = positive_loss + negative_loss
            loss.backward()
            self._optimizer.step()
            avg_loss += loss.item()

        avg_loss /= minibatch_num + 1
        self.scheduler.step()
        return avg_loss

    def fit(self):
        # train
        valid_aps = 0
        for e in range(self.args.n_epochs):
            t1 = time()
            avg_loss = self.train_one_epoch()
            t2 = time()
            if e % 5 == 0 or e == self.args.n_epochs - 1:
                precision, recall, mean_aps = evaluate_ranking(self,
                                                               self.test,
                                                               self.train,
                                                               k=[1, 5, 10])
                precs = [np.mean(p) for p in precision]
                recalls = [np.mean(r) for r in recall]
                output_str = f"Epoch {e+1} [{t2-t1:.1f}s]\tloss={avg_loss:.4f}, map={mean_aps:.4f}, " \
                             f"prec@1={precs[0]:.4f}, prec@5={precs[1]:.4f}, prec@10={precs[2]:.4f}, " \
                             f"recall@1={recalls[0]:.4f}, recall@5={recalls[1]:.4f}, recall@10={recalls[2]:.4f}, [{time()-t2:.1f}s]"

                # keep the best validation mAP so far; stop once it degrades
                if mean_aps >= valid_aps:
                    valid_aps = mean_aps
                else:
                    break

        print(output_str)
        return {
            'epochs': e,
            'loss': avg_loss,
            'mAP': mean_aps,
            'prec1': precs[0],
            'prec5': precs[1],
            'prec10': precs[2],
            'recall1': recalls[0],
            'recall5': recalls[1],
            'recall10': recalls[2]
        }

    def predict(self, user_id, item_ids=None):
        self._net.eval()

        sequence = self.test_sequence.sequences[user_id, :]
        sequence = np.atleast_2d(sequence)

        with torch.no_grad():
            sequences = torch.from_numpy(
                sequence.astype(np.int64).reshape(1, -1)).to(self.device)
            item_ids = torch.from_numpy(
                np.arange(self._num_items).reshape(-1, 1).astype(np.int64)).to(
                    self.device)
            user_id = torch.from_numpy(np.array([[user_id]]).astype(
                np.int64)).to(self.device)
            probs = torch.from_numpy(self.test_sequence.probs[user_id, :]).to(
                self.device)
            out = self._net(sequences,
                            user_id,
                            item_ids,
                            probs,
                            self.device,
                            for_pred=True)
        return out
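A hedged usage sketch of the class above; the train/test interaction objects, args, and the optional item_probs/doc2vec inputs are assumed to be prepared by the surrounding training script.

# Hedged sketch: every input here is an assumption about the calling script.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
recommender = Recommender(train, test, device, args,
                          item_probs=item_probs, doc2vec=doc2vec)
results = recommender.fit()              # trains with early stopping on mAP
scores = recommender.predict(user_id=0)  # scores every item for user 0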
Example 5
def adamw(model: Module) -> OptimizerSchedulerBundle:
    optimizer = AdamW(model.parameters())
    return OptimizerSchedulerBundle(optimizer=optimizer)
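A hedged usage sketch, assuming OptimizerSchedulerBundle simply exposes the wrapped optimizer as an attribute; compute_loss and train_loader are hypothetical names.

# Hedged sketch: `compute_loss` and `train_loader` are hypothetical.
bundle = adamw(model)
for batch in train_loader:
    loss = compute_loss(model, batch)
    bundle.optimizer.zero_grad()
    loss.backward()
    bundle.optimizer.step()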
Example 6
def main():
    parser = argparse.ArgumentParser(
        description='20bn-jester-v1 Gesture Classification with Backpropamine')
    parser.add_argument('--batch-size',
                        type=int,
                        default=8,
                        metavar='N',
                        help='input batch size for training (default: 8)')
    #parser.add_argument('--validation-batch-size', type=int, default=1000, metavar='N',
    #                    help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs',
                        type=int,
                        default=100,
                        metavar='N',
                        help='number of epochs to train (default: 100)')
    parser.add_argument('--num-workers',
                        type=int,
                        default=0,
                        metavar='W',
                        help='number of workers for data loading (default: 0)')
    parser.add_argument('--lr',
                        type=float,
                        default=0.0001,
                        metavar='LR',
                        help='learning rate (default: 0.0001)')
    parser.add_argument('--gamma',
                        type=float,
                        default=0.7,
                        metavar='M',
                        help='Learning rate step gamma (default: 0.7)')
    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')
    parser.add_argument('--dry-run',
                        action='store_true',
                        default=False,
                        help='quickly check a single pass')
    parser.add_argument('--seed',
                        type=int,
                        default=1,
                        metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--dataset-dir',
                        type=str,
                        default=r"./dataset",
                        metavar='D',
                        help='dataset place (default: ./dataset)')
    #parser.add_argument('--log-interval', type=int, default=10, metavar='N',
    #                    help='how many batches to wait before logging training status')
    #parser.add_argument('--save-model', action='store_true', default=False,
    #                    help='For Saving the current Model')
    parser.add_argument('--no-resume',
                        action='store_true',
                        default=False,
                        help='switch to disable resume')
    parser.add_argument(
        '--use-lstm',
        action='store_true',
        default=False,
        help='switch to use LSTM module instead of backpropamine')
    parser.add_argument('--frame-step',
                        type=int,
                        default=2,
                        metavar='FS',
                        help='step of video frames extraction (default: 2)')
    args = parser.parse_args()

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')

    torch.manual_seed(args.seed)

    train_data = MyDataset('train',
                           args.dataset_dir,
                           frame_step=args.frame_step)
    validation_data = MyDataset('validation',
                                args.dataset_dir,
                                frame_step=args.frame_step)
    train_dataloader = DataLoader(train_data,
                                  batch_size=args.batch_size,
                                  drop_last=True,
                                  shuffle=True,
                                  collate_fn=collate_fn,
                                  num_workers=args.num_workers)
    validation_dataloader = DataLoader(validation_data,
                                       batch_size=args.batch_size,
                                       drop_last=True,
                                       shuffle=True,
                                       collate_fn=collate_fn,
                                       num_workers=args.num_workers)

    resume = not args.no_resume

    if resume:
        try:
            checkpoint = torch.load("checkpoint.pt")
        except FileNotFoundError:
            resume = False

    mode = 'LSTM' if args.use_lstm else 'backpropamine'
    model = Net(mode=mode).to(device)
    optimizer = AdamW(model.parameters(), lr=args.lr)
    scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)

    last_epoch, max_epoch = 0, args.epochs

    if resume:
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
        last_epoch = checkpoint['last_epoch']

    validator = Validator(model, validation_dataloader, device, args.dry_run)
    trainer = Trainer(model, optimizer, train_dataloader, scheduler,
                      last_epoch, max_epoch, device, validator, args.dry_run)

    print(vars(args))
    trainer()
    print("finish.")