        max_len=max_len)
    return DataLoader(ds, batch_size=batch_size)


train_data_loader = create_data_loader(data, tokenizer, args.max_seq_length,
                                       args.batch_size)
test_data_loader = create_data_loader(test_data, tokenizer, args.max_seq_length,
                                      args.batch_size)

model = SentimentClassifier(3)
if args.init_checkpoint and os.path.exists(args.init_checkpoint):
    model.load_state_dict(torch.load(args.init_checkpoint))
model.to(device)

optimizer = AdamW(model.parameters(), lr=args.learning_rate)
total_steps = len(train_data_loader) * args.num_train_epochs
scheduler = get_linear_schedule_with_warmup(optimizer,
                                            num_warmup_steps=0,
                                            num_training_steps=total_steps)
loss_fn = nn.CrossEntropyLoss().to(device)  # was commented out, but the loop below needs a criterion

best_accuracy = 0
global_step = 0
for epoch_num in range(int(args.num_train_epochs)):
    model.train()
    train_loss, train_accuracy = 0, 0
    nb_train_steps, nb_train_examples = 0, 0
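    # Hedged sketch (not part of the original excerpt, which breaks off here):
    # a typical per-batch step for this setup. The batch keys 'input_ids',
    # 'attention_mask', and 'targets' are assumptions about what
    # create_data_loader yields.
    for batch in train_data_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        targets = batch['targets'].to(device)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        loss = loss_fn(outputs, targets)

        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        scheduler.step()  # the linear-warmup schedule steps once per batch
        optimizer.zero_grad()

        train_loss += loss.item()
        nb_train_steps += 1
        nb_train_examples += targets.size(0)
        global_step += 1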
def train_det(
    model: torch.nn.Module,
    train_file: ty.Union[str, pathlib.Path],
    span_digitizer: ty.Callable[[ty.Mapping[str, ty.Any]], datatools.FeaturefulSpan],
    types_lex: lexicon.Lexicon,
    out_dir: ty.Union[str, pathlib.Path],
    temp_dir: ty.Union[str, pathlib.Path],
    device: ty.Union[str, torch.device],
    epochs: int,
    patience: int,
    mention_boost: ty.Optional[float] = None,
    dev_file: ty.Optional[ty.Union[str, pathlib.Path]] = None,
    test_file: ty.Optional[ty.Union[str, pathlib.Path]] = None,
    train_batch_size: int = 32,
    eval_batch_size: int = 128,
    trainer_cls=runners.SinkTrainer,
    *,
    num_workers: int = 0,
    debug: bool = False,
    config: ty.Optional[ty.Dict[str, ty.Any]] = None,
    **kwargs,
) -> ty.Tuple[ignite.engine.Engine, ty.Iterable, ty.Dict[str, ty.Any]]:
    logger.info("Training mention detection")
    config = defaultdict(lambda: None, config if config is not None else dict())
    device = torch.device(device)  # type: ignore
    model = model.to(device)

    train_set = datatools.SpansDataset.from_json(
        train_file,
        span_digitizer=span_digitizer,
        tags_lexicon=types_lex,
        cache_dir=temp_dir,
        set_name="train_det",
    )
    train_loader = torch.utils.data.DataLoader(
        dataset=train_set,
        sampler=torch.utils.data.BatchSampler(
            torch.utils.data.RandomSampler(train_set),
            batch_size=train_batch_size,
            drop_last=False,
        ),
        collate_fn=lambda x: x[0],
        num_workers=num_workers,
    )

    dev_loader: ty.Optional[torch.utils.data.DataLoader]
    if dev_file is not None:
        dev_set = datatools.SpansDataset.from_json(
            dev_file,
            span_digitizer=span_digitizer,
            tags_lexicon=types_lex,
            cache_dir=temp_dir,
            set_name="dev_det",
        )
        dev_loader = torch.utils.data.DataLoader(
            dataset=dev_set,
            sampler=torch.utils.data.BatchSampler(
                torch.utils.data.SequentialSampler(dev_set),
                batch_size=eval_batch_size,
                drop_last=False,
            ),
            collate_fn=lambda x: x[0],
            num_workers=num_workers,
        )
    else:
        dev_set = None
        dev_loader = None

    if mention_boost is not None:
        class_weight = torch.tensor(
            [1 if c is None else mention_boost for c in types_lex.i2t],
            device=device,
            dtype=torch.float,
        )
        logger.debug(f"Training with weights {class_weight} for weighted nll_loss")

        def loss_fun(output, target):
            return libdecofre.averaged_nll_loss(
                output.to(device=device),
                target.to(device=device),
                weight=class_weight,
            )

    else:
        logger.debug("Training with unweighted batch-averaged NLL loss")

        def loss_fun(output, target):
            return torch.nn.functional.nll_loss(
                output.to(device=device),
                target.to(device=device),
                reduction="mean",
            )

    # TODO: use accuracy instead?
    def dev_loss(output, target):
        return torch.nn.functional.nll_loss(
            output.to(device=device),
            target.to(device=device),
            reduction="mean",
        )

    train_classif = runners.ClassificationMetrics(
        types_lex.i2t,
        output_transform=runners.extract_output,
        aggregates={"mentions": [t for t in types_lex.i2t if t is not None]},
    )
    dev_classif = runners.ClassificationMetrics(
        types_lex.i2t,
        output_transform=runners.extract_output,
        aggregates={"mentions": [t for t in types_lex.i2t if t is not None]},
    )

    optimizer = AdamW(
        filter(lambda x: x.requires_grad, model.parameters()),
        lr=config["lr"],
        weight_decay=config["weight-decay"],
    )
    det_trainer = trainer_cls(
        model,
        checkpointed_models={"det": model},
        loss_fun=loss_fun,
        optimizer=optimizer,
        dev_loss=dev_loss,
        train_metrics={"classif": train_classif},
        dev_metrics={"classif": dev_classif},
        save_path=out_dir,
        debug=debug,
        **kwargs,
    )

    if config["lr-schedule"] == "step":
        logger.debug("Training with 'step' LR schedule, using γ=0.95")
        torch_lr_scheduler = torch.optim.lr_scheduler.StepLR(
            optimizer, len(train_loader), gamma=0.95
        )
        scheduler = ignite.contrib.handlers.create_lr_scheduler_with_warmup(
            torch_lr_scheduler,
            warmup_start_value=0.0,
            warmup_end_value=optimizer.defaults["lr"],
            warmup_duration=1000,
        )
        det_trainer.add_event_handler(ignite.engine.Events.ITERATION_STARTED, scheduler)

    return (
        det_trainer,
        train_loader,
        {
            "max_epochs": epochs,
            "patience": patience,
            "dev_loader": dev_loader,
            "run_name": "mention_detection",
        },
    )
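# Hedged usage sketch (not from the original source): how a caller might wire
# train_det into an ignite run loop. `det_model`, `digitize_span`, `lex`, the
# file paths, and the config values are placeholders for whatever the
# surrounding pipeline defines.
#
#     trainer, loader, run_kwargs = train_det(
#         model=det_model,
#         train_file="spans/train.json",
#         span_digitizer=digitize_span,
#         types_lex=lex,
#         out_dir="out",
#         temp_dir="tmp",
#         device="cuda",
#         epochs=20,
#         patience=3,
#         dev_file="spans/dev.json",
#         config={"lr": 1e-4, "weight-decay": 0.0, "lr-schedule": "step"},
#     )
#     trainer.run(loader, max_epochs=run_kwargs["max_epochs"])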
# Optimize for cross-entropy using AdamW
criterion = {
    "CE": CrossentropyND(),
}
learning_rate = 0.001
encoder_learning_rate = 0.0005
encoder_weight_decay = 0.00003
optimizer_weight_decay = 0.0003
optim_factor = 0.25
optim_patience = 2

optimizer = AdamW(
    model.parameters(),
    lr=learning_rate,
    betas=(0.9, 0.999),
    eps=1e-08,
    weight_decay=0.01,
    amsgrad=False,
)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                 factor=optim_factor,
                                                 patience=optim_patience)
num_epochs = 10
device = utils.get_device()
runner = SupervisedRunner(device=device, input_key="image", input_target_key="mask")
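# Hedged sketch (not from the original source): with Catalyst's
# SupervisedRunner, training is typically launched as below, and
# ReduceLROnPlateau is stepped by the framework from the validation metric.
# `loaders` (a dict of train/valid DataLoaders) and the logdir are placeholders.
#
#     runner.train(
#         model=model,
#         criterion=criterion["CE"],
#         optimizer=optimizer,
#         scheduler=scheduler,
#         loaders=loaders,
#         num_epochs=num_epochs,
#         logdir="./logs",
#         verbose=True,
#     )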
class Recommender(object):
    def __init__(self, train, test, device, args, item_probs=None, doc2vec=None):
        self.args = args
        self.device = device
        self.train = train
        self.test = test
        self.test_sequence = train.test_sequences
        self._num_items = train.num_items
        self._num_users = train.num_users

        self._net = SelCa(self._num_users, self._num_items, args).to(self.device)
        self._optimizer = AdamW(self._net.parameters(),
                                weight_decay=args.l2,
                                lr=args.learning_rate)
        self.scheduler = StepLR(self._optimizer,
                                step_size=args.decay_step,
                                gamma=args.lr_decay)

        train_dataset = SelCaDataset(train, num_neg_samples=args.neg_samples)
        self.train_dataloader = DataLoader(train_dataset,
                                           batch_size=args.batch_size,
                                           shuffle=True,
                                           num_workers=args.n_jobs,
                                           pin_memory=True)

        # Initialize item, user, and category embeddings from doc2vec vectors.
        if doc2vec is not None:
            item_vecs = []
            for i in range(1, self._num_items):
                item_vec = normalize(doc2vec.wv[f'i_{i}'].reshape(1, -1))
                item_vecs.append(item_vec.reshape(-1))
                self._net.item_embeddings.weight.data[i] = torch.FloatTensor(item_vec)
            for i in range(self._num_users):
                user_vec = normalize(doc2vec.docvecs[f'u_{i}'].reshape(1, -1))
                self._net.user_embeddings.weight.data[i] = torch.FloatTensor(user_vec)
            item_vecs = np.stack(item_vecs)
            category_vec = normalize(item_probs @ item_vecs)
            self._net.category_embeddings.weight.data = torch.FloatTensor(
                category_vec).to(device)

    def train_one_epoch(self):
        self._net.train()
        avg_loss = 0.0
        for minibatch_num, (user, sequence, prob, neg_samples,
                            target) in enumerate(self.train_dataloader):
            user = user.to(self.device)
            sequence = sequence.to(self.device)
            prob = prob.to(self.device)
            target = target.to(self.device)
            neg_samples = neg_samples.to(self.device)

            target_prediction = self._net(sequence, user, target, prob, self.device)
            negative_prediction = self._net(sequence, user, neg_samples, prob,
                                            self.device, use_cache=True)

            self._optimizer.zero_grad()
            # Binary cross-entropy over positive targets and sampled negatives.
            positive_loss = -torch.mean(
                torch.log(torch.sigmoid(target_prediction) + 1e-8))
            negative_loss = -torch.mean(
                torch.log(1 - torch.sigmoid(negative_prediction) + 1e-8))
            loss = positive_loss + negative_loss
            loss.backward()
            self._optimizer.step()

            avg_loss += loss.item()

        avg_loss /= minibatch_num + 1
        self.scheduler.step()
        return avg_loss

    def fit(self):
        valid_aps = 0
        for e in range(self.args.n_epochs):
            t1 = time()
            avg_loss = self.train_one_epoch()
            t2 = time()
            if e % 5 == 0 or e == self.args.n_epochs - 1:
                precision, recall, mean_aps = evaluate_ranking(self,
                                                               self.test,
                                                               self.train,
                                                               k=[1, 5, 10])
                precs = [np.mean(p) for p in precision]
                recalls = [np.mean(r) for r in recall]
                output_str = (
                    f"Epoch {e+1} [{t2-t1:.1f}s]\tloss={avg_loss:.4f}, map={mean_aps:.4f}, "
                    f"prec@1={precs[0]:.4f}, prec@5={precs[1]:.4f}, prec@10={precs[2]:.4f}, "
                    f"recall@1={recalls[0]:.4f}, recall@5={recalls[1]:.4f}, "
                    f"recall@10={recalls[2]:.4f}, [{time()-t2:.1f}s]")
                if mean_aps >= valid_aps:
                    valid_aps = mean_aps  # fixed: keep the best mAP (assignment was reversed)
                else:
                    break  # early stop once validation mAP degrades
                print(output_str)
        return {
            'epochs': e,
            'loss': avg_loss,
            'mAP': mean_aps,
            'prec1': precs[0],
            'prec5': precs[1],
            'prec10': precs[2],
            'recall1': recalls[0],
            'recall5': recalls[1],
            'recall10': recalls[2],
        }

    def predict(self, user_id, item_ids=None):
        self._net.eval()
        sequence = np.atleast_2d(self.test_sequence.sequences[user_id, :])
        # Fetch the probabilities with the integer user_id before it is
        # overwritten by its tensor version below (the original indexed the
        # numpy array with the device tensor, which fails on CUDA).
        probs = torch.from_numpy(
            np.atleast_2d(self.test_sequence.probs[user_id, :])).to(self.device)

        with torch.no_grad():
            sequences = torch.from_numpy(
                sequence.astype(np.int64).reshape(1, -1)).to(self.device)
            item_ids = torch.from_numpy(
                np.arange(self._num_items).reshape(-1, 1).astype(np.int64)).to(
                    self.device)
            user_id = torch.from_numpy(
                np.array([[user_id]]).astype(np.int64)).to(self.device)
            out = self._net(sequences, user_id, item_ids, probs, self.device,
                            for_pred=True)
        return out
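# Hedged usage sketch (not from the original source): `train_interactions`,
# `test_interactions`, and `args` stand in for the caller's dataset objects
# and hyperparameter namespace.
#
#     recommender = Recommender(train_interactions, test_interactions,
#                               torch.device('cuda'), args)
#     metrics = recommender.fit()
#     scores = recommender.predict(user_id=0)  # scores over every item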
def adamw(model: Module) -> OptimizerSchedulerBundle:
    optimizer = AdamW(model.parameters())
    return OptimizerSchedulerBundle(optimizer=optimizer)
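# Hedged usage sketch: exercising the bundle factory on a throwaway linear
# model. That OptimizerSchedulerBundle exposes the optimizer as `.optimizer`
# (mirroring the keyword above) is an assumption.
#
#     bundle = adamw(torch.nn.Linear(4, 2))
#     bundle.optimizer.zero_grad()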
def main():
    parser = argparse.ArgumentParser(
        description='20bn-jester-v1 Gesture Classification with Backpropamine')
    parser.add_argument('--batch-size', type=int, default=8, metavar='N',
                        help='input batch size for training (default: 8)')
    # parser.add_argument('--validation-batch-size', type=int, default=1000, metavar='N',
    #                     help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=100, metavar='N',
                        help='number of epochs to train (default: 100)')
    parser.add_argument('--num-workers', type=int, default=0, metavar='W',
                        help='number of workers for data loading (default: 0)')
    parser.add_argument('--lr', type=float, default=0.0001, metavar='LR',
                        help='learning rate (default: 0.0001)')
    parser.add_argument('--gamma', type=float, default=0.7, metavar='M',
                        help='learning rate step gamma (default: 0.7)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--dry-run', action='store_true', default=False,
                        help='quickly check a single pass')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--dataset-dir', type=str, default=r"./dataset", metavar='D',
                        help='dataset location (default: ./dataset)')
    # parser.add_argument('--log-interval', type=int, default=10, metavar='N',
    #                     help='how many batches to wait before logging training status')
    # parser.add_argument('--save-model', action='store_true', default=False,
    #                     help='For Saving the current Model')
    parser.add_argument('--no-resume', action='store_true', default=False,
                        help='switch to disable resuming from a checkpoint')
    parser.add_argument('--use-lstm', action='store_true', default=False,
                        help='switch to use an LSTM module instead of backpropamine')
    parser.add_argument('--frame-step', type=int, default=2, metavar='FS',
                        help='step of video frame extraction (default: 2)')
    args = parser.parse_args()

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')
    torch.manual_seed(args.seed)

    train_data = MyDataset('train', args.dataset_dir, frame_step=args.frame_step)
    validation_data = MyDataset('validation', args.dataset_dir,
                                frame_step=args.frame_step)
    train_dataloader = DataLoader(train_data,
                                  batch_size=args.batch_size,
                                  drop_last=True,
                                  shuffle=True,
                                  collate_fn=collate_fn,
                                  num_workers=args.num_workers)
    validation_dataloader = DataLoader(validation_data,
                                       batch_size=args.batch_size,
                                       drop_last=True,
                                       shuffle=True,
                                       collate_fn=collate_fn,
                                       num_workers=args.num_workers)

    resume = not args.no_resume
    if resume:
        try:
            checkpoint = torch.load("checkpoint.pt")
        except FileNotFoundError:
            resume = False

    mode = 'LSTM' if args.use_lstm else 'backpropamine'
    model = Net(mode=mode).to(device)
    optimizer = AdamW(model.parameters(), lr=args.lr)
    scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)

    last_epoch, max_epoch = 0, args.epochs
    if resume:
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
        last_epoch = checkpoint['last_epoch']

    validator = Validator(model, validation_dataloader, device, args.dry_run)
    trainer = Trainer(model, optimizer, train_dataloader, scheduler, last_epoch,
                      max_epoch, device, validator, args.dry_run)
    print(vars(args))
    trainer()
    print("finish.")
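# Standard entry-point guard; an assumption, since the excerpt ends inside
# main()'s body, but this is how such a script is conventionally invoked.
if __name__ == '__main__':
    main()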