def build_trainer(
    config,
    lr: float,
    serialization_dir: str,
    num_epochs: int,
    model: Model,
    train_loader: DataLoader,
    dev_loader: DataLoader,
) -> Trainer:
    parameters = [(n, p) for n, p in model.named_parameters() if p.requires_grad]
    optimizer = AdamOptimizer(parameters, lr=lr)
    if torch.cuda.is_available():
        model.cuda()
    # Remove the serialization dir left over from a previous run if configured to do so.
    if os.path.exists(serialization_dir) and config.shutil_pre_finished_experiment:
        shutil.rmtree(serialization_dir)
    if not os.path.exists(serialization_dir):
        os.makedirs(serialization_dir)
    trainer = GradientDescentTrainer(
        model=model,
        data_loader=train_loader,
        validation_data_loader=dev_loader,
        num_epochs=num_epochs,
        optimizer=optimizer,
        serialization_dir=serialization_dir,
        cuda_device=0 if torch.cuda.is_available() else -1,
    )
    return trainer
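# The snippets in this section omit their imports. A minimal sketch of what they
# assume (module paths follow AllenNLP ~1.x/2.x and may differ by version):
import os
import shutil
import torch
from allennlp.data import DataLoader
from allennlp.models import Model
from allennlp.training import GradientDescentTrainer, Trainer
from allennlp.training.optimizers import AdamOptimizer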
def build_trainer(
    model: Model,
    serialization_dir: str,
    train_loader: DataLoader,
    dev_loader: DataLoader = None,
    num_epochs: int = 1,
    cuda_device: int = -1,
    patience: int = None,
) -> Trainer:
    parameters = [[n, p] for n, p in model.named_parameters() if p.requires_grad]
    optimizer = AdamOptimizer(parameters)
    trainer = GradientDescentTrainer(
        model=model,
        serialization_dir=serialization_dir,
        data_loader=train_loader,
        validation_data_loader=dev_loader,
        num_epochs=num_epochs,
        optimizer=optimizer,
        cuda_device=cuda_device,
        patience=patience,
    )
    return trainer
def build_trainer(
    model: Model,
    serialization_dir: str,
    train_loader: DataLoader,
    dev_loader: DataLoader,
) -> Trainer:
    parameters = [[n, p] for n, p in model.named_parameters() if p.requires_grad]
    checkpointer = Checkpointer(serialization_dir, num_serialized_models_to_keep=0)
    optimizer = AdamOptimizer(parameters)
    trainer = GradientDescentTrainer(
        model=model,
        serialization_dir=serialization_dir,
        checkpointer=checkpointer,
        data_loader=train_loader,
        validation_data_loader=dev_loader,
        num_epochs=50,
        optimizer=optimizer,
        cuda_device=0,
        validation_metric="-loss",
        patience=5,
    )
    return trainer
def train(args, model, dataset_reader, train_loader, device=0, val_loader=None,
          test_data=None, num_epochs=10, patience=None, serialization_dir=None):
    optimizer = AdamOptimizer(model.named_parameters(), lr=args.lr, weight_decay=args.l2)
    trainer = GradientDescentTrainer(
        model=model,
        optimizer=optimizer,
        data_loader=train_loader,
        validation_data_loader=val_loader,
        cuda_device=device,
        num_epochs=num_epochs,
        serialization_dir=serialization_dir,
        patience=patience,
        grad_clipping=args.clip,
    )
    trainer.train()

    if test_data is not None:
        predictor = Seq2SeqPredictor(model, dataset_reader)
        for instance in itertools.islice(test_data, 10):
            print('SOURCE:', instance.fields['source_tokens'].tokens)
            print('GOLD:', instance.fields['target_tokens'].tokens)
            print('PRED:', predictor.predict_instance(instance)['predicted_tokens'])
            print('-' * 50)
def build_trainer(model: Model, serialization_dir: str,
                  train_loader: DataLoader, dev_loader: DataLoader) -> Trainer:
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print("cpu/gpu? ", device)
    model = model.to(device)
    parameters = [[n, p] for n, p in model.named_parameters() if p.requires_grad]
    optimizer = AdamOptimizer(parameters, lr=0.001)
    scheduler = ReduceOnPlateauLearningRateScheduler(optimizer=optimizer, patience=5, verbose=True)
    trainer = GradientDescentTrainer(
        model=model,
        serialization_dir=serialization_dir,
        cuda_device=device,
        data_loader=train_loader,
        validation_data_loader=dev_loader,
        learning_rate_scheduler=scheduler,
        patience=20,
        num_epochs=200,
        optimizer=optimizer,
        validation_metric="+f1",
    )
    return trainer
def build_trainer(model: Model, serialization_dir: str,
                  train_loader: PyTorchDataLoader, dev_loader: PyTorchDataLoader) -> Trainer:
    parameters = [[n, p] for n, p in model.named_parameters() if p.requires_grad]
    # lr, num_epoch, and grad_accum are assumed to be defined at module scope.
    optimizer = AdamOptimizer(parameters, lr=lr)
    trainer = GradientDescentTrainer(
        model=model,
        serialization_dir=serialization_dir,
        data_loader=train_loader,
        validation_data_loader=dev_loader,
        num_epochs=num_epoch,
        optimizer=optimizer,
        num_gradient_accumulation_steps=grad_accum,
    )
    return trainer
def init_trainer(self) -> Trainer:
    parameters = [(n, p) for n, p in self.model.named_parameters() if p.requires_grad]
    optimizer = AdamOptimizer(parameters, lr=self.config.lr)  # type: ignore
    trainer = GradientDescentTrainer(
        model=self.model,
        serialization_dir='./output',
        data_loader=self.train_data_loader,
        validation_data_loader=self.dev_data_loader,
        num_epochs=self.config.epoch,
        optimizer=optimizer,
        cuda_device=self.config.device,
    )
    return trainer
def build_trainer(model: Model, serialization_dir: str,
                  train_loader: DataLoader, dev_loader: DataLoader) -> Trainer:
    parameters = [[n, p] for n, p in model.named_parameters() if p.requires_grad]
    optimizer = AdamOptimizer(parameters)
    trainer = GradientDescentTrainer(
        model=model,
        # serialization_dir=serialization_dir,
        data_loader=train_loader,
        validation_data_loader=dev_loader,
        num_epochs=5,
        optimizer=optimizer,
    )
    return trainer
def build_trainer(model: Model, serialization_dir: str,
                  train_loader: DataLoader, dev_loader: DataLoader) -> Trainer:
    parameters = [[n, p] for n, p in model.named_parameters() if p.requires_grad]
    optimizer = AdamOptimizer(parameters, lr=0.001)
    scheduler = ReduceOnPlateauLearningRateScheduler(optimizer=optimizer, patience=5, verbose=True)
    trainer = GradientDescentTrainer(
        model=model,
        serialization_dir=serialization_dir,
        data_loader=train_loader,
        validation_data_loader=dev_loader,
        learning_rate_scheduler=scheduler,
        patience=20,
        num_epochs=200,
        optimizer=optimizer,
        validation_metric="+accuracy",
    )
    return trainer
def build_trainer(model: Model, serialization_dir: str,
                  train_loader: PyTorchDataLoader, dev_loader: PyTorchDataLoader) -> Trainer:
    parameters = [[n, p] for n, p in model.named_parameters() if p.requires_grad]
    # lr, weight_decay, embedding_dim, warmup, num_epoch, grad_accum, grad_norm,
    # and patience are assumed to come from module-level configuration.
    optimizer = AdamOptimizer(parameters, lr=lr, weight_decay=weight_decay,
                              betas=(0.9, 0.98), eps=1e-09)
    lr_scheduler = NoamLR(optimizer, model_size=embedding_dim, warmup_steps=warmup)
    # lr_scheduler = InverseSquareRootLR(optimizer, warmup_steps=warmup, end_lr=lr)
    # lr_scheduler = ReduceOnPlateauLearningRateScheduler(optimizer, factor=0.8, patience=3, min_lr=0.000001, eps=1e-08)
    trainer = GradientDescentTrainer(
        model=model,
        serialization_dir=serialization_dir,
        data_loader=train_loader,
        validation_data_loader=dev_loader,
        num_epochs=num_epoch,
        optimizer=optimizer,
        num_gradient_accumulation_steps=grad_accum,
        grad_norm=grad_norm,
        patience=patience,
        learning_rate_scheduler=lr_scheduler,
    )
    return trainer
def build_trainer(model: Model, ser_dir: str, train_loader: DataLoader,
                  valid_loader: DataLoader, hugging_optim: bool, cuda_device: int) -> Trainer:
    params = [[n, p] for n, p in model.named_parameters() if p.requires_grad]
    logging.info(f"{len(params)} parameters requiring grad updates")
    if hugging_optim:
        optim = HuggingfaceAdamWOptimizer(params, lr=1.0e-5)
    else:
        optim = AdamOptimizer(params)
    return GradientDescentTrainer(
        model=model,
        serialization_dir=ser_dir,
        data_loader=train_loader,
        validation_data_loader=valid_loader,
        num_epochs=5,
        patience=None,  # early stopping is disabled
        optimizer=optim,
        cuda_device=cuda_device,
    )
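# The variant above picks an optimizer at call time. A hedged usage sketch; the
# run directory and loader names here are assumptions, not part of the original:
#
# from allennlp.training.optimizers import HuggingfaceAdamWOptimizer  # registered as "huggingface_adamw"
# trainer = build_trainer(
#     model, "runs/bert_finetune", train_loader, valid_loader,
#     hugging_optim=True,   # transformers-style AdamW, typical for BERT-like encoders
#     cuda_device=0 if torch.cuda.is_available() else -1,
# )
# trainer.train()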
def build_trainer(model: Model, serialization_dir: str, train_loader: DataLoader,
                  dev_loader: DataLoader, num_epochs: int,
                  learning_rate: float = 0.001, cuda_device=None) -> Trainer:
    """
    Builds an instance of the Trainer class with the specified training hyperparameters.

    Adapted from https://guide.allennlp.org/training-and-prediction

    Parameters
    ----------
    model : Model
        The model to train
    serialization_dir : str
        Directory to save checkpoints and results
    train_loader : DataLoader
        Previously built dataset loader for training data
    dev_loader : DataLoader
        Previously built loader for dev data
    num_epochs : int
        Number of epochs to train for
    learning_rate : float (default: 0.001)
    cuda_device : int (default: None)
        >= 0 if using GPU

    Returns
    -------
    trainer : Trainer
    """
    parameters = [(n, p) for n, p in model.named_parameters() if p.requires_grad]
    optimizer = AdamOptimizer(parameters, lr=learning_rate)  # type: ignore
    trainer = GradientDescentTrainer(
        model=model,
        checkpointer=Checkpointer(serialization_dir, num_serialized_models_to_keep=-1),
        serialization_dir=serialization_dir,
        data_loader=train_loader,
        validation_data_loader=dev_loader,
        num_epochs=num_epochs,
        optimizer=optimizer,
        cuda_device=cuda_device,
    )
    print("Will train for", num_epochs, "epochs")
    return trainer
def build_trainer(model: Model, serialization_dir: str,
                  train_loader: DataLoader, dev_loader: DataLoader) -> Trainer:
    parameters = [[n, p] for n, p in model.named_parameters() if p.requires_grad]
    optimizer = AdamOptimizer(parameters)
    # There are a *lot* of other things you could configure with the trainer. See
    # http://docs.allennlp.org/master/api/training/trainer/#gradientdescenttrainer-objects
    # for more information.
    trainer = GradientDescentTrainer(
        model=model,
        serialization_dir=serialization_dir,
        data_loader=train_loader,
        validation_data_loader=dev_loader,
        num_epochs=5,
        optimizer=optimizer,
        validation_metric="+accuracy",
    )
    return trainer
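# A hedged sketch of how a build_trainer like the one above is usually wired up,
# in the AllenNLP 1.x style used by the other snippets here. The reader, file
# names, and MyModel are assumptions, not part of the original code:
#
# reader = MyDatasetReader()                       # hypothetical DatasetReader
# train_dataset = reader.read("train.jsonl")
# dev_dataset = reader.read("dev.jsonl")
# vocab = Vocabulary.from_instances(train_dataset)
# train_dataset.index_with(vocab)
# dev_dataset.index_with(vocab)
# train_loader = PyTorchDataLoader(train_dataset, batch_size=32, shuffle=True)
# dev_loader = PyTorchDataLoader(dev_dataset, batch_size=32, shuffle=False)
# model = MyModel(vocab)                           # hypothetical Model subclass
# trainer = build_trainer(model, "runs/exp1", train_loader, dev_loader)
# metrics = trainer.train()                        # returns a dict of training metrics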
def build_trainer(
    config,
    model: Model,
    train_loader: DataLoader,
    dev_loader: DataLoader,
) -> Trainer:
    parameters = [(n, p) for n, p in model.named_parameters() if p.requires_grad]
    optimizer = AdamOptimizer(parameters, lr=config.lr)  # type: ignore
    model.cuda()
    trainer = GradientDescentTrainer(
        model=model,
        data_loader=train_loader,
        validation_data_loader=dev_loader,
        num_epochs=config.num_epochs,
        optimizer=optimizer,
        cuda_device=0,
        serialization_dir=config.serialization_dir,
    )
    return trainer
def build_classifier_trainer(model: Model, serialization_dir: str,
                             train_loader: DataLoader, dev_loader: DataLoader,
                             num_epochs: int = 1, cuda_device: int = -1,
                             learning_rate: float = 0.000025, world_size: int = 1,
                             distributed: bool = False) -> Trainer:
    parameters = [[n, p] for n, p in model.named_parameters() if p.requires_grad]
    optimizer = AdamOptimizer(parameters, lr=learning_rate)
    trainer = GradientDescentTrainer(
        model=model,
        serialization_dir=serialization_dir,
        data_loader=train_loader,
        validation_data_loader=dev_loader,
        num_epochs=num_epochs,
        optimizer=optimizer,
        cuda_device=cuda_device,
        world_size=world_size,
        distributed=distributed,
        validation_metric='+accuracy',
    )
    return trainer
def init_trainer(self) -> Trainer:
    parameters = [(n, p) for n, p in self.model.named_parameters() if p.requires_grad]
    # Regex-based parameter groups: the text field embedder and the classification
    # layer each get their own learning rate.
    group_parameter_group = [
        (['_text_field_embedder.*'], {'lr': self.config.lr}),
        (['_classification_layer.*'], {'lr': self.config.classifier_lr}),
    ]
    optimizer = AdamOptimizer(parameters,
                              parameter_groups=group_parameter_group,
                              lr=self.config.lr)  # type: ignore
    trainer = GradientDescentTrainer(
        model=self.model,
        serialization_dir='./output',
        data_loader=self.train_data_loader,
        validation_data_loader=self.dev_data_loader,
        num_epochs=self.config.epoch,
        optimizer=optimizer,
        cuda_device=self.config.device,
    )
    return trainer
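# A small self-contained sketch of the parameter_groups mechanism used above:
# regexes over parameter names map matching parameters to per-group options such
# as a separate learning rate. The toy module and the learning rates here are
# assumptions for illustration only.
import torch
from allennlp.training.optimizers import AdamOptimizer

toy = torch.nn.ModuleDict({
    "_text_field_embedder": torch.nn.Embedding(10, 4),
    "_classification_layer": torch.nn.Linear(4, 2),
    "_encoder": torch.nn.Linear(4, 4),
})
toy_params = [(n, p) for n, p in toy.named_parameters() if p.requires_grad]
toy_optimizer = AdamOptimizer(
    toy_params,
    parameter_groups=[
        (["_text_field_embedder.*"], {"lr": 1e-5}),
        (["_classification_layer.*"], {"lr": 1e-3}),
    ],
    lr=1e-4,  # default lr for parameters not matched by any group regex
)
# Each group carries its own lr; unmatched parameters fall into the default group.
print([group["lr"] for group in toy_optimizer.param_groups])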
embedder = BasicTextFieldEmbedder(
    {'tokens': Embedding(
        embedding_dim=10,
        num_embeddings=vocab.get_vocab_size('tokens'))})
encoder = LstmSeq2VecEncoder(10, 32, bidirectional=True)
# encoder = BagOfEmbeddingsEncoder(embedding_dim=10)
model = IntentEstimator(vocab, embedder, encoder)
model.cuda()

# Train the model
with tempfile.TemporaryDirectory() as serialization_dir:
    parameters = [
        [n, p] for n, p in model.named_parameters() if p.requires_grad]
    optimizer = AdamOptimizer(parameters)
    trainer = GradientDescentTrainer(
        model=model,
        serialization_dir=serialization_dir,
        data_loader=train_loader,
        validation_data_loader=valid_loader,
        num_epochs=20,
        optimizer=optimizer,
        cuda_device=0)
    trainer.train()

# Run the model
predictor = IntentEstimatorPredictor(model, dataset_reader)
# Build the text feature vectors
text_embedder = BasicTextFieldEmbedder({"tokens": embedding})
encoder = BagOfEmbeddingsEncoder(embedding_dim=100)

# Build the document classifier
model = BasicClassifier(vocab=vocab,
                        text_field_embedder=text_embedder,
                        seq2vec_encoder=encoder)

# Data loaders
train_loader = PyTorchDataLoader(train_dataset, batch_size=32, shuffle=True)
validation_loader = PyTorchDataLoader(validation_dataset, batch_size=32, shuffle=False)

# Copy the model onto the GPU
# model = model.cuda()

# Create the optimizer
optimizer = AdamOptimizer(model.named_parameters())

# Create the trainer
trainer = GradientDescentTrainer(model=model,
                                 optimizer=optimizer,
                                 data_loader=train_loader,
                                 validation_data_loader=validation_loader,
                                 num_epochs=10,
                                 patience=3)
metrics = trainer.train()
pprint.pprint(metrics)
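# After training, a hedged sketch of inference with the classifier above; the
# dataset_reader used to build train_dataset is assumed but not shown here:
#
# from allennlp.predictors.text_classifier import TextClassifierPredictor
# predictor = TextClassifierPredictor(model, dataset_reader)
# output = predictor.predict("an example sentence to classify")
# pprint.pprint(output)   # JSON-like dict with the predicted label and probabilities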