def __init__(self, num_embeddings, embedding_dim, embeddings=None,
             noise=.0, dropout=.0, scale=1., trainable=False):
    """
    Define the layers of the model and perform initialization
    (wherever it is necessary)

    Args:
        num_embeddings (int): size of the vocabulary
        embedding_dim (int): dimensionality of the word vectors
        embeddings (numpy.ndarray): the 2D ndarray with the word vectors
        noise (float): stddev of the gaussian noise added to the embeddings
        dropout (float): dropout probability applied to the embeddings
        scale (float): constant by which the embeddings are multiplied
        trainable (bool): if False, the embedding weights are frozen
    """
    super(Embed, self).__init__()
    self.scale = scale  # scale embeddings by value. Needed for transformer

    # define the embedding layer, with the corresponding dimensions
    self.embedding = nn.Embedding(num_embeddings=num_embeddings,
                                  embedding_dim=embedding_dim)

    if embeddings is not None:
        log.info("Initializing Embedding layer with pre-trained weights!")
        self.init_embeddings(embeddings, trainable)

    if not trainable:
        self.embedding.weight.requires_grad = False

    # the dropout "layer" for the word embeddings
    self.dropout = nn.Dropout(dropout)

    # the gaussian noise "layer" for the word embeddings
    self.noise = GaussianNoise(noise)
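# Usage sketch; `vectors` (a pretrained 2D numpy array) and `token_ids`
# (a LongTensor of word ids) are hypothetical placeholders, not part of
# the original source.
embed = Embed(num_embeddings=vectors.shape[0],
              embedding_dim=vectors.shape[1],
              embeddings=vectors,
              noise=0.1,
              dropout=0.2,
              trainable=False)
word_vectors = embed(token_ids)  # -> (batch, seq_len, embedding_dim)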
def timed(*args: types.T, **kwargs: types.T):
    # time the wrapped call and log the elapsed seconds
    ts = time.time()
    result = func(*args, **kwargs)
    te = time.time()
    elapsed = f'{te - ts:.3f}'
    log.info('BENCHMARK: {f}(*{a}, **{kw}) took: {t} sec'.format(
        f=func.__name__, a=args, kw=kwargs, t=elapsed))
    return result
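# A minimal, self-contained sketch of the enclosing decorator that `timed`
# is presumably the inner wrapper of. The decorator name `timefunc`, the
# logger setup and the use of functools.wraps are assumptions, not part of
# the original source.
import functools
import logging
import time

log = logging.getLogger(__name__)


def timefunc(func):
    @functools.wraps(func)
    def timed(*args, **kwargs):
        ts = time.time()
        result = func(*args, **kwargs)
        te = time.time()
        log.info('BENCHMARK: {f}(*{a}, **{kw}) took: {t:.3f} sec'.format(
            f=func.__name__, a=args, kw=kwargs, t=te - ts))
        return result
    return timed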
def fit(self: TrainerType,
        train_loader: DataLoader,
        val_loader: DataLoader,
        epochs: int = 50) -> float:
    log.info('Trainer will run for\n'
             f'model: {self.model}\n'
             f'optimizer: {self.optimizer}\n'
             f'loss: {self.loss_fn}')
    self.val_handler.attach(self.trainer,
                            self.train_evaluator,
                            train_loader,
                            validation=False)
    self.val_handler.attach(self.trainer,
                            self.valid_evaluator,
                            val_loader,
                            validation=True)
    self.model.zero_grad()
    self.trainer.run(train_loader, max_epochs=epochs)
    # the early-stopping score function negates the validation loss
    # (higher is better for ignite handlers), so flip the sign back
    # to report a loss
    best_score = (-self.early_stop.best_score
                  if self.early_stop
                  else self.valid_evaluator.state.metrics['loss'])
    return best_score
def __init__(self: TrainerType,
             model: nn.Module,
             optimizer: Optimizer,
             checkpoint_dir: Optional[str] = '../../checkpoints',
             experiment_name: str = 'experiment',
             model_checkpoint: Optional[str] = None,
             optimizer_checkpoint: Optional[str] = None,
             metrics: Optional[types.GenericDict] = None,
             patience: int = 10,
             validate_every: int = 1,
             accumulation_steps: int = 1,
             loss_fn: Optional[Union[_Loss, DataParallelCriterion]] = None,
             non_blocking: bool = True,
             retain_graph: bool = False,
             dtype: torch.dtype = torch.float,
             device: str = 'cpu',
             parallel: bool = False) -> None:
    self.dtype = dtype
    self.retain_graph = retain_graph
    self.non_blocking = non_blocking
    self.device = device
    self.loss_fn = loss_fn
    self.validate_every = validate_every
    self.patience = patience
    self.accumulation_steps = accumulation_steps
    self.checkpoint_dir = checkpoint_dir

    model_checkpoint = self._check_checkpoint(model_checkpoint)
    optimizer_checkpoint = self._check_checkpoint(optimizer_checkpoint)

    self.model = cast(nn.Module,
                      from_checkpoint(model_checkpoint,
                                      model,
                                      map_location=torch.device('cpu')))
    self.model = self.model.type(dtype).to(device)
    self.optimizer = from_checkpoint(optimizer_checkpoint, optimizer)
    self.parallel = parallel
    if parallel:
        if device == 'cpu':
            raise ValueError("parallel can be used only with cuda device")
        self.model = DataParallelModel(self.model).to(device)
        self.loss_fn = DataParallelCriterion(self.loss_fn)  # type: ignore
    if metrics is None:
        metrics = {}
    if 'loss' not in metrics:
        if self.parallel:
            metrics['loss'] = Loss(
                lambda x, y: self.loss_fn(x, y).mean())  # type: ignore
        else:
            metrics['loss'] = Loss(self.loss_fn)
    self.trainer = Engine(self.train_step)
    self.train_evaluator = Engine(self.eval_step)
    self.valid_evaluator = Engine(self.eval_step)
    for name, metric in metrics.items():
        metric.attach(self.train_evaluator, name)
        metric.attach(self.valid_evaluator, name)

    self.pbar = ProgressBar()
    self.val_pbar = ProgressBar(desc='Validation')

    if checkpoint_dir is not None:
        self.checkpoint = CheckpointHandler(checkpoint_dir,
                                            experiment_name,
                                            score_name='validation_loss',
                                            score_function=self._score_fn,
                                            n_saved=2,
                                            require_empty=False,
                                            save_as_state_dict=True)

    self.early_stop = EarlyStopping(patience,
                                    self._score_fn,
                                    self.trainer)

    self.val_handler = EvaluationHandler(pbar=self.pbar,
                                         validate_every=1,
                                         early_stopping=self.early_stop)
    self.attach()
    log.info(
        f'Trainer configured to run {experiment_name}\n'
        f'\tpretrained model: {model_checkpoint}\n'
        f'\tpretrained optimizer: {optimizer_checkpoint}\n'
        f'\tcheckpoint directory: {checkpoint_dir}\n'
        f'\tpatience: {patience}\n'
        f'\taccumulation steps: {accumulation_steps}\n'
        f'\tnon blocking: {non_blocking}\n'
        f'\tretain graph: {retain_graph}\n'
        f'\tdevice: {device}\n'
        f'\tmodel dtype: {dtype}\n'
        f'\tparallel: {parallel}')
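# Hedged sketch of the _score_fn passed to CheckpointHandler and
# EarlyStopping above; this implementation is an assumption, not part of
# the original source. Ignite handlers treat larger scores as better, so
# the validation loss is negated here, which is also why fit() flips the
# sign back when reporting the best score.
@staticmethod
def _score_fn(engine: Engine) -> float:
    # negated validation loss: higher is better for ignite handlers
    return -engine.state.metrics['loss']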
def load(self) -> types.Embeddings:
    """
    Read the word vectors from a text file

    Returns:
        word2idx (dict): dictionary of words to ids
        idx2word (dict): dictionary of ids to words
        embeddings (numpy.ndarray): the word embeddings matrix
    """
    # in order to avoid this time consuming operation, cache the results
    try:
        cache = self._load_cache()
        log.info("Loaded word embeddings from cache.")
        return cache
    except OSError:
        log.warning(
            f"Didn't find embeddings cache file {self.embeddings_file}")

    # create the necessary dictionaries and the word embeddings matrix
    if not os.path.exists(self.embeddings_file):
        log.critical(f"{self.embeddings_file} not found!")
        raise OSError(errno.ENOENT, os.strerror(errno.ENOENT),
                      self.embeddings_file)

    log.info(f'Indexing file {self.embeddings_file} ...')

    # create the 2D array, which will be used for initializing
    # the Embedding layer of a NN.
    # We reserve the first row (idx=0) for the embedding of the
    # zero-padding token (word with id = 0).
    word2idx, idx2word, embeddings = self.augment_embeddings(
        {}, {}, [], self.extra_tokens.PAD.value, emb=np.zeros(self.dim_))
    for token in self.extra_tokens:
        if token == self.extra_tokens.PAD:
            continue
        word2idx, idx2word, embeddings = self.augment_embeddings(
            word2idx, idx2word, embeddings, token.value)

    # read file, line by line
    with open(self.embeddings_file, "r") as f:
        index = len(embeddings)
        for line in f:
            # skip the first row, if it is a header
            if len(line.split()) < self.dim_:
                continue

            values = line.rstrip().split(" ")
            word = values[0]

            if word in word2idx:
                continue

            vector = np.asarray(values[1:], dtype=np.float32)
            idx2word[index] = word
            word2idx[word] = index
            embeddings.append(vector)
            index += 1

    log.info(f'Found {len(embeddings)} word vectors.')
    embeddings = np.array(embeddings, dtype='float32')

    # write the data to a cache file
    self._dump_cache((word2idx, idx2word, embeddings))

    return word2idx, idx2word, embeddings
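# Usage sketch; `EmbeddingsLoader` is assumed to be the class this method
# belongs to, and the GloVe path and dimension are illustrative
# placeholders, not part of the original source.
loader = EmbeddingsLoader('./cache/glove.6B.300d.txt', 300)
word2idx, idx2word, embeddings = loader.load()
# the returned matrix can initialize the Embed layer defined earlier, e.g.
# Embed(*embeddings.shape, embeddings=embeddings, trainable=False)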
def _get_cache_name(self) -> str:
    # derive the cache path from the embeddings file,
    # e.g. <name>.txt -> <name>.p in the same directory
    head, tail = os.path.split(self.embeddings_file)
    filename, ext = os.path.splitext(tail)
    cache_name = os.path.join(head, f'{filename}.p')
    log.info(f'Cache: {cache_name}')
    return cache_name
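# Hedged sketch of the cache helpers referenced in load(); a pickle-based
# implementation is an assumption, consistent with the '.p' cache
# extension above. Opening a missing cache file raises FileNotFoundError
# (a subclass of OSError), which load() catches.
import pickle


def _load_cache(self) -> types.Embeddings:
    with open(self._get_cache_name(), 'rb') as f:
        return pickle.load(f)


def _dump_cache(self, data: types.Embeddings) -> None:
    with open(self._get_cache_name(), 'wb') as f:
        pickle.dump(data, f)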
            bidirectional=True, merge_bi='cat',
            packed_sequence=True, attention=True,
            device=DEVICE),
    512, 3)
optimizer = Adam([p for p in model.parameters() if p.requires_grad],
                 lr=1e-3)
criterion = nn.CrossEntropyLoss()
metrics = {
    'accuracy': Accuracy(),
    'loss': Loss(criterion)
}
trainer = SequentialTrainer(
    model,
    optimizer,
    checkpoint_dir='../checkpoints' if not DEBUG else None,
    metrics=metrics,
    non_blocking=True,
    retain_graph=True,
    patience=5,
    loss_fn=criterion,
    device=DEVICE)

if DEBUG:
    log.info('Starting end to end test')
    print('--------------------------------------------------------------')
    trainer.fit_debug(train_loader, dev_loader)
    log.info('Overfitting single batch')
    print('--------------------------------------------------------------')
    trainer.overfit_single_batch(train_loader)
else:
    trainer.fit(train_loader, dev_loader, epochs=10)