def print_line(self, print_minibatch=False):
    if not print_minibatch:
        progress = float(self._learner._current_epoch + 1) / float(self._learner._n_epochs)
        print_line = '%s (%d %d%%)' % (
            timeSince(self.start, progress),
            self._learner._current_epoch + 1,
            progress * 100
        )
    else:
        progress = float(self._learner._current_epoch + 1) / float(self._learner._n_epochs)
        print_line = '%s (%d-%d %d%%)' % (
            timeSince(self.start, progress),
            self._learner._batch_idx + 1,
            self._learner._current_epoch + 1,
            progress * 100
        )
        # Iterations per second since the last minibatch log
        print_line += ' - %.2f it/s' % (
            (self._learner._batch_idx - self.last_log_batch_idx) /
            (time.time() - self.last_log_at)
        )
        self.last_log_at = time.time()
        self.last_log_batch_idx = self._learner._batch_idx

    metrics = self._learner.metrics
    # metrics = self._learner._batch_metrics
    if metrics is not None:
        # Report only the keys configured on this logger that the learner produced
        for key in self.metrics:
            if key in metrics:
                print_line += ' - %s: %.4f' % (key, metrics[key])

    self.logging_fn(print_line)
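
# Usage sketch (illustrative only, not from the source): print_line is assumed to
# live on a logging callback that exposes `_learner` (with `_current_epoch`,
# `_n_epochs`, `_batch_idx`, and `metrics`), plus `start`, `last_log_at`,
# `last_log_batch_idx`, `metrics` (the metric keys to report), and `logging_fn`.
# The `TrainingLogger` name and constructor below are hypothetical.
#
# logger = TrainingLogger(learner=learner, metrics=['loss', 'accuracy'], logging_fn=print)
# logger.print_line()                      # epoch-level progress line
# logger.print_line(print_minibatch=True)  # adds batch index and it/s throughput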
def trainIters(data,
               tag_to_ix,
               n_iters=50,
               log_every=10,
               optimizer='adam',
               learning_rate=1e-3,
               weight_decay=1e-5,
               grad_clip=5.,
               gradual_unfreeze=False,
               tokenizer=wordpunct_space_tokenize,
               verbose=2,
               patience=4,
               save_path=None):
    save_path = save_path or SAVE_PATH

    # Invert the tag dictionary
    ix_to_tag = {value: key for key, value in tag_to_ix.items()}

    input_data = process_input(data, tokenizer=tokenizer)

    # Check input lengths
    for idx, (sentence, tags) in enumerate(input_data):
        if len(sentence) != len(tags):
            print('Warning: Size of sentence and tags didn\'t match')
            print('For sample: %s' % str(data[idx][0]))
            print('Lengths: %s' % str((len(sentence), len(tags))))
            return

    model = BiLSTM_CRF(tag_to_ix)

    is_cuda = torch.cuda.is_available()

    scheduler = None
    # weight_decay = 1e-4 by default for SGD
    if optimizer == 'adam':
        weight_decay = weight_decay or 0
        model_optimizer = optim.Adam(model.parameters(),
                                     lr=learning_rate,
                                     weight_decay=weight_decay)
    else:
        weight_decay = weight_decay or 1e-5
        model_optimizer = optim.SGD(model.parameters(),
                                    lr=learning_rate,
                                    weight_decay=weight_decay)
        scheduler = optim.lr_scheduler.LambdaLR(
            model_optimizer,
            lr_lambda=lambda step: lr_schedule_slanted_triangular(
                step, n_iters, learning_rate))

    LOSS_LOG_FILE = path.join(LOG_DIR, 'neg_loss')
    INST_LOG_DIR = path.join(LOG_DIR, get_datetime_hostname())
    writer = SummaryWriter(log_dir=INST_LOG_DIR)

    loss_total = 0
    print_loss_total = 0
    all_losses = []
    best_loss = 1e15
    wait = 0

    if is_cuda:
        if verbose > 0:
            print('Training with GPU mode')
        model = model.cuda()
    else:
        if verbose > 0:
            print('Training with CPU mode')

    if verbose == 2:
        iterator = trange(1, n_iters + 1, desc='Epochs', leave=False)
        epoch_iterator = tqdm(input_data)
    else:
        iterator = range(1, n_iters + 1)
        epoch_iterator = input_data

    # For timing with verbose=1
    start = time.time()
    layers_count = model.layers_count()

    for epoch in iterator:
        if gradual_unfreeze and epoch < layers_count:
            model.freeze_to(layers_count - epoch)

        for sentence, tags in epoch_iterator:
            loss = _train(sentence, tags, tag_to_ix, model,
                          model_optimizer, grad_clip)
            loss_total += loss
            print_loss_total += loss
            if optimizer == 'sgd' and scheduler is not None:
                scheduler.step()

        all_losses.append(loss_total)
        writer.add_scalar(LOSS_LOG_FILE, loss_total, epoch)

        # Early stopping: compare against the best epoch loss *before* resetting it
        if loss_total < best_loss:
            best_loss = loss_total
            wait = 1
        else:
            if wait >= patience:
                print('Early stopping')
                break
            wait += 1

        loss_total = 0

        if epoch % log_every == 0 and verbose != 0:
            with torch.no_grad():
                accuracy = evaluate_all(model, data, tag_to_ix, tokenizer)
                _, tag_seq = model(input_data[0][0])
                tag_interpreted = [ix_to_tag[tag] for tag in tag_seq]
                writer.add_text(
                    'Training predictions',
                    (' - Input: `%s`\r\n - Tags: `%s`\r\n - Predicted: `%s`\r\n\r\nAccuracy: %s\r\n' %
                     (str(input_data[0][0]), str(input_data[0][1]),
                      str(tag_interpreted), accuracy)),
                    epoch)

            if verbose == 1:
                print_loss_avg = print_loss_total / log_every
                progress = float(epoch) / float(n_iters)
                print('%s (%d %d%%) %.4f' %
                      (timeSince(start, progress), epoch,
                       progress * 100, print_loss_avg))

            print_loss_total = 0

    torch.save({
        'tag_to_ix': tag_to_ix,
        'state_dict': model.state_dict()
    }, save_path)

    LOG_JSON = path.join(LOG_DIR, 'all_scalars.json')
    writer.export_scalars_to_json(LOG_JSON)
    writer.close()

    return all_losses, model
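
# Usage sketch (illustrative only; the tags and sentence below are made up, and the
# exact shape of `data` depends on what process_input() / the tokenizer expect).
# `data` is read as (sentence, tags) pairs whose tokenized lengths must match, and
# tag_to_ix maps every tag (plus any CRF start/stop tags BiLSTM_CRF requires) to an index.
#
# tag_to_ix = {'O': 0, 'B-LOC': 1, 'I-LOC': 2}
# data = [('fly to new york', 'O O B-LOC I-LOC')]
# losses, tagger = trainIters(data, tag_to_ix, n_iters=30, optimizer='sgd',
#                             gradual_unfreeze=True)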
def trainIters(data,
               classes,
               batch_size=32,
               n_iters=50,
               log_every=10,
               optimizer='rmsprop',
               learning_rate=1e-2,
               weight_decay=None,
               verbose=2,
               patience=10,
               save_path=None):
    save_path = save_path or SAVE_PATH
    num_classes = len(classes)
    # input_data = process_input(data)
    cpu_count = mp.cpu_count()

    # Set class weights - this is kinda rough...
    weights = torch.zeros(num_classes)
    for _, class_idx in data:
        weights[int(class_idx)] += 1
    for class_idx in range(num_classes):
        weights[int(class_idx)] = 1 / weights[int(class_idx)]

    print('Training started')

    # criterion = nn.CrossEntropyLoss(weight=weights)
    # criterion = nn.CrossEntropyLoss()
    criterion = nn.BCEWithLogitsLoss(weight=weights, reduction='sum')
    model = FastText(classes=num_classes)

    # weight_decay = 1e-4 by default for SGD
    if optimizer == 'adam':
        weight_decay = weight_decay or 0
        adam_decay = weight_decay
        model_optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                            model.parameters()),
                                     lr=learning_rate)
    elif optimizer == 'rmsprop':
        weight_decay = weight_decay or 1e-5
        adam_decay = 0
        model_optimizer = optim.RMSprop(filter(lambda p: p.requires_grad,
                                               model.parameters()),
                                        lr=learning_rate,
                                        weight_decay=weight_decay)
    else:
        weight_decay = weight_decay or 1e-4
        adam_decay = 0
        model_optimizer = optim.SGD(filter(lambda p: p.requires_grad,
                                           model.parameters()),
                                    lr=learning_rate,
                                    weight_decay=weight_decay)

    LOSS_LOG_FILE = path.join(LOG_DIR, 'cross_entropy_loss')
    INST_LOG_DIR = path.join(LOG_DIR, get_datetime_hostname())
    writer = SummaryWriter(log_dir=INST_LOG_DIR)

    all_losses = []
    loss_total = 0
    accuracy_total = 0
    print_loss_total = 0
    print_accuracy_total = 0
    real_batch = 0
    best_loss = 1e15
    wait = 0

    if verbose == 2:
        iterator = trange(1, n_iters + 1, desc='Epochs', leave=False)
    else:
        iterator = range(1, n_iters + 1)

    dataset = FastTextDataset(data, num_classes)
    data_loader = DataLoader(dataset,
                             batch_size=batch_size,
                             num_workers=cpu_count)

    # For timing with verbose=1
    start = time.time()

    for epoch in iterator:
        for _, data_batch in enumerate(data_loader, 0):
            model.train()

            sentences = data_batch['sentence']
            labels = data_batch['label']

            real_batch += len(labels)  # real batch size

            # Run the training epoch
            loss = _train(sentences, labels, model, criterion,
                          model_optimizer, adam_decay)
            loss_total += loss
            accuracy_total += evaluate(model, sentences, labels)

            if verbose == 2:
                iterator.set_description('Minibatch: %s' % real_batch)

        loss_total = loss_total / real_batch
        accuracy_total = accuracy_total / real_batch
        print_accuracy_total += accuracy_total
        print_loss_total += loss_total

        writer.add_scalar(LOSS_LOG_FILE, loss_total, epoch)
        all_losses.append(loss_total)

        if loss_total < best_loss:
            best_loss = loss_total
            wait = 1
        else:
            if wait >= patience:
                print('Early stopping')
                break
            wait += 1

        real_batch = 0
        accuracy_total = 0
        loss_total = 0

        if epoch % log_every == 0:
            print_accuracy_total = print_accuracy_total / log_every
            if verbose == 1:
                print_loss_avg = print_loss_total / log_every
                progress = float(epoch) / float(n_iters)
                print('%s (%d %d%%) %.4f - accuracy: %.4f' %
                      (timeSince(start, progress), epoch, progress * 100,
                       print_loss_avg, print_accuracy_total))
            print_loss_total = 0
            print_accuracy_total = 0

    print('Calibrating model')
    model._calibrate(data_loader, weights)

    print('Training completed')

    torch.save({
        'classes': classes,
        'state_dict': model.state_dict(),
    }, save_path)

    LOG_JSON = path.join(LOG_DIR, 'all_scalars.json')
    writer.export_scalars_to_json(LOG_JSON)
    writer.close()

    return all_losses, model
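
# Usage sketch (illustrative only; the class names and sentences are made up).
# `data` is read as (sentence, class_index) pairs -- the class-weight loop above
# indexes directly with int(class_idx) -- and FastTextDataset is assumed to handle
# tokenization and featurization of the raw sentences.
#
# classes = ['greeting', 'goodbye', 'weather']
# data = [('hello there', 0), ('see you later', 1), ('will it rain tomorrow', 2)]
# losses, clf = trainIters(data, classes, batch_size=16, optimizer='rmsprop',
#                          n_iters=20, verbose=1)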
def trainIters(data,
               classes,
               batch_size=32,
               n_iters=50,
               log_every=10,
               optimizer='adam',
               learning_rate=1e-3,
               weight_decay=None,
               verbose=2,
               patience=4,
               save_path=None):
    save_path = save_path or SAVE_PATH
    num_classes = len(classes)
    input_data = process_input(data)

    # get class weights
    class_weights = {}
    intents_count = float(len(data))
    weights_tensor = torch.zeros(num_classes).float()
    for _, label in data:
        if label not in class_weights:
            class_weights[label] = 1.
        else:
            class_weights[label] += 1.
    for label in class_weights:
        weights_tensor[label] = intents_count / class_weights[label]

    model = TextCNN(classes=num_classes)
    criterion = nn.CrossEntropyLoss(weight=weights_tensor)

    # weight_decay = 1e-4 by default for SGD
    if optimizer == 'adam':
        weight_decay = weight_decay or 0
        model_optimizer = optim.Adam(model.parameters(),
                                     lr=learning_rate,
                                     weight_decay=weight_decay)
    else:
        weight_decay = weight_decay or 1e-4
        model_optimizer = optim.SGD(model.parameters(),
                                    lr=learning_rate,
                                    weight_decay=weight_decay)

    LOSS_LOG_FILE = path.join(LOG_DIR, 'cross_entropy_loss')
    INST_LOG_DIR = path.join(LOG_DIR, get_datetime_hostname())
    writer = SummaryWriter(log_dir=INST_LOG_DIR)

    all_losses = []
    loss_total = 0
    accuracy_total = 0
    print_loss_total = 0
    print_accuracy_total = 0
    real_batch = 0
    best_loss = 1e15
    wait = 0

    if verbose == 2:
        iterator = trange(1, n_iters + 1, desc='Epochs', leave=False)
    else:
        iterator = range(1, n_iters + 1)

    # For timing with verbose=1
    start = time.time()

    data_loader = DataLoader(input_data, batch_size=batch_size)

    for epoch in iterator:
        for _, data_batch in enumerate(data_loader, 0):
            sentences, labels = data_batch

            # Prepare training data
            sentence_in, target_variable = Variable(sentences), Variable(
                labels.type(torch.LongTensor))

            real_batch += len(sentences)  # real batch size

            # Run the training epoch
            loss = _train(sentence_in, target_variable, model, criterion,
                          model_optimizer)
            loss_total += loss
            accuracy_total += evaluate(model, sentence_in, labels)

        loss_total = loss_total / real_batch
        accuracy_total = accuracy_total / real_batch
        print_accuracy_total += accuracy_total
        print_loss_total += loss_total

        writer.add_scalar(LOSS_LOG_FILE, loss_total, epoch)
        all_losses.append(loss_total)

        if loss_total < best_loss:
            best_loss = loss_total
            wait = 1
        else:
            if wait >= patience:
                print('Early stopping')
                break
            wait += 1

        real_batch = 0
        accuracy_total = 0
        loss_total = 0

        if epoch % log_every == 0:
            print_accuracy_total = print_accuracy_total / log_every
            if verbose == 1:
                print_loss_avg = print_loss_total / log_every
                progress = float(epoch) / float(n_iters)
                print('%s (%d %d%%) %.4f - accuracy: %.4f' %
                      (timeSince(start, progress), epoch, progress * 100,
                       print_loss_avg, print_accuracy_total))
            print_loss_total = 0
            print_accuracy_total = 0

        # Secondary early-stopping check on the raw loss history
        if len(all_losses) > patience > 0 and all_losses[-1] > all_losses[-patience]:
            print('Early stopping')
            break

    torch.save({
        'classes': classes,
        'state_dict': model.state_dict()
    }, save_path)

    LOG_JSON = path.join(LOG_DIR, 'all_scalars.json')
    writer.export_scalars_to_json(LOG_JSON)
    writer.close()

    return all_losses, model
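
# Worked example of the inverse-frequency class weighting above (standalone sketch;
# the sample labels are made up). Each class weight is total_samples / class_count,
# so rarer classes contribute more to the weighted CrossEntropyLoss.
#
# import torch
#
# sample_labels = [0, 0, 0, 1, 2, 2]          # 3x class 0, 1x class 1, 2x class 2
# counts = {}
# for label in sample_labels:
#     counts[label] = counts.get(label, 0.) + 1.
# weights = torch.zeros(3).float()
# for label, count in counts.items():
#     weights[label] = len(sample_labels) / count
# print(weights)                               # tensor([2., 6., 3.])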