def fit(self, optimizer, patience, num_epochs=200):
    liveloss = PlotLosses()
    # initialize the early_stopping object
    early_stopping = EarlyStopping(patience=patience, verbose=True, metric='auc')
    for epoch in tqdm(range(num_epochs)):
        logs = {}
        self.train(optimizer)
        val_auc, val_ap = self.evaluate(validation=True, test=False)
        logs['val_auc'] = val_auc
        logs['val_ap'] = val_ap
        liveloss.update(logs)
        liveloss.send()
        self.writer.add_scalar('val_auc', val_auc, epoch)
        self.writer.add_scalar('val_ap', val_ap, epoch)
        ### Early stopping
        # early_stopping needs the validation AUC to check whether it has improved,
        # and if it has, it makes a checkpoint of the current model
        early_stopping(val_auc, self.model)
        if early_stopping.early_stop:
            print("Early stopping")
            break
    # load the last checkpoint with the best model
    self.model.load_state_dict(torch.load('checkpoint.pt'))
    return self.model
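# Note: the `fit` method above relies on an `EarlyStopping` helper that is not defined in this
# section. The sketch below is a hypothetical implementation inferred only from the call sites
# (constructed with patience/verbose/metric, called as early_stopping(val_auc, model), exposing
# an `early_stop` flag, and writing 'checkpoint.pt'); it is not the author's actual class.
import numpy as np
import torch


class EarlyStopping:
    """Stop training when the monitored metric (AUC here, so higher is better) has not
    improved for `patience` consecutive epochs; checkpoint the best model to `path`."""

    def __init__(self, patience=7, verbose=False, metric='auc', path='checkpoint.pt'):
        self.patience = patience
        self.verbose = verbose
        self.metric = metric
        self.path = path
        self.counter = 0
        self.best_score = -np.inf
        self.early_stop = False

    def __call__(self, score, model):
        if score > self.best_score:
            # metric improved: reset the counter and checkpoint the current model
            if self.verbose:
                print(f'{self.metric} improved ({self.best_score:.6f} -> {score:.6f}). Saving model ...')
            self.best_score = score
            self.counter = 0
            torch.save(model.state_dict(), self.path)
        else:
            self.counter += 1
            if self.verbose:
                print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True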
def test_neptune(): neptune_logger = NeptuneLogger( api_token="ANONYMOUS", project_qualified_name="shared/colab-test-run", tags=['livelossplot', 'github-actions'] ) plotlosses = PlotLosses(outputs=[neptune_logger]) assert neptune_logger.experiment.state == 'running' for i in range(3): plotlosses.update( { 'acc': 1 - np.random.rand() / (i + 2.), 'val_acc': 1 - np.random.rand() / (i + 0.5), 'loss': 1. / (i + 2.), 'val_loss': 1. / (i + 0.5) } ) plotlosses.send() assert neptune_logger.experiment.state == 'running' neptune_logger.close() assert neptune_logger.experiment.state == 'succeeded' url = neptune.project._get_experiment_link(neptune_logger.experiment) assert len(url) > 0
def test_extrema_print(): """Test if plugin object cache contains valid values""" groups = {'accuracy': ['acc', 'val_acc'], 'log-loss': ['loss', 'val_loss']} plugin = ExtremaPrinter() outputs = (plugin, ) liveplot = PlotLosses(outputs=outputs, groups=groups) liveplot.update({'acc': 0.5, 'val_acc': 0.4, 'loss': 1.2, 'val_loss': 1.1}) liveplot.update({ 'acc': 0.55, 'val_acc': 0.45, 'loss': 1.1, 'val_loss': 1.0 }) liveplot.update({ 'acc': 0.65, 'val_acc': 0.35, 'loss': 0.5, 'val_loss': 0.9 }) liveplot.update({ 'acc': 0.65, 'val_acc': 0.55, 'loss': 1.0, 'val_loss': 0.9 }) liveplot.send() assert len(plugin.extrema_cache['log-loss']) == 2 assert len(plugin.extrema_cache['log-loss']['training ']) == 3 assert plugin.extrema_cache['accuracy']['validation ']['min'] == 0.35 assert plugin.extrema_cache['accuracy']['validation ']['max'] == 0.55 assert plugin.extrema_cache['accuracy']['validation ']['current'] == 0.55
def test_minus_from_step(): """Test from_step < 0""" out = CheckOutput(target_log_history_length=6) loss_plotter = PlotLosses(outputs=[out], from_step=-5) for idx in range(10): loss_plotter.update({ 'acc': 0.1 * idx, 'loss': 0.69 / (idx + 1), }) loss_plotter.send()
def test_default_from_step(): """Test without from_step""" out = CheckOutput(target_log_history_length=10) loss_plotter = PlotLosses(outputs=[out]) for idx in range(10): loss_plotter.update({ 'acc': 0.1 * idx, 'loss': 0.69 / (idx + 1), }) loss_plotter.send()
def main(): api_token = os.environ.get('NEPTUNE_API_TOKEN') project_qualified_name = os.environ.get('NEPTUNE_PROJECT_NAME') logger = NeptuneLogger(api_token=api_token, project_qualified_name=project_qualified_name) liveplot = PlotLosses(outputs=[logger]) for i in range(20): liveplot.update({ 'accuracy': 1 - np.random.rand() / (i + 2.), 'val_accuracy': 1 - np.random.rand() / (i + 0.5), 'mse': 1. / (i + 2.), 'val_mse': 1. / (i + 0.5) }) liveplot.send() sleep(.5)
def test_bokeh_plot(): logger = BokehPlot() liveplot = PlotLosses(outputs=[logger], mode='script') for i in range(3): liveplot.update({ 'acc': 1 - np.random.rand() / (i + 2.), 'val_acc': 1 - np.random.rand() / (i + 0.5), 'loss': 1. / (i + 2.), 'val_loss': 1. / (i + 0.5) }) liveplot.send() assert os.path.isfile(logger.output_file)
def test_tensorboard():
    groups = {
        'accuracy': ['acc', 'val_acc'],
        'log-loss': ['loss', 'val_loss']
    }
    logger = TensorboardTFLogger()
    liveplot = PlotLosses(groups=groups, outputs=(logger, ))
    for i in range(3):
        liveplot.update({
            'acc': 1 - np.random.rand() / (i + 2.),
            'val_acc': 1 - np.random.rand() / (i + 0.5),
            'loss': 1. / (i + 2.),
            'val_loss': 1. / (i + 0.5)
        })
        liveplot.send()
    assert all([
        f.startswith('events.out.tfevents.')
        for f in os.listdir(logger._path)
    ])
def fit(self, train_loader): liveloss = PlotLosses() logs = {} for epoch in range(self.epoch_num): for batch_idx, (data, target) in enumerate(train_loader): data, target = Variable(data.float()).to( self.device), Variable(target.float()).to(self.device) data = data.view(-1, self.input_layer_size) target = target.view(-1, self.input_layer_size) self.optimizer.zero_grad() net_out = self.model(data) loss = self.criterion(net_out, target) loss.backward() self.optimizer.step() epoch_loss = loss.detach() logs['MSE loss'] = epoch_loss.item() liveloss.update(logs) liveloss.send() print("Number of weight coefficients:", self.model.number_of_weight_coefficients)
def test_plot_losses(): """Test basic usage""" loss_plotter = PlotLosses(outputs=(CheckOutput(), )) loss_plotter.update({ 'acc': 0.5, 'val_acc': 0.4, 'loss': 1.2, 'val_loss': 1.1 }) loss_plotter.update({ 'acc': 0.55, 'loss': 1.1, }) loss_plotter.update({ 'acc': 0.65, 'val_acc': 0.55, 'loss': 1.0, 'val_loss': 0.9 }) loss_plotter.update({ 'acc': 0.55, 'loss': 1.1, }) loss_plotter.send()
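# Note: several of the tests above (test_minus_from_step, test_default_from_step,
# test_plot_losses) use a `CheckOutput` helper that is not shown in this section. A rough,
# hypothetical sketch follows; it assumes livelossplot's output-plugin interface (a subclass of
# livelossplot.outputs.BaseOutput whose `send` receives the main logger and reads its
# `log_history` dict). The import path and attribute names are assumptions, not the actual helper.
from livelossplot.outputs import BaseOutput


class CheckOutput(BaseOutput):
    """Test plugin that checks how many steps of history the main logger keeps per metric."""

    def __init__(self, target_log_history_length=None):
        self.target_log_history_length = target_log_history_length

    def send(self, logger):
        if self.target_log_history_length is None:
            return
        # log_history is assumed to map metric names to lists of logged values
        for name, history in logger.log_history.items():
            assert len(history) == self.target_log_history_length, name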
def test_extrema_print(): """Test if plugin object cache contains valid values""" groups = {'accuracy': ['acc', 'val_acc'], 'log-loss': ['loss', 'val_loss']} plugin = ExtremaPrinter() outputs = (plugin, ) liveplot = PlotLosses(outputs=outputs, groups=groups) liveplot.update({'acc': 0.5, 'val_acc': 0.4, 'loss': 1.2, 'val_loss': 1.1}) liveplot.update({ 'acc': 0.55, 'val_acc': 0.45, 'loss': 1.1, 'val_loss': 1.0 }) liveplot.update({ 'acc': 0.65, 'val_acc': 0.35, 'loss': 0.5, 'val_loss': 0.9 }) liveplot.update({ 'acc': 0.65, 'val_acc': 0.55, 'loss': 1.0, 'val_loss': 0.9 }) liveplot.send() message = liveplot.outputs[0].last_message ref_message = '\n'.join([ 'accuracy', '\ttraining \t (min: 0.500, max: 0.650, cur: 0.650)', '\tvalidation \t (min: 0.350, max: 0.550, cur: 0.550)', 'log-loss', '\ttraining \t (min: 0.500, max: 1.200, cur: 1.000)', '\tvalidation \t (min: 0.900, max: 1.100, cur: 0.900)' ]) assert message == ref_message
def train_eval_loop(
        model: Module,
        train_dataset: Dataset,
        val_dataset: Dataset,
        lr: float = 1e-4,
        epoch_n: int = 10,
        batch_size: int = 32,
        device=None,
        early_stopping_patience: int = 10,
        l2_reg_alpha: float = 0,
        max_batches_per_epoch_train: int = 10000,
        max_batches_per_epoch_val: int = 1000,
        optimizer_ctor: Optimizer = None,
        lr_scheduler_ctor=None,
        shuffle_train=True,
        dataloader_workers_n: int = 0,
        verbose_batch: bool = False,
        verbose_liveloss=True,
        prev_loss: Dict[str, List[float]] = {}
) -> Tuple[float, Module, Dict[str, List[float]]]:
    """
    Training loop for the model. After each epoch the model quality is evaluated on a held-out set.
    :param prev_loss: losses from a previous training run
    :param verbose_batch:
    :param model: torch.nn.Module - the model to train
    :param train_dataset: torch.utils.data.Dataset - training data
    :param val_dataset: torch.utils.data.Dataset - data for quality evaluation
    :param lr: learning rate
    :param epoch_n: maximum number of epochs
    :param batch_size: number of samples processed by the model in one iteration
    :param device: cuda/cpu - device on which to run the computations
    :param early_stopping_patience: the maximum number of epochs without model improvement
        that is tolerated before training is stopped
    :param l2_reg_alpha: L2 regularization coefficient
    :param max_batches_per_epoch_train: maximum number of iterations per training epoch
    :param max_batches_per_epoch_val: maximum number of iterations per validation epoch
    :param optimizer_ctor
    :param lr_scheduler_ctor
    :param shuffle_train
    :param dataloader_workers_n
    :return: a tuple of three elements:
        - the mean validation loss at the best epoch
        - the best model
        - the loss history
    """
    if device is None:
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)
    model.to(device)

    if optimizer_ctor is None:
        optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=l2_reg_alpha)
    else:
        optimizer = optimizer_ctor(model.parameters())

    if lr_scheduler_ctor is not None:
        lr_scheduler = lr_scheduler_ctor(optimizer)
    else:
        lr_scheduler = None

    train_dataloader = DataLoader(train_dataset, batch_size=batch_size,
                                  shuffle=shuffle_train, num_workers=dataloader_workers_n)
    val_dataloader = DataLoader(val_dataset, batch_size=batch_size,
                                shuffle=False, num_workers=dataloader_workers_n)

    best_val_loss = float('inf')
    best_epoch_i = 0
    best_model = copy.deepcopy(model)

    losses = {
        'train_loss': prev_loss.get('train_loss', []),
        # key matches the dict returned by this function, so histories can be chained
        'valid_loss': prev_loss.get('valid_loss', [])
    }

    if verbose_liveloss:
        liveloss = PlotLosses()

    for epoch_i in range(epoch_n):
        try:
            epoch_start = datetime.datetime.now()
            print('Epoch {}'.format(epoch_i))

            model.train()
            mean_train_loss = 0
            train_batches_n = 0
            for batch_i, (batch_x, batch_y) in enumerate(train_dataloader):
                start_batch = time.time()
                if batch_i > max_batches_per_epoch_train:
                    break

                mask = (batch_x[:, :, 1] != 0)
                batch_x = copy_data_to_device(batch_x, device)
                batch_y = copy_data_to_device(batch_y, device)
                mask = copy_data_to_device(mask, device)

                # set_trace()
                pred = model(batch_x)
                loss = -model.crf(pred.permute(0, 2, 1), batch_y, mask) / batch_size
                # loss = criterion(pred, batch_y)

                model.zero_grad()
                loss.backward()
                optimizer.step()

                mean_train_loss += float(loss)
                train_batches_n += 1
                if verbose_batch:
                    print(f"Batch {batch_i} finished in {time.time() - start_batch:.2f} seconds")

            mean_train_loss /= train_batches_n
            print('Epoch: {} iterations, {:0.2f} sec'.format(
                train_batches_n,
                (datetime.datetime.now() - epoch_start).total_seconds()))
            print('Mean training loss', mean_train_loss)
            losses['train_loss'].append(mean_train_loss)

            model.eval()
            mean_val_loss = 0
            val_batches_n = 0
            with torch.no_grad():
                for batch_i, (batch_x, batch_y) in enumerate(val_dataloader):
                    if batch_i > max_batches_per_epoch_val:
                        break

                    mask = (batch_x[:, :, 1] != 0)
                    batch_x = copy_data_to_device(batch_x, device)
                    batch_y = copy_data_to_device(batch_y, device)
                    mask = copy_data_to_device(mask, device)

                    pred = model(batch_x)
                    loss = -model.crf(pred.permute(0, 2, 1), batch_y, mask) / batch_size

                    mean_val_loss += float(loss)
                    val_batches_n += 1

            mean_val_loss /= val_batches_n
            print('Mean validation loss', mean_val_loss)
            losses['valid_loss'].append(mean_val_loss)

            logs = {'log loss': mean_train_loss, 'val_log loss': mean_val_loss}

            if mean_val_loss < best_val_loss:
                best_epoch_i = epoch_i
                best_val_loss = mean_val_loss
                best_model = copy.deepcopy(model)
                print('New best model!')
            elif epoch_i - best_epoch_i > early_stopping_patience:
                print('The model has not improved over the last {} epochs, stopping training'
                      .format(early_stopping_patience))
                break

            if lr_scheduler is not None:
                lr_scheduler.step(mean_val_loss)

            print()
        except KeyboardInterrupt:
            print('Stopped early by the user')
            break
        except Exception as ex:
            print('Error during training: {}\n{}'.format(ex, traceback.format_exc()))
            break

        if verbose_liveloss:
            liveloss.update(logs)
            liveloss.send()

    return best_val_loss, best_model, losses
def _train(self, ckpt=None, is_retrain=False, plot_verbosity=True):
    print("Note that the sparsity regularizations are not implemented yet...")
    if (ckpt):
        """in case training needs to be started from a checkpoint (Eg.: Case training a pre-trained model)"""
        self.optimizer.load_state_dict(ckpt['optimizer_state_dict'])
        if self.lr_scheduler is not None:
            self.lr_scheduler.load_state_dict(ckpt['lr_scheduler_state_dict'])
        self.model.load_state_dict(ckpt['model_state_dict'])

    args = self.args
    n_epochs = args.retrain_epochs if is_retrain else args.train_epochs
    best_ep, best_loss = 0, np.inf
    best_ckpt_path = args.model_path + '_best' + ("_retrain" if is_retrain else "")
    liveloss = PlotLosses()
    loss_history = []

    if (not is_retrain):
        """Get the rewind epoch details for checkpointing."""
        nB = len(self.train_dataloader.dataset) / args.batch_size
        rewind_epochs = args.rewind_epoch
        rewind_ep = int(rewind_epochs)
        rewind_residual_batch = nB * (rewind_epochs - rewind_ep)

    for ep in range(n_epochs):
        # TRAINING
        epoch_train_loss = 0.
        self.model.train()
        for i, batch in tqdm(enumerate(self.train_dataloader)):
            if (not is_retrain) and (ep == rewind_ep and i >= rewind_residual_batch):
                # if (args.retrain_mode=='weight-rewinding')
                # Checkpoint optimizer, lr_scheduler, and weights after 1.4 epochs for weight / lr rewinding purposes
                w_rewind_ckpt = {
                    "model_state_dict": self.model.state_dict(),
                    "optimizer_state_dict": self.optimizer.state_dict(),
                    "lr_scheduler_state_dict": self.lr_scheduler.state_dict()
                    if self.lr_scheduler is not None else None,
                    "epoch": rewind_epochs
                }
                torch.save(w_rewind_ckpt, self.rewind_ckpt_path)

            # perform the training
            loss = self.model(batch)  # model performs the output computation and the loss computation
            self.optimizer.zero_grad()
            if self.args.sparsity_reg == 'l1':
                loss += self._l1_reg()
            loss.backward()
            self.optimizer.step()
            # store the loss for logging
            epoch_train_loss += loss.cpu().data.item() * len(batch[0])

        # step learning rate
        if self.lr_scheduler is not None:
            self.lr_scheduler.step()
        epoch_train_loss /= len(self.train_dataloader.dataset)

        # VALIDATION
        with torch.no_grad():
            epoch_val_loss = 0.
            self.model.eval()
            for batch in tqdm(self.val_dataloader):
                loss = self.model(batch)
                epoch_val_loss += loss.cpu().data.item() * len(batch[0])
            epoch_val_loss /= len(self.val_dataloader.dataset)

        # PLOT THE METRICS
        if plot_verbosity:
            plot_dict = {"loss": epoch_train_loss, "val_loss": epoch_val_loss}
            if self.compute_val_performance is not None:
                plot_dict.update({
                    "val_performance":
                    self.compute_val_performance(self.model, self.val_dataloader, self.device)
                })
            liveloss.update(plot_dict)
            liveloss.send()
        loss_history.append((epoch_train_loss, epoch_val_loss))

        # DO THE EARLY STOPPING
        if (args.use_early_stop):
            if (epoch_train_loss > best_loss):
                if (args.patience + best_ep < ep):
                    break
            else:
                best_ep = ep
                best_loss = epoch_train_loss
                best_ckpt = {
                    "model_state_dict": self.model.state_dict(),
                    "optimizer_state_dict": self.optimizer.state_dict(),
                    "lr_scheduler_state_dict": self.lr_scheduler.state_dict()
                    if self.lr_scheduler is not None else None,
                    "epoch": best_ep
                }
                torch.save(best_ckpt, best_ckpt_path)

    if not (ep == best_ep):
        # restore model, scheduler and optimizer state from the best checkpoint
        best_ckpt = torch.load(best_ckpt_path)
        self.model.load_state_dict(best_ckpt['model_state_dict'])
        if self.lr_scheduler is not None:
            self.lr_scheduler.load_state_dict(best_ckpt['lr_scheduler_state_dict'])
        self.optimizer.load_state_dict(best_ckpt['optimizer_state_dict'])
    return loss_history
def trainer(classifier, optimizer, scheduler, epochs, early_stop, train_dataloader, validation_dataloader, save_file, seed_val=0, accumulation_steps=1): if torch.cuda.device_count() > 1: print("Let's use", torch.cuda.device_count(), "GPUs!") classifier = nn.DataParallel(classifier) classifier.to(device) random.seed(seed_val) np.random.seed(seed_val) torch.manual_seed(seed_val) torch.cuda.manual_seed_all(seed_val) best = (np.inf, -1, -np.inf, None, None) liveloss = PlotLosses() LossHistory = [] for epoch_i in range(0, epochs): print("") print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs)) print('Training...') classifier.train() epoch_loss = 0. start = time.time() classifier.zero_grad() for step, batch in enumerate(train_dataloader): logs = {} b_inputs = batch[0].to(device) b_labels = batch[1].to(device) b_mask = batch[2].to(device) b_ids = batch[3].to(device) loss, logits = classifier(input_ids=b_inputs, attention_mask=b_mask, token_type_ids=b_ids, labels=b_labels) if torch.cuda.device_count() > 1: loss = loss.sum() loss.backward() if (step + 1) % accumulation_steps == 0: torch.nn.utils.clip_grad_norm_(classifier.parameters(), 1.0) optimizer.step() scheduler.step() classifier.zero_grad() batch_loss = loss.cpu().item() epoch_loss += loss.cpu().item() if (step % 1000 == 0): print("Step %i with loss %f elapsed time %f" % (step, batch_loss, time.time() - start)) print('Evaluating...') classifier.eval() dev_loss = 0. total_eval_accuracy = 0. y_preds = None y_true = None for batch in validation_dataloader: b_inputs = batch[0].to(device) b_labels = batch[1].to(device) b_mask = batch[2].to(device) b_ids = batch[3].to(device) with torch.no_grad(): loss, logits = classifier(input_ids=b_inputs, attention_mask=b_mask, token_type_ids=b_ids, labels=b_labels) if torch.cuda.device_count() > 1: loss = loss.sum() dev_loss += loss.cpu().item() label_ids = b_labels.cpu().numpy() logits = logits.detach().cpu().numpy() total_eval_accuracy += flat_accuracy(logits, label_ids) if y_preds is None: y_preds = np.argmax(logits, axis=1) y_true = label_ids else: y_preds = np.concatenate((y_preds, np.argmax(logits, axis=1))) y_true = np.concatenate((y_true, label_ids)) avg_val_accuracy = total_eval_accuracy / len(validation_dataloader) f1_score_1 = precision_recall_fscore_support(y_true, y_preds, average="binary") f1_score_0 = precision_recall_fscore_support(y_true, y_preds, average="binary", pos_label=0) print("Epoch %i with dev loss %f and dev accuracy %f" % (epoch_i, dev_loss, avg_val_accuracy)) logs["val_loss"] = dev_loss / len(validation_dataloader) logs["loss"] = epoch_loss / len(train_dataloader) logs["val_accuracy"] = avg_val_accuracy liveloss.update(logs) LossHistory.append(logs["loss"]) liveloss.send() if (epoch_i - best[1] >= early_stop and best[0] < dev_loss): print("early_stopping, epoch:", epoch_i + 1) break elif (best[0] > dev_loss): best = (dev_loss, epoch_i, avg_val_accuracy, f1_score_1, f1_score_0) torch.save(classifier.state_dict(), 'checkpoint_big.pt') print("Final dev loss %f Final Train Loss %f Final dev accuracy %f" % (dev_loss, epoch_loss, avg_val_accuracy)) print("Best dev loss %f Best dev accuracy %f" % (best[0], best[2])) print("F1_score Sarcasm ", f1_score_1) print("F1_score Non-Sarcasm ", f1_score_0) return classifier, LossHistory
def train_clean(net, optimizer, dataloader, args): liveloss = PlotLosses() if (args['USE_SCHEDULER']): scheduler = torch.optim.lr_scheduler.MultiStepLR( optimizer, args['sched_milestones'], gamma=args['sched_gamma']) for e in range(args['N_EPOCHS']): logs = {} for phase in ['train', 'val']: prefix = '' if (phase == 'train'): net.train() else: net.eval() prefix = 'val_' n_samples = len(dataloader[phase].dataset) n_batches = len(dataloader[phase]) running_loss = 0.0 running_acc = 0.0 for batch_id, (data, target) in enumerate(tqdm(dataloader[phase])): if (args['USE_CUDA']): data, target = data.cuda(), target.cuda() output, reconstructions, masked = net(data) loss = net.loss(data, output, target, reconstructions) if (phase == 'train'): if (batch_id == n_batches - 1): img1 = data[0].reshape(28, 28).detach().cpu().numpy() img2 = reconstructions[0].reshape( 28, 28).detach().cpu().numpy() weight = net.decoder.reconstraction_layers[0].weight[ 0][:3] grad = net.decoder.reconstraction_layers[ 0].weight.grad[0][:3].data optimizer.zero_grad() loss.backward() optimizer.step() running_acc += torch.sum(masked == target).item() running_loss += loss.item() logs[prefix + 'loss'] = running_loss / float(n_samples) logs[prefix + 'accuracy'] = running_acc / float(n_samples) #Scheduler if (args['USE_SCHEDULER'] and phase == 'train'): scheduler.step() for param in optimizer.param_groups: print("LR for the epoch is:", param['lr']) liveloss.update(logs) liveloss.send() f, axarr = plt.subplots(1, 2) axarr[0].imshow(img1) axarr[1].imshow(img2) plt.show() print("Weights of Reconstruction Layer:", weight) print("Grads of Reconstruction Layer:", grad)
def train_advanced(self, data_loaders, show_plot=True): liveloss = PlotLosses() how_near = 0.2 for epoch in range(self.num_epochs): logs = {} for phase in ['train', 'validation']: if phase == 'train': self.train() else: self.eval() running_loss = 0.0 for inputs, labels in data_loaders[phase]: inputs = T.DoubleTensor(inputs).to(self.device) targets = T.DoubleTensor(inputs).to(self.device) #inputs = T.DoubleTensor(inputs) inputs = Variable(inputs).to(self.device) targets = Variable(targets).to(self.device) #self.optimizer.zero_grad() #outputs = self.forward(inputs) outputs = self.encoder(inputs) outputs = self.decoder(outputs) loss = self.criterion(outputs, inputs) if phase == 'train': self.optimizer.zero_grad() loss.backward() self.optimizer.step() preds = outputs.view(len(inputs), self.original_dim) targets = targets.view(len(inputs), self.original_dim) #preds = preds.detach().cpu().numpy() #targets = targets.detach().cpu().numpy() #print(preds) #print('---') #print(targets) #preds = outputs.item() running_loss += loss.detach() * inputs.size(0) ''' if T.sum((T.abs(preds - targets) < T.abs(how_near*preds))): n_corrects += 1 else: n_wrongs += 1 ''' epoch_loss = running_loss / len(data_loaders[phase].dataset) #epoch_acc = (n_corrects*100) / len(data_loaders[phase].dataset) epoch_acc = self.accuracy(targets, preds) prefix = '' if phase == 'validation': prefix = 'val_' #print('[Model] epoch=%s, loss=%s , acc=%s' % ( epoch, loss.item(), epoch_acc.item)) print('[Model] epoch=%s, loss1=%s, loss2=%s acc=%s' % (epoch, loss.item(), epoch_loss.item(), epoch_acc)) logs[prefix + 'log loss'] = epoch_loss.item() logs[prefix + 'accuracy'] = epoch_acc if show_plot: liveloss.update(logs) liveloss.send()
# plot liveloss = PlotLosses() # train loop for ep in range(epoch): s_time = time.time() p_loss_v = 0 print(f'start ep: {ep}') for it, (batch_x, batch_y) in enumerate(train_loader): batch_x = batch_x.to(device) batch_y = batch_y.to(device) optimizer.zero_grad() predict = model(batch_x) p_loss = loss(predict, batch_y) p_loss_v = p_loss.item() p_loss.backward() optimizer.step() # plot if it % 50 == 0: liveloss.update({'loss': p_loss_v}) liveloss.send() print(f'end ep: {ep} @ {time.time()-s_time:.3f}s') if (ep + 1) % 2 == 0: torch.save(model.state_dict(), f'save/ep_{ep+1}.pth')
class Logger(): def __init__(self, n_epochs, batches_epoch, out_dir, start_epoch=1): # self.viz = Visdom() self.n_epochs = n_epochs self.batches_epoch = batches_epoch self.epoch = start_epoch self.batch = 1 self.prev_time = time.time() self.mean_period = 0 self.losses = {} self.loss_windows = {} self.image_windows = {} self.out_dir = out_dir self.to_image = transforms.ToPILImage() self.liveloss = PlotLosses() def log(self, losses=None, images=None): pass self.mean_period += (time.time() - self.prev_time) self.prev_time = time.time() sys.stdout.write( '\rEpoch %03d/%03d [%04d/%04d] -- ' % (self.epoch, self.n_epochs, self.batch, self.batches_epoch)) plots = {} for i, loss_name in enumerate(losses.keys()): if loss_name not in self.losses: self.losses[loss_name] = losses[loss_name].data else: self.losses[loss_name] += losses[loss_name].data if (i + 1) == len(losses.keys()): sys.stdout.write( '%s: %.4f -- ' % (loss_name, self.losses[loss_name] / self.batch)) else: sys.stdout.write( '%s: %.4f | ' % (loss_name, self.losses[loss_name] / self.batch)) batches_done = self.batches_epoch * (self.epoch - 1) + self.batch batches_left = self.batches_epoch * ( self.n_epochs - self.epoch) + self.batches_epoch - self.batch sys.stdout.write('ETA: %s' % (datetime.timedelta( seconds=batches_left * self.mean_period / batches_done))) if self.batch % 10 == 0: # Save images plt.ioff() fig = plt.figure(figsize=(100, 50)) for i, (image_name, tensor) in enumerate(images.items()): ax = plt.subplot(1, len(images), i + 1) ax.imshow(self.to_image(tensor.cpu().data[0])) fig.savefig(self.out_dir + '/%d_%d.png' % (self.epoch, self.batch)) plt.close(fig) # self.to_image(images["composed"].cpu().data[0]).save(self.out_dir + '/%d_%d.png' % (self.epoch, self.batch)) # plt.close(fig) # End of epoch if (self.batch % self.batches_epoch) == 0: # Plot losses for i, (loss_name, loss) in enumerate(self.losses.items()): # if loss_name not in self.loss_windows: # self.loss_windows[loss_name] = self.viz.line(X=np.array([self.epoch]), Y=np.array([loss/self.batch]), # opts={'xlabel': 'epochs', 'ylabel': loss_name, 'title': loss_name}) # else: # self.viz.line(X=np.array([self.epoch]), Y=np.array([loss/self.batch]), win=self.loss_windows[loss_name], update='append') plots[loss_name] = self.losses[loss_name] / self.batch # Reset losses for next epoch self.losses[loss_name] = 0.0 self.liveloss.update(plots) self.liveloss.send() self.epoch += 1 self.batch = 1 sys.stdout.write('\n') else: self.batch += 1
ind.fitness.values = fit

# new population
population[:] = offspring

# take the best and worst individuals to build the plot
top = tools.selBest(population, k=1)
worst = tools.selWorst(population, k=1)
avg_h = avg_h / len(population)
top_h = nqueen_fitness(top[0])[0]
worst_h = nqueen_fitness(worst[0])[0]
plotlosses.update({'top': top_h, 'average': avg_h, 'worst': worst_h})
plotlosses.send()

# evaluate the stopping criterion
if (nqueen_fitness(top[0])[0] == 0):
    print(top[0])
    resultado = binToDec(top[0], log_N)
    # dataframe
    eixos = [i for i in range(N)]
    estado_inicial = pd.DataFrame(index=(eixos), columns=(eixos))
    estadoInicial = list(random.randrange(N) for i in range(N))
    for i in range(len(estadoInicial)):
        estado_inicial[eixos[i]][resultado[i]] = 'rainha'
    break
class Trainer(object): def __init__( self, model=None, data_loader=None, train_times=1000, lr=1e-3, alpha=0.5, use_gpu=True, opt_method="sgd", save_steps=None, checkpoint_dir=None, ): self.work_threads = 8 self.train_times = train_times self.opt_method = opt_method self.optimizer = None self.lr_decay = 0 self.weight_decay = 0 self.alpha = alpha self.lr = lr self.model = model self.data_loader = data_loader self.use_gpu = use_gpu self.save_steps = save_steps self.checkpoint_dir = checkpoint_dir self.liveplot = PlotLosses() def train_one_step(self, data, stage=1): self.optimizer.zero_grad() self.model.zero_grad() loss = self.model({ 'batch_h': self.to_var(data['batch_h'], self.use_gpu), 'batch_t': self.to_var(data['batch_t'], self.use_gpu), 'batch_r': self.to_var(data['batch_r'], self.use_gpu), 'batch_y': self.to_var(data['batch_y'], self.use_gpu), 'mode': data['mode'], 'stage': stage }) loss.backward() nn.utils.clip_grad_norm_(self.model.parameters(), 2) self.optimizer.step() return loss.item() def run(self, lr=None, alpha=None, weight_decay=None, train_times=None, stage=1, multiplier=1): if lr: self.lr = lr if alpha: self.alpha = alpha if weight_decay: self.weight_decay = weight_decay if train_times: self.train_times = train_times if self.use_gpu: self.model.cuda() if self.optimizer is not None: pass elif self.opt_method == "Adagrad" or self.opt_method == "adagrad": self.optimizer = optim.Adagrad( self.model.parameters(), lr=self.lr, lr_decay=self.lr_decay, weight_decay=self.weight_decay, ) elif self.opt_method == "Adadelta" or self.opt_method == "adadelta": self.optimizer = optim.Adadelta( self.model.parameters(), lr=self.lr, weight_decay=self.weight_decay, ) elif self.opt_method == "Adam" or self.opt_method == "adam": self.optimizer = optim.Adam( self.model.parameters(), lr=self.lr, weight_decay=self.weight_decay, ) elif self.opt_method == "ranger": if not lr: self.optimizer = Ranger(self.model.parameters(), lr=self.lr, alpha=self.alpha) else: self.optimizer = Ranger(self.model.parameters(), lr=lr, alpha=self.alpha) elif self.opt_method == "rangerva": self.optimizer = RangerVA(self.model.parameters(), lr=lr) else: self.optimizer = optim.SGD( self.model.parameters(), lr=self.alpha, weight_decay=self.weight_decay, ) print("Finish initializing...") # training_range = tqdm.tqdm(range(self.train_times)) training_range = tqdm.trange(self.train_times) # training_range = range(self.train_times) for epoch in training_range: res = 0.0 for data in self.data_loader: loss = multiplier * self.train_one_step(data, stage) res += loss self.liveplot.update({'loss': res}) self.liveplot.send() if self.save_steps and self.checkpoint_dir and ( epoch + 1) % self.save_steps == 0: print("Epoch %d has finished, saving..." 
% (epoch)) self.model.save_checkpoint( os.path.join(self.checkpoint_dir + "-" + str(epoch) + ".ckpt")) def set_model(self, model): self.model = model def to_var(self, x, use_gpu): if use_gpu: return Variable(torch.from_numpy(x).cuda()) else: return Variable(torch.from_numpy(x)) def set_use_gpu(self, use_gpu): self.use_gpu = use_gpu def set_alpha(self, alpha): self.alpha = alpha def set_lr_decay(self, lr_decay): self.lr_decay = lr_decay def set_weight_decay(self, weight_decay): self.weight_decay = weight_decay def set_opt_method(self, opt_method): self.opt_method = opt_method def set_train_times(self, train_times): self.train_times = train_times def set_save_steps(self, save_steps, checkpoint_dir=None): self.save_steps = save_steps if not self.checkpoint_dir: self.set_checkpoint_dir(checkpoint_dir) def set_checkpoint_dir(self, checkpoint_dir): self.checkpoint_dir = checkpoint_dir
def fit(self, interactions_df, users_df, items_df): """ Training of the recommender. :param pd.DataFrame interactions_df: DataFrame with recorded interactions between users and items defined by user_id, item_id and features of the interaction. :param pd.DataFrame users_df: DataFrame with users and their features defined by user_id and the user feature columns. :param pd.DataFrame items_df: DataFrame with items and their features defined by item_id and the item feature columns. """ del users_df, items_df # Shift item ids and user ids so that they are consecutive unique_item_ids = interactions_df['item_id'].unique() self.item_id_mapping = dict( zip(unique_item_ids, list(range(len(unique_item_ids))))) self.item_id_reverse_mapping = dict( zip(list(range(len(unique_item_ids))), unique_item_ids)) unique_user_ids = interactions_df['user_id'].unique() self.user_id_mapping = dict( zip(unique_user_ids, list(range(len(unique_user_ids))))) self.user_id_reverse_mapping = dict( zip(list(range(len(unique_user_ids))), unique_user_ids)) interactions_df = interactions_df.copy() interactions_df.replace( { 'item_id': self.item_id_mapping, 'user_id': self.user_id_mapping }, inplace=True) # Get the number of items and users self.interactions_df = interactions_df n_users = np.max(interactions_df['user_id']) + 1 n_items = np.max(interactions_df['item_id']) + 1 # Get the user-item interaction matrix (mapping to int is necessary because of how iterrows works) r = np.zeros(shape=(n_users, n_items)) for idx, interaction in interactions_df.iterrows(): r[int(interaction['user_id'])][int(interaction['item_id'])] = 1 self.r = r # Generate negative interactions negative_interactions = [] i = 0 while i < self.n_neg_per_pos * len(interactions_df): sample_size = 1000 user_ids = self.rng.choice(np.arange(n_users), size=sample_size) item_ids = self.rng.choice(np.arange(n_items), size=sample_size) j = 0 while j < sample_size and i < self.n_neg_per_pos * len( interactions_df): if r[user_ids[j]][item_ids[j]] == 0: negative_interactions.append([user_ids[j], item_ids[j], 0]) i += 1 j += 1 interactions_df = pd.concat([ interactions_df, pd.DataFrame(negative_interactions, columns=['user_id', 'item_id', 'interacted']) ]) # Initialize user and item embeddings as random vectors (from Gaussian distribution) self.user_repr = self.rng.normal(0, 1, size=(r.shape[0], self.embedding_dim)) self.item_repr = self.rng.normal(0, 1, size=(r.shape[1], self.embedding_dim)) # Initialize losses and loss visualization if self.print_type is not None and self.print_type == 'live': liveloss = PlotLosses() training_losses = deque(maxlen=50) training_avg_losses = [] training_epoch_losses = [] validation_losses = deque(maxlen=50) validation_avg_losses = [] validation_epoch_losses = [] last_training_total_loss = 0.0 last_validation_total_loss = 0.0 # Split the data interaction_ids = self.rng.permutation(len(interactions_df)) train_validation_slice_idx = int( len(interactions_df) * (1 - self.validation_set_size)) training_ids = interaction_ids[:train_validation_slice_idx] validation_ids = interaction_ids[train_validation_slice_idx:] # Train the model for epoch in range(self.n_epochs): if self.print_type is not None and self.print_type == 'live': logs = {} # Train training_losses.clear() training_total_loss = 0.0 batch_idx = 0 for idx in training_ids: user_id = int(interactions_df.iloc[idx]['user_id']) item_id = int(interactions_df.iloc[idx]['item_id']) e_ui = r[user_id, item_id] - np.dot(self.user_repr[user_id], self.item_repr[item_id]) self.user_repr[user_id] 
= self.user_repr[user_id] \ + self.lr * (e_ui * self.item_repr[item_id] - self.reg_l * self.user_repr[user_id]) self.item_repr[item_id] = self.item_repr[item_id] \ + self.lr * (e_ui * self.user_repr[user_id] - self.reg_l * self.item_repr[item_id]) loss = e_ui**2 training_total_loss += loss if self.print_type is not None and self.print_type == 'text': print( "\rEpoch: {}\tBatch: {}\tLast epoch - avg training loss: {:.2f} avg validation loss: {:.2f} loss: {}" .format(epoch, batch_idx, last_training_total_loss, last_validation_total_loss, loss), end="") batch_idx += 1 training_losses.append(loss) training_avg_losses.append(np.mean(training_losses)) # Validate validation_losses.clear() validation_total_loss = 0.0 for idx in validation_ids: user_id = int(interactions_df.iloc[idx]['user_id']) item_id = int(interactions_df.iloc[idx]['item_id']) e_ui = r[user_id, item_id] - np.dot(self.user_repr[user_id], self.item_repr[item_id]) loss = e_ui**2 validation_total_loss += loss validation_losses.append(loss) validation_avg_losses.append(np.mean(validation_losses)) # Save and print epoch losses training_last_avg_loss = training_total_loss / len(training_ids) training_epoch_losses.append(training_last_avg_loss) validation_last_avg_loss = validation_total_loss / len( validation_ids) validation_epoch_losses.append(validation_last_avg_loss) if self.print_type is not None and self.print_type == 'live' and epoch >= 3: # A bound on epoch prevents showing extremely high losses in the first epochs # noinspection PyUnboundLocalVariable logs['loss'] = training_last_avg_loss logs['val_loss'] = validation_last_avg_loss # noinspection PyUnboundLocalVariable liveloss.update(logs) liveloss.send() # Find the most popular items for the cold start problem offers_count = interactions_df.loc[:, ['item_id', 'user_id']].groupby( by='item_id').count() offers_count = offers_count.sort_values('user_id', ascending=False) self.most_popular_items = offers_count.index
def trainer(cfg, train_id=None, num_workers=15, device=None):
    device = device or 'cuda:0'  ##
    train_id = train_id or cfg['train_id']
    use_pretrained_vgg = cfg["use_pretrained_vgg"]
    batch_size = cfg["batch_size"]
    lr = cfg["lr"]
    num_epochs = cfg["num_epochs"]

    model = ternausnet.models.UNet11(pretrained=use_pretrained_vgg)
    if cfg.get('first_freeze_layers', None) is not None:
        for i in range(cfg['first_freeze_layers']):
            for param in model.encoder[i].parameters():
                param.requires_grad = False
    if cfg['pretrained_model'] is not None:
        model.load_state_dict(torch.load(cfg['pretrained_model']))
    model = model.to(device)

    loss = nn.BCEWithLogitsLoss()
    # only optimize parameters that are not frozen
    optimizer = Adam(filter(lambda x: x.requires_grad, model.parameters()), lr)

    d_train = WaterDataset(cfg['train_img_list'], train_transform)
    d_val = WaterDataset(cfg['test_img_list'], test_transform)
    print(d_val[0][0].shape)
    dl_train = DataLoader(d_train, batch_size, shuffle=True, num_workers=num_workers)
    dl_val = DataLoader(d_val, batch_size, shuffle=False, num_workers=num_workers)

    metrics = {
        'val_acc': AccuracyMetric(0.5),
        'train_acc': AccuracyMetric(0.5),
        'val_loss': LossMetric(),
        'train_loss': LossMetric(),
        'train_lake_acc': LakeAccuracyMetric(0.5),
        'val_lake_acc': LakeAccuracyMetric(0.5),
        'train_nolake_acc': NoLakeAccuracyMetric(0.5),
        'val_nolake_acc': NoLakeAccuracyMetric(0.5),
        'val_miou': MIOUMetric(0.5),
        'train_miou': MIOUMetric(0.5),
        'val_f1': F1Metric(0.5),
        'train_f1': F1Metric(0.5)
    }
    groups = {
        'accuracy': ['train_acc', 'val_acc'],
        'bce-loss': ['train_loss', 'val_loss'],
        'lake-acc': ['train_lake_acc', 'val_lake_acc'],
        'nolake_acc': ['train_nolake_acc', 'val_nolake_acc'],
        'miou': ['train_miou', 'val_miou'],
        'f1': ['train_f1', 'val_f1']
    }
    plotlosses = PlotLosses(groups=groups)
    topk_val_losses = {}

    for epoch in range(num_epochs):
        print('train step')
        for name, metric in metrics.items():
            metric.reset()
        model.train()
        for idx, (im, gt) in enumerate(dl_train):
            im = im.to(device)
            gt = gt.to(device)
            optimizer.zero_grad()
            pred = model(im)
            L = loss(pred, gt)
            L.backward()
            assert pred.shape == gt.shape
            metrics['train_acc'].append(pred, gt)
            metrics['train_lake_acc'].append(pred, gt)
            metrics['train_nolake_acc'].append(pred, gt)
            metrics['train_miou'].append(pred, gt)
            metrics['train_f1'].append(pred, gt)
            metrics['train_loss'].append(L)
            optimizer.step()
        torch.cuda.empty_cache()

        model.eval()
        print('eval step')
        with torch.no_grad():
            for idx, (im, gt) in enumerate(dl_val):
                im = im.to(device)
                gt = gt.to(device)
                pred = model(im)
                L = loss(pred, gt)
                metrics['val_acc'].append(pred, gt)
                metrics['val_lake_acc'].append(pred, gt)
                metrics['val_nolake_acc'].append(pred, gt)
                metrics['val_miou'].append(pred, gt)
                metrics['val_f1'].append(pred, gt)
                metrics['val_loss'].append(L)
        torch.cuda.empty_cache()

        results = {key: metrics[key].result() for key in metrics}
        plotlosses.update(results)
        plotlosses.send()
        for name, metric in metrics.items():
            metric.history()
        history = {key: metrics[key].hist for key in metrics}
        save_models(model, topk_val_losses, metrics['val_loss'].result(), epoch,
                    train_id, save_num_models=3)
        torch.save(model.state_dict(), 'model-latest.pth')
        with open(f'history-{train_id}.json', "w") as write_file:
            json.dump(history, write_file, indent=4)
def trainer(classifier, optimizer, scheduler, epochs, early_stop, train_dataloader, validation_dataloader, save_file, seed_val=0, accumulation_steps=1): if torch.cuda.device_count() > 1: print("Let's use", torch.cuda.device_count(), "GPUs!") classifier = nn.DataParallel(classifier) classifier.to(device) tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2') embedder = AlbertModel.from_pretrained('albert-base-v2') embedder.to(device) random.seed(seed_val) np.random.seed(seed_val) torch.manual_seed(seed_val) torch.cuda.manual_seed_all(seed_val) best = (np.inf, -1, -np.inf, None, None) liveloss = PlotLosses() LossHistory = [] val_step = 0 for epoch_i in range(0, epochs): logs = {} print("") print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs)) print("Global Learning Rate", optimizer.param_groups[0]["lr"]) print('Training...') classifier.train() epoch_loss = 0. start = time.time() classifier.zero_grad() for step, batch in enumerate(train_dataloader): b_inputs_c = batch[0].to(device) b_inputs_r = batch[1].to(device) b_mask_c = batch[2].to(device) b_mask_r = batch[3].to(device) b_labels = batch[4].to(device) x_c = embedder(input_ids=b_inputs_c, attention_mask=b_mask_c)[0] x_r = embedder(input_ids=b_inputs_r, attention_mask=b_mask_r)[0] loss, logits = classifier(x_c.permute(1, 0, 2), x_r.permute(1, 0, 2), b_labels) if torch.cuda.device_count() > 1: loss = loss.sum() loss.backward() if (step + 1) % accumulation_steps == 0: torch.nn.utils.clip_grad_norm_(classifier.parameters(), 1.0) optimizer.step() scheduler.step() classifier.zero_grad() batch_loss = loss.cpu().item() epoch_loss += loss.cpu().item() if (step % 100) == 0: print("Step %i with loss %.3f elapsed time %.3f" % (step, epoch_loss / (step + 1), time.time() - start)) # writer.add_scalar("Loss/train", epoch_loss/(step+1), global_step) # writer.flush() print('Evaluating...') classifier.eval() dev_loss = 0. total_eval_accuracy = 0. 
y_preds = None y_true = None for batch in validation_dataloader: b_inputs_c = batch[0].to(device) b_inputs_r = batch[1].to(device) b_mask_c = batch[2].to(device) b_mask_r = batch[3].to(device) b_labels = batch[4].to(device) with torch.no_grad(): x_c = embedder(input_ids=b_inputs_c, attention_mask=b_mask_c)[0] x_r = embedder(input_ids=b_inputs_r, attention_mask=b_mask_r)[0] loss, logits = classifier(x_c.permute(1, 0, 2), x_r.permute(1, 0, 2), b_labels) if torch.cuda.device_count() > 1: loss = loss.sum() dev_loss += loss.cpu().item() label_ids = b_labels.cpu().numpy() logits = logits.detach().cpu().numpy() total_eval_accuracy += flat_accuracy(logits, label_ids) if y_preds is None: y_preds = np.argmax(logits, axis=1) y_true = label_ids else: y_preds = np.concatenate((y_preds, np.argmax(logits, axis=1))) y_true = np.concatenate((y_true, label_ids)) avg_val_accuracy = total_eval_accuracy / len(validation_dataloader) f1_score_1 = precision_recall_fscore_support(y_true, y_preds, average="binary") f1_score_0 = precision_recall_fscore_support(y_true, y_preds, average="binary", pos_label=0) print("Epoch %i with dev loss %f and dev accuracy %f" % (epoch_i + 1, dev_loss, avg_val_accuracy)) logs["val_loss"] = dev_loss / len(validation_dataloader) logs["loss"] = epoch_loss / len(train_dataloader) logs["val_accuracy"] = avg_val_accuracy liveloss.update(logs) LossHistory.append(logs["loss"]) liveloss.send() if (val_step - best[1] >= early_stop and best[0] < dev_loss): print("early_stopping, epoch:", epoch_i + 1) print( "Final dev loss %f Final Train Loss %f Final dev accuracy %f" % (dev_loss, epoch_loss, avg_val_accuracy)) print("Best dev loss %f Best dev accuracy %f" % (best[0], best[2])) print("F1_score Sarcasm ", f1_score_1) print("F1_score Non-Sarcasm ", f1_score_0) return classifier elif (best[0] > dev_loss): best = (dev_loss, val_step, avg_val_accuracy, f1_score_1, f1_score_0) torch.save(classifier.state_dict(), save_file) val_step += 1 classifier.train() print("Final dev loss %f Final Train Loss %f Final dev accuracy %f" % (dev_loss, epoch_loss, avg_val_accuracy)) print("Best dev loss %f Best dev accuracy %f" % (best[0], best[2])) print("F1_score Sarcasm ", f1_score_1) print("F1_score Non-Sarcasm ", f1_score_0) return classifier
def train(self):
    """ Train the model """
    # initial setup
    epoch = 1
    loss_val_best = 100
    num_epochs_increased = 0
    epoch_best = 1
    liveloss = PlotLosses()
    logs = {}

    # Perform training
    while True:
        # Run one iteration of SGD
        t0 = time.time()
        loss_train = self.train_epoch()
        loss_train_eval = self.compute_loss(self.loader_train_eval)
        loss_val = self.compute_loss(self.loader_val)
        time_epoch = time.time() - t0
        self.logger.add_entry({
            'loss_train': loss_train,
            'loss_train_eval': loss_train_eval,
            'loss_val': loss_val
        })

        # run learning rate scheduler
        if self.scheduler:
            self.scheduler.step(loss_val)

        # save logger info
        if self.save_dir:
            self.logger.append(os.path.join(self.save_dir, 'log.txt'))

        # change in loss_val
        d_loss_val = (loss_val - loss_val_best) / loss_val_best * 100

        # display results
        logs['loss'] = loss_train_eval
        logs['val_loss'] = loss_val
        logs['percent improvement'] = (loss_val - loss_train_eval) / loss_train_eval * 100
        liveloss.update(logs)
        logs['val_percent improvement'] = d_loss_val
        liveloss.send()
        print('E: {:} / Train: {:.3e} / Valid: {:.3e} / Diff Valid: {:.2f}% / Diff Valid-Train: {:.1f}% / Time: {:.2f}'
              .format(epoch, loss_train_eval, loss_val, d_loss_val,
                      (loss_val - loss_train_eval) / loss_train_eval * 100, time_epoch))

        # if validation loss improves
        if d_loss_val < 0:
            num_epochs_increased = 0
            # record epoch and loss
            epoch_best = epoch
            loss_val_best = loss_val
            # save model weights
            if self.save_dir:
                print('Validation loss improved. Saving model.')
                torch.save(self.model.state_dict(), os.path.join(self.save_dir, 'model.dat'))
        else:
            num_epochs_increased = num_epochs_increased + 1

        # stop training if we lose patience:
        if num_epochs_increased > self.patience:
            break

        # advance epoch counter
        epoch = epoch + 1
def fit(self, X, eval_X, y=None, model_saved_path='bprh_model.pkl', iter_to_save=5000, coselection_saved_path='data/item-set-coselection.pkl', iter_to_log=100, correlation=True, coselection=False, plot_metric=False, log_metric=False): # Here we do not load model -> train a new model if self.existed_model_path is None: # To make sure train and test works with inconsistent user and item list, # we transform user and item's string ID to int ID so that their ID is their index in U and V print("Registering Model Parameters") # rename user and item self.user_original_id_list = sorted( set(X.UserID).union(set(eval_X.UserID))) self.item_original_id_list = sorted( set(X.ItemID).union(set(eval_X.ItemID))) self.train_data = X.copy() self.test_data = eval_X.copy() self.train_data.UserID = self.train_data.UserID.apply( lambda x: self.user_original_id_list.index(x)) self.train_data.ItemID = self.train_data.ItemID.apply( lambda x: self.item_original_id_list.index(x)) self.test_data.UserID = self.test_data.UserID.apply( lambda x: self.user_original_id_list.index(x)) self.test_data.ItemID = self.test_data.ItemID.apply( lambda x: self.item_original_id_list.index(x)) self.item_list = [ idx[0] for idx in enumerate(self.item_original_id_list) ] self.user_list = [ idx[0] for idx in enumerate(self.user_original_id_list) ] self.num_u = len(self.user_list) self.num_i = len(self.item_list) # build I_u_t, I_u_a (pre-computing for acceleration) self.build_itemset_for_user() # Calculate auxiliary-target correlation C for every user and each types of auxiliary action if correlation: self.alpha_u = self.auxiliary_target_correlation( X=self.train_data) else: print( "No auxiliary-target correlation - all alpha_u equal to one" ) alpha_u_all_ones = dict() user_set_bar = tqdm(self.user_list) for u in user_set_bar: alpha_u_all_ones[u] = dict() alpha_u_all_ones[u]['alpha'] = 1.0 self.alpha_u = alpha_u_all_ones.copy() # Generate item-set based on co-selection if coselection: self.S, self.U_item = self.itemset_coselection( X=self.train_data) # Initialization of User and Item Matrices if self.random_state is not None: np.random.seed(self.random_state) else: np.random.seed(0) print("Initializing User and Item Matrices") # NOTE: Initialization is influenced by mean and std self.U = np.random.normal(size=(self.num_u, self.dim + 1), loc=0.0, scale=0.1) self.V = np.random.normal(size=(self.dim + 1, self.num_i), loc=0.0, scale=0.1) # self.U = np.zeros(shape=(self.num_u, self.dim + 1)) # self.V = np.zeros(shape=(self.dim + 1, self.num_i)) self.U[:, -1] = 1.0 # estimation is U dot V self.estimation = np.dot(self.U, self.V) # Configure loss plots layout if plot_metric: groups = { 'Precision@K': ['Precision@5', 'Precision@10'], 'Recall@K': ['Recall@5', 'Recall@10'], 'AUC': ['AUC'] } plot_losses = PlotLosses(groups=groups) # Start Iteration all_item = set(self.item_list) user_in_train = sorted(set(self.train_data.UserID)) print("Start Training") with trange(self.num_iter) as t: for index in t: # Description will be displayed on the left # t.set_description('ITER %i' % index) # Build u, I, J, K # uniformly sample a user from U u = choice(user_in_train) # build I # uniformly sample a item i from I_u_t I_u_t = self.I_u_t_train[u] if len(I_u_t) != 0: i = choice(sorted(I_u_t)) # build I = I_u_t cap S_i if coselection: I = I_u_t.intersection(self.S[i]) else: # if no coselection, we set I as the set of purchased items by user u # no uniform sampling, like COFISET I = I_u_t else: # if no item in I_u_t, then set I to empty set i = None I = set() # 
build J, since we only have one auxiliary action, we follow the uniform sampling I_u_oa = self.I_u_a_train[u] - I_u_t if len(I_u_oa) != 0: j = choice(sorted(I_u_oa)) if coselection: # NOTE: typo in paper? J = I_u_oa.intersection(self.S[j]) else: # if no coselection, we set J as the set of only-auxiliary items by user u # no uniform sampling, like COFISET J = I_u_oa else: # if no item in I_u_oa, then set J to empty set j = None J = set() # build K I_u_n = all_item - I_u_t - I_u_oa if len(I_u_n) != 0: k = choice(sorted(I_u_n)) # build K if coselection: # NOTE: typo in paper? K = I_u_n.intersection(self.S[k]) else: # if no coselection, we set K as the set of no-action items by user u # no uniform sampling, like COFISET K = I_u_n else: # if no item in I_u_n, then set K to empty set k = None K = set() # calculate intermediate variables # get specific alpha_u spec_alpha_u = self.alpha_u[u]['alpha'] U_u = self.U[u, :-1].copy() sorted_I = sorted(I) sorted_J = sorted(J) sorted_K = sorted(K) # get r_hat_uIJ, r_hat_uJK, r_hat_uIK r_hat_uI = np.average( self.estimation[u, sorted_I]) if len(I) != 0 else np.array( [0]) r_hat_uJ = np.average( self.estimation[u, sorted_J]) if len(J) != 0 else np.array( [0]) r_hat_uK = np.average( self.estimation[u, sorted_K]) if len(K) != 0 else np.array( [0]) r_hat_uIJ = r_hat_uI - r_hat_uJ r_hat_uJK = r_hat_uJ - r_hat_uK r_hat_uIK = r_hat_uI - r_hat_uK # get V_bar_I, V_bar_J, V_bar_K V_bar_I = np.average(self.V[:-1, sorted_I], axis=1) if len(I) != 0 else np.zeros( shape=(self.dim, )) V_bar_J = np.average(self.V[:-1, sorted_J], axis=1) if len(J) != 0 else np.zeros( shape=(self.dim, )) V_bar_K = np.average(self.V[:-1, sorted_K], axis=1) if len(K) != 0 else np.zeros( shape=(self.dim, )) # get b_I, b_J, b_K b_I = np.average( self.V[-1, sorted_I]) if len(I) != 0 else np.array([0]) b_J = np.average( self.V[-1, sorted_J]) if len(J) != 0 else np.array([0]) b_K = np.average( self.V[-1, sorted_K]) if len(K) != 0 else np.array([0]) # here we want to examine the condition of empty sets indicator_I = indicator(len(I) == 0) indicator_J = indicator(len(J) == 0) indicator_K = indicator(len(K) == 0) indicator_sum = indicator_I + indicator_J + indicator_K if 0 <= indicator_sum <= 1: # these are the cases when only one set are empty or no set is empty # when all three are not empty, or I is empty, or K is empty, it is # easy to rewrite the obj by multiplying the indicator # when J is empty, we have to rewrite the obj if indicator_J == 1: # when J is empty # NABLA U_u df_dUu = sigmoid(-r_hat_uIK) * (V_bar_I - V_bar_K) dR_dUu = 2 * self.lambda_u * U_u # update U_u = U_u + gamma * (df_dUu - dR_dUu) self.U[u, :-1] += self.gamma * (df_dUu - dR_dUu) # NABLA V_i df_dbi = (1 - indicator_I ) * sigmoid(-r_hat_uIK) / indicator_len(I) dR_dbi = ( 1 - indicator_I ) * 2 * self.lambda_b * b_I / indicator_len(I) df_dVi = df_dbi * U_u dR_dVi = 2 * self.lambda_v * V_bar_I / indicator_len(I) # update V_i = V_i + gamma * (df_dVi - dR_dVi) self.V[:-1, sorted_I] += self.gamma * ( df_dVi - dR_dVi)[:, None] # trick: transpose here # update b_i = b_i + gamma * (df_dbi - dR_dbi) self.V[-1, sorted_I] += self.gamma * (df_dbi - dR_dbi) # No change on J # NABLA V_k df_dbk = (1 - indicator_K ) * -sigmoid(-r_hat_uIK) / indicator_len(K) dR_dbk = ( 1 - indicator_K ) * 2 * self.lambda_b * b_K / indicator_len(K) df_dVk = df_dbk * U_u dR_dVk = 2 * self.lambda_v * V_bar_K / indicator_len(K) # update V_k = V_k + gamma * (df_dVk - dR_dVk) self.V[:-1, sorted_K] += self.gamma * ( df_dVk - dR_dVk)[:, None] # trick: transpose here # 
update b_k = b_k + gamma * (df_dbk - dR_dbk) self.V[-1, sorted_K] += self.gamma * (df_dbk - dR_dbk) else: # when J is not empty # NABLA U_u df_dUu = (1 - indicator_I) * sigmoid(- r_hat_uIJ / spec_alpha_u) / spec_alpha_u * ( V_bar_I - V_bar_J) + \ (1 - indicator_K) * sigmoid(- r_hat_uJK) * (V_bar_J - V_bar_K) dR_dUu = 2 * self.lambda_u * U_u # update U_u = U_u + gamma * (df_dUu - dR_dUu) self.U[u, :-1] += self.gamma * (df_dUu - dR_dUu) # NABLA V_i df_dbi = (1 - indicator_I) * sigmoid( -r_hat_uIJ / spec_alpha_u) / (indicator_len(I) * spec_alpha_u) dR_dbi = ( 1 - indicator_I ) * 2 * self.lambda_b * b_I / indicator_len(I) df_dVi = df_dbi * U_u dR_dVi = 2 * self.lambda_v * V_bar_I / indicator_len(I) # update V_i = V_i + gamma * (df_dVi - dR_dVi) self.V[:-1, sorted_I] += self.gamma * ( df_dVi - dR_dVi)[:, None] # trick: transpose here # update b_i = b_i + gamma * (df_dbi - dR_dbi) self.V[-1, sorted_I] += self.gamma * (df_dbi - dR_dbi) # NABLA V_j df_dbj = (1 - indicator_I) * ( -sigmoid(-r_hat_uIJ / spec_alpha_u) / spec_alpha_u + (1 - indicator_K) * sigmoid(-r_hat_uJK)) / indicator_len(J) dR_dbj = 2 * self.lambda_b * b_J / indicator_len(J) df_dVj = df_dbj * U_u dR_dVj = 2 * self.lambda_v * V_bar_J / indicator_len(J) # update V_j = V_j + gamma * (df_dVj - dR_dVj) self.V[:-1, sorted_J] += self.gamma * ( df_dVj - dR_dVj)[:, None] # trick: transpose here # update b_j = b_j + gamma * (df_dbj - dR_dbj) self.V[-1, sorted_J] += self.gamma * (df_dbj - dR_dbj) # NABLA V_k df_dbk = (1 - indicator_K ) * -sigmoid(-r_hat_uJK) / indicator_len(K) dR_dbk = ( 1 - indicator_K ) * 2 * self.lambda_b * b_K / indicator_len(K) df_dVk = df_dbk * U_u dR_dVk = 2 * self.lambda_v * V_bar_K / indicator_len(K) # update V_k = V_k + gamma * (df_dVk - dR_dVk) self.V[:-1, sorted_K] += self.gamma * ( df_dVk - dR_dVk)[:, None] # trick: transpose here # update b_k = b_k + gamma * (df_dbk - dR_dbk) self.V[-1, sorted_K] += self.gamma * (df_dbk - dR_dbk) else: # these are the cases when at least two sets are empty # at these cases, we ignore this user and continue the loop continue # calculate loss # f_Theta = np.log(sigmoid(r_hat_uIJ / spec_alpha_u)) + np.log(sigmoid(r_hat_uJK)) # regula = self.lambda_u * np.linalg.norm(U_u, ord=2) + self.lambda_v * ( # (np.linalg.norm(V_bar_I, ord=2) if len(I) != 0 else 0) + ( # np.linalg.norm(V_bar_J, ord=2) if len(J) != 0 else 0) + ( # np.linalg.norm(V_bar_K, ord=2)) if len(K) != 0 else 0) + self.lambda_b * ( # (b_I if len(I) != 0 else 0) ** 2 + (b_J if len(J) != 0 else 0) ** 2 + ( # b_K if len(K) != 0 else 0) ** 2) # bprh_loss = f_Theta - regula # update estimation old_estimation = self.estimation.copy() # self.estimation = np.dot(self.U, self.V) all_sampled_item = sorted(set.union(I, J, K)) # for sampled_item in all_sampled_item: # self.estimation[:, sampled_item] = np.dot(self.U, self.V[:, sampled_item]) self.estimation[:, all_sampled_item] = np.dot( self.U, self.V[:, all_sampled_item]) # estimation changed est_changed = np.linalg.norm(self.estimation - old_estimation) # we only save model to file when the num of iter % iter_to_save == 0 if (index + 1) % iter_to_save == 0: self.save(model_path=model_saved_path + "_" + str(index)) # we only calculate metric when the num of iter % iter_to_log == 0 if (index + 1) % iter_to_log == 0: if log_metric | plot_metric: # calculate metrics on test data user_to_eval = sorted(set(self.test_data.UserID)) scoring_list_5, precision_5, recall_5, avg_auc = self.scoring( user_to_eval=user_to_eval, ground_truth=self.test_data, K=5, train_data_as_reference_flag=True) 
scoring_list_10, precision_10, recall_10, _ = self.scoring( user_to_eval=user_to_eval, ground_truth=self.test_data, K=10, train_data_as_reference_flag=True) if log_metric: self.eval_hist.append([ index, precision_5, precision_10, recall_5, recall_10, avg_auc ]) if plot_metric: plot_losses.update({ 'Precision@5': precision_5, 'Precision@10': precision_10, 'Recall@5': recall_5, 'Recall@10': recall_10, 'AUC': avg_auc }) plot_losses.send() # Postfix will be displayed on the right, # formatted automatically based on argument's datatype t.set_postfix(est_changed=est_changed, len_I=len(I), len_J=len(J), len_K=len(K))
def fit(self, interactions_df, users_df, items_df): """ Training of the recommender. :param pd.DataFrame interactions_df: DataFrame with recorded interactions between users and items defined by user_id, item_id and features of the interaction. :param pd.DataFrame users_df: DataFrame with users and their features defined by user_id and the user feature columns. :param pd.DataFrame items_df: DataFrame with items and their features defined by item_id and the item feature columns. """ del users_df, items_df # Shift item ids and user ids so that they are consecutive unique_item_ids = interactions_df['item_id'].unique() self.item_id_mapping = dict( zip(unique_item_ids, list(range(len(unique_item_ids))))) self.item_id_reverse_mapping = dict( zip(list(range(len(unique_item_ids))), unique_item_ids)) unique_user_ids = interactions_df['user_id'].unique() self.user_id_mapping = dict( zip(unique_user_ids, list(range(len(unique_user_ids))))) self.user_id_reverse_mapping = dict( zip(list(range(len(unique_user_ids))), unique_user_ids)) interactions_df = interactions_df.copy() interactions_df.replace( { 'item_id': self.item_id_mapping, 'user_id': self.user_id_mapping }, inplace=True) # Get the number of items and users self.interactions_df = interactions_df.copy() n_users = np.max(interactions_df['user_id']) + 1 n_items = np.max(interactions_df['item_id']) + 1 # Get the user-item interaction matrix (mapping to int is necessary because of how iterrows works) r = np.zeros(shape=(n_users, n_items)) for idx, interaction in interactions_df.iterrows(): r[int(interaction['user_id'])][int(interaction['item_id'])] = 1 self.r = r # Indicate positive interactions interactions_df.loc[:, 'interacted'] = 1 # Generate negative interactions negative_interactions = [] i = 0 while i < self.n_neg_per_pos * len(interactions_df): sample_size = 1000 user_ids = self.rng.choice(np.arange(n_users), size=sample_size) item_ids = self.rng.choice(np.arange(n_items), size=sample_size) j = 0 while j < sample_size and i < self.n_neg_per_pos * len( interactions_df): if r[user_ids[j]][item_ids[j]] == 0: negative_interactions.append([user_ids[j], item_ids[j], 0]) i += 1 j += 1 interactions_df = pd.concat([ interactions_df, pd.DataFrame(negative_interactions, columns=['user_id', 'item_id', 'interacted']) ]) interactions_df = interactions_df.reset_index(drop=True) # Initialize losses and loss visualization if self.print_type is not None and self.print_type == 'live': liveloss = PlotLosses() training_losses = deque(maxlen=50) training_avg_losses = [] training_epoch_losses = [] validation_losses = deque(maxlen=50) validation_avg_losses = [] validation_epoch_losses = [] last_training_total_loss = 0.0 last_validation_total_loss = 0.0 # Initialize the network self.nn_model = GMFModel(n_items, n_users, self.embedding_dim, self.seed) self.nn_model.train() self.nn_model.to(self.device) self.optimizer = optim.Adam(self.nn_model.parameters(), lr=self.lr, weight_decay=self.weight_decay) # Split the data if self.train: interaction_ids = self.rng.permutation(len(interactions_df)) train_validation_slice_idx = int( len(interactions_df) * (1 - self.validation_set_size)) training_ids = interaction_ids[:train_validation_slice_idx] validation_ids = interaction_ids[train_validation_slice_idx:] else: interaction_ids = self.rng.permutation(len(interactions_df)) training_ids = interaction_ids validation_ids = [] # Train the model for epoch in range(self.n_epochs): if self.print_type is not None and self.print_type == 'live': logs = {} # Train 
training_losses.clear() training_total_loss = 0.0 self.rng.shuffle(training_ids) n_batches = int(np.ceil(len(training_ids) / self.batch_size)) for batch_idx in range(n_batches): batch_ids = training_ids[(batch_idx * self.batch_size):((batch_idx + 1) * self.batch_size)] batch = interactions_df.loc[batch_ids] batch_input = torch.from_numpy( batch.loc[:, ['user_id', 'item_id']].values).long().to( self.device) y_target = torch.from_numpy( batch.loc[:, ['interacted']].values).float().to(self.device) # Create responses y = self.nn_model(batch_input).clip(0.000001, 0.999999) # Define loss and backpropagate self.optimizer.zero_grad() loss = -(y_target * y.log() + (1 - y_target) * (1 - y).log()).sum() loss.backward() self.optimizer.step() training_total_loss += loss.item() if self.print_type is not None and self.print_type == 'text': print( "\rEpoch: {}\tBatch: {}\tLast epoch - avg training loss: {:.2f} avg validation loss: {:.2f} loss: {}" .format(epoch, batch_idx, last_training_total_loss, last_validation_total_loss, loss), end="") training_losses.append(loss.item()) training_avg_losses.append(np.mean(training_losses)) # Validate validation_total_loss = 0.0 batch = interactions_df.loc[validation_ids] batch_input = torch.from_numpy( batch.loc[:, ['user_id', 'item_id']].values).long().to( self.device) y_target = torch.from_numpy( batch.loc[:, ['interacted']].values).float().to(self.device) # Create responses y = self.nn_model(batch_input).clip(0.000001, 0.999999) # Calculate validation loss loss = -(y_target * y.log() + (1 - y_target) * (1 - y).log()).sum() validation_total_loss += loss.item() # Save and print epoch losses training_last_avg_loss = training_total_loss / len(training_ids) if self.train: validation_last_avg_loss = validation_total_loss / len( validation_ids) if self.print_type is not None and self.print_type == 'live' and epoch >= 0: # A bound on epoch prevents showing extremely high losses in the first epochs logs['loss'] = training_last_avg_loss if self.train: logs['val_loss'] = validation_last_avg_loss liveloss.update(logs) liveloss.send() # Find the most popular items for the cold start problem offers_count = interactions_df.loc[:, ['item_id', 'user_id']].groupby( by='item_id').count() offers_count = offers_count.sort_values('user_id', ascending=False) self.most_popular_items = offers_count.index