Example #1
    def __init__(self,
                 hparams,
                 dataset: HeteroNetDataset,
                 metrics=["precision"]):
        num_edge = len(dataset.edge_index_dict)
        num_layers = hparams.num_layers
        num_class = dataset.n_classes
        self.collate_fn = hparams.collate_fn
        self.multilabel = dataset.multilabel
        num_nodes = dataset.num_nodes_dict[dataset.head_node_type]

        if dataset.in_features:
            w_in = dataset.in_features
        else:
            w_in = hparams.embedding_dim

        w_out = hparams.embedding_dim

        super(HAN, self).__init__(num_edge=num_edge,
                                  w_in=w_in,
                                  w_out=w_out,
                                  num_class=num_class,
                                  num_nodes=num_nodes,
                                  num_layers=num_layers)

        if not hasattr(dataset, "x") and not hasattr(dataset, "x_dict"):
            if num_nodes > 10000:
                self.embedding = {
                    dataset.head_node_type:
                    torch.nn.Embedding(
                        num_embeddings=num_nodes,
                        embedding_dim=hparams.embedding_dim).cpu()
                }
            else:
                self.embedding = torch.nn.Embedding(
                    num_embeddings=num_nodes,
                    embedding_dim=hparams.embedding_dim)

        self.dataset = dataset
        self.head_node_type = self.dataset.head_node_type
        hparams.n_params = self.get_n_params()
        self.train_metrics = Metrics(prefix="",
                                     loss_type=hparams.loss_type,
                                     n_classes=dataset.n_classes,
                                     multilabel=dataset.multilabel,
                                     metrics=metrics)
        self.valid_metrics = Metrics(prefix="val_",
                                     loss_type=hparams.loss_type,
                                     n_classes=dataset.n_classes,
                                     multilabel=dataset.multilabel,
                                     metrics=metrics)
        self.test_metrics = Metrics(prefix="test_",
                                    loss_type=hparams.loss_type,
                                    n_classes=dataset.n_classes,
                                    multilabel=dataset.multilabel,
                                    metrics=metrics)
        hparams.name = self.name()
        hparams.inductive = dataset.inductive
        self.hparams = hparams
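
Examples #1 and #9 build the same trio of prefix-based Metrics objects ("" for train, "val_", "test_"). A minimal sketch of what such a class could look like, assuming it simply accumulates predictions and wraps scikit-learn scorers; the names and internals here are illustrative, not the original implementation:

from sklearn.metrics import precision_score, recall_score

class Metrics:
    """Hypothetical sketch: accumulates predictions under a stage prefix."""

    def __init__(self, prefix, loss_type, n_classes, multilabel, metrics):
        self.prefix = prefix              # "", "val_" or "test_"
        self.metric_names = metrics       # e.g. ["precision"]
        self.average = "samples" if multilabel else "micro"
        self.y_true, self.y_pred = [], []

    def update_metrics(self, y_pred, y_true):
        self.y_pred.extend(y_pred)
        self.y_true.extend(y_true)

    def compute_metrics(self):
        scorers = {"precision": precision_score, "recall": recall_score}
        return {self.prefix + name: scorers[name](self.y_true, self.y_pred,
                                                  average=self.average)
                for name in self.metric_names if name in scorers}
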
Example #2
def test(model, dataloader, params):
    val_data = tqdm(dataloader.data_iterator(data_type='test',
                                             batch_size=params.batch_size),
                    total=(dataloader.size()[0] // params.batch_size))
    metrics = Metrics()
    loss_avg = RunningAverage()
    with torch.no_grad():
        for data, labels in val_data:
            model.eval()
            data = torch.tensor(data, dtype=torch.long).to(params.device)
            labels = torch.tensor(labels, dtype=torch.long).to(params.device)

            batch_masks = data != 0

            loss, logits = model(data,
                                 attention_mask=batch_masks,
                                 labels=labels)

            predicted = logits.max(2)[1]
            metrics.update(batch_pred=predicted.cpu().numpy(),
                           batch_true=labels.cpu().numpy(),
                           batch_mask=batch_masks.cpu().numpy())
            loss_avg.update(torch.mean(loss).item())
            val_data.set_postfix(type='VAL',
                                 loss='{:05.3f}'.format(loss_avg()))
    metrics.loss = loss_avg()
    return metrics
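
This function (and Example #5 below) leans on a RunningAverage helper that is fed per-batch losses with update() and read back by calling the object. A minimal sketch consistent with that usage, assuming nothing beyond a running mean:

class RunningAverage:
    """Hypothetical sketch: running mean of the scalar values seen so far."""

    def __init__(self):
        self.total = 0.0
        self.steps = 0

    def update(self, val):
        self.total += val
        self.steps += 1

    def __call__(self):
        return self.total / float(self.steps)
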
Example #3

    def train_epoch(self, epoch):
        """Train an epoch."""
        self.model.train()  # Set model to training mode
        losses = Metrics()
        total_iter = len(self.train_data_loader.dataset) // self.train_data_loader.batch_size

        for idx, (x, y) in enumerate(self.train_data_loader):
            s = time.monotonic()

            x = x.to(self.device)
            y = y.to(self.device)
            y_pred = self.model(x)

            self.optimizer.zero_grad()
            loss = self.criterion(y_pred, y)
            loss.backward()
            self.optimizer.step()

            losses.update(loss.item(), x.size(0))

            self.writer.add_scalar('train/current_loss', losses.val, self.train_step)
            self.writer.add_scalar('train/avg_loss', losses.avg, self.train_step)
            self.train_step += 1

            e = time.monotonic()
            if idx % self.print_freq == 0:
                log_time = self.print_freq * (e - s)
                eta = ((total_iter - idx) * log_time) / 60.0
                print(f'Epoch {epoch} [{idx}/{total_iter}], loss={loss:.3f}, time={log_time:.2f}, ETA={eta:.2f}')

        return losses.avg
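
In Examples #3, #12 and #16, Metrics is used as a value/average meter: update(val, n) plus .val and .avg attributes, which is the same contract as the AverageMeter defined in Example #23. A sketch under that assumption:

class Metrics:
    """Hypothetical sketch: tracks the latest value and a weighted average."""

    def __init__(self):
        self.val = self.avg = self.sum = 0.0
        self.count = 0

    def update(self, val, n=1):
        self.val = val                   # most recent value
        self.sum += val * n              # weighted by batch size n
        self.count += n
        self.avg = self.sum / self.count
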
Example #4
def debug_test_set():
    clf = pickle_load(os.path.join('models6', 'strong_classifier_276.pkl'))
    trainer = Trainer(mp_pool=Pool(8))
    trainer.load_data('data')
    print("Strong classifier test metrics:")
    predictions = clf.classify_batch(trainer.test_ds.X_integral)
    print(Metrics(predictions, trainer.test_ds.y))
Example #5
def validate(model, val_set, params):
    val_data = tqdm(DataLoader(val_set,
                               batch_size=params.batch_size,
                               collate_fn=KeyphraseData.collate_fn),
                    total=(len(val_set) // params.batch_size))
    metrics = Metrics()
    loss_avg = RunningAverage()
    with torch.no_grad():
        model.eval()
        for data, labels, mask in val_data:

            data = data.to(params.device)
            labels = labels.to(params.device)
            mask = mask.to(params.device)

            loss, logits = model(data, attention_mask=mask, labels=labels)

            predicted = logits.max(2)[1]
            metrics.update(batch_pred=predicted.cpu().numpy(),
                           batch_true=labels.cpu().numpy(),
                           batch_mask=mask.cpu().numpy())
            loss_avg.update(torch.mean(loss).item())
            val_data.set_postfix(type='VAL',
                                 loss='{:05.3f}'.format(loss_avg()))

    metrics.loss = loss_avg()
    return metrics
Example #6
    def _evaluate(self, model_param, criterion):
        # Evaluate on CPU: with my resources, the full training set is too
        # large to move to the GPU.
        with torch.no_grad():  # operations inside don't track history
            self.model_eval.load_state_dict(state_dict=model_param)
            self.model_eval.eval()

            #train_prob = self.model_eval(self.training_set.x_data)
            #train_pred = train_prob.argmax(1)
            #train_loss = criterion(train_prob, self.training_set.y_data)
            #train_acc = (train_pred == self.training_set.y_data.long()).float().mean()
            #train_f1 = metrics.f1_score(self.training_set.y_data.long().numpy(), train_pred.numpy(), average='macro')
            #train_m = Metrics(self.training_set.y_data, train_pred, self.labels)
            #train_b = train_m.balanced_score()

            gc.collect()

            val_prob = self.model_eval(self.validation_set.x_data)
            val_pred = val_prob.argmax(1)
            val_loss = criterion(val_prob, self.validation_set.y_data)
            val_acc = (
                val_pred == self.validation_set.y_data.long()).float().mean()
            val_f1 = metrics.f1_score(
                self.validation_set.y_data.long().numpy(),
                val_pred.numpy(),
                average='macro')
            val_m = Metrics(self.validation_set.y_data, val_pred, self.labels)
            val_b = val_m.balanced_score()

            gc.collect()

            # Evaluating on the training set uses too much CPU, so for now just reuse the validation values.
            train_prob = val_prob
            train_pred = val_prob.argmax(1)
            train_loss = criterion(val_prob, self.validation_set.y_data)
            train_acc = (
                val_pred == self.validation_set.y_data.long()).float().mean()
            train_f1 = metrics.f1_score(
                self.validation_set.y_data.long().numpy(),
                val_pred.numpy(),
                average='macro')
            train_m = Metrics(self.validation_set.y_data, val_pred,
                              self.labels)
            train_b = train_m.balanced_score()

            return (train_loss.item(), train_acc, train_f1,
                    val_loss.item(), val_acc, val_f1, train_b, val_b)
Example #7
    def run_experiment(self, load_controller, expert_demos):
        """Model predictive control.

        Arguments:
            load_controller (bool): If True, load mpc controller.
            expert_demos (bool): If True, initialize training set with extra expert demonstrations.
        """
        if load_controller:
            self.mpc = torch.load(os.path.join(self.savedir, 'mpc.pth'))

        else:
            # Initial random rollouts
            obs, acts, lengths, _, _ = self._sample_rollouts(self.init_steps,
                                                             actor=self.mpc)

            if expert_demos:
                obs_expert, acts_expert = self._load_expert_demos()
                obs = obs + tuple(o for o in obs_expert)
                acts = acts + tuple(a for a in acts_expert)

            # Train initial model
            self.mpc.train_initial(obs, acts)

        # Training loop
        step = self.mpc.X.shape[0]
        while step < self.total_steps:
            # Sample rollouts
            start = time.time()
            print(f"Rolling out {self.train_freq} timesteps...")
            obs, acts, lengths, scores, rollouts_metrics = self._sample_rollouts(
                self.train_freq, actor=self.mpc)
            step += sum(lengths)
            print_rollout_stats(obs[0], acts[0], lengths[0], scores[0])

            act_metrics = Metrics()
            flat_rollouts_metrics = [
                item for sublist in rollouts_metrics for item in sublist
            ]
            for x in flat_rollouts_metrics:
                act_metrics.store(x)
            for k, v in act_metrics.average().items():
                self.logger.log_scalar(k, v, step)

            self.logger.log_scalar("score/avg_length", np.mean(lengths), step)
            self.logger.log_scalar("score/avg_score", np.mean(scores), step)
            self.logger.log_scalar("time/rollout_time", (time.time() - start),
                                   step)

            # Train model
            train_metrics, weights = self.mpc.train_iteration(obs, acts)
            for k, v in train_metrics.items():
                self.logger.log_scalar(k, v, step)
            for k, v in weights.items():
                self.logger.log_histogram(k, v, step)

            # Save model
            torch.save(self.mpc, os.path.join(self.savedir, 'mpc.pth'))
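
The MPC loop above assumes yet another Metrics shape: store() collects per-step metric dicts and average() reduces them key-wise. A sketch under that assumption (the method names come from the snippet; everything else is guessed):

from collections import defaultdict

import numpy as np

class Metrics:
    """Hypothetical sketch: gathers dicts of scalars, then averages per key."""

    def __init__(self):
        self.values = defaultdict(list)

    def store(self, record):
        for key, value in record.items():
            self.values[key].append(value)

    def average(self):
        return {key: float(np.mean(vals)) for key, vals in self.values.items()}
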
Example #8
def test(exp_name):
    print('loading data......')
    test_data = getattr(datasets, opt.dataset)(opt.root,
                                               opt.test_data_dir,
                                               mode='test',
                                               size=opt.testsize)
    test_dataloader = DataLoader(test_data,
                                 batch_size=1,
                                 shuffle=False,
                                 num_workers=opt.num_workers)
    total_batch = int(len(test_data) / 1)
    model, _, _ = generate_model(opt)

    model.eval()

    # metrics_logger initialization
    metrics = Metrics([
        'recall', 'specificity', 'precision', 'F1', 'F2', 'ACC_overall',
        'IoU_poly', 'IoU_bg', 'IoU_mean'
    ])

    logger = get_logger('./results/' + exp_name + '.log')
    with torch.no_grad():
        for i, data in enumerate(test_dataloader):
            img, gt = data['image'], data['label']

            if opt.use_gpu:
                img = img.cuda()
                gt = gt.cuda()

            output = model(img)
            (_recall, _specificity, _precision, _F1, _F2, _ACC_overall,
             _IoU_poly, _IoU_bg, _IoU_mean) = evaluate(output, gt)

            metrics.update(recall=_recall,
                           specificity=_specificity,
                           precision=_precision,
                           F1=_F1,
                           F2=_F2,
                           ACC_overall=_ACC_overall,
                           IoU_poly=_IoU_poly,
                           IoU_bg=_IoU_bg,
                           IoU_mean=_IoU_mean)

    metrics_result = metrics.mean(total_batch)

    print("Test Result:")
    logger.info(
        'recall: %.4f, specificity: %.4f, precision: %.4f, F1: %.4f, F2: %.4f, '
        'ACC_overall: %.4f, IoU_poly: %.4f, IoU_bg: %.4f, IoU_mean: %.4f' %
        (metrics_result['recall'], metrics_result['specificity'],
         metrics_result['precision'], metrics_result['F1'],
         metrics_result['F2'], metrics_result['ACC_overall'],
         metrics_result['IoU_poly'], metrics_result['IoU_bg'],
         metrics_result['IoU_mean']))
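
Examples #8 and #18 share a Metrics that is constructed with a list of metric names, fed per-batch values through keyword arguments, and averaged with mean(total_batch). A sketch consistent with that usage (an assumption, not the actual class):

class Metrics:
    """Hypothetical sketch: sums named scalars, then averages over batches."""

    def __init__(self, names):
        self.totals = {name: 0.0 for name in names}

    def update(self, **kwargs):
        for name, value in kwargs.items():
            self.totals[name] += float(value)

    def mean(self, total_batch):
        return {name: total / total_batch
                for name, total in self.totals.items()}
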
Example #9
    def __init__(self, hparams, dataset, metrics, *args):
        super().__init__(*args)

        self.train_metrics = Metrics(prefix="",
                                     loss_type=hparams.loss_type,
                                     n_classes=dataset.n_classes,
                                     multilabel=dataset.multilabel,
                                     metrics=metrics)
        self.valid_metrics = Metrics(prefix="val_",
                                     loss_type=hparams.loss_type,
                                     n_classes=dataset.n_classes,
                                     multilabel=dataset.multilabel,
                                     metrics=metrics)
        self.test_metrics = Metrics(prefix="test_",
                                    loss_type=hparams.loss_type,
                                    n_classes=dataset.n_classes,
                                    multilabel=dataset.multilabel,
                                    metrics=metrics)
        hparams.name = self.name()
        hparams.inductive = dataset.inductive
        self.hparams = hparams
Example #10
    def __init__(self, args):
        BaseModel.__init__(self, args)
        self.metrics = Metrics()    # evaluation metrics
        self.visual_images = []     # images to visualize
        self.visual_losses = []     # losses to visualize
        if self.args.mode == 'train':
            self.visual_images += ['train_confusion_matrix']
            self.visual_losses += ['train_loss', 'train_precision', 'train_recall', 'train_f1_score']
        if self.args.mode == 'valid':
            self.visual_images += ['valid_confusion_matrix']
            self.visual_losses += ['valid_loss', 'valid_precision', 'valid_recall', 'valid_f1_score']
        if self.args.mode == 'test':
            self.visual_images += ['test_confusion_matrix']
            self.visual_losses += ['test_loss', 'test_precision', 'test_recall', 'test_f1_score']
Example #11
    def validate(self, val_loader, models, criterions, last_best_epochs):
        batch_time = AverageMeter()
        losses = AverageMeter()
        top1 = AverageMeter()
        top5 = AverageMeter()
        metrics = Metrics()
        losses_per_class = LossPerClassMeter(len(val_loader.dataset.dataset.classes))

        models['backbone'].eval()
        models['module'].eval()

        end = time.time()

        with torch.no_grad():
            for i, (data_x, data_y) in enumerate(val_loader):
                data_y = data_y.cuda(non_blocking=True)
                data_x = data_x.cuda(non_blocking=True)

                output = models['backbone'](data_x)
                loss = criterions['backbone'](output, data_y)

                losses_per_class.update(loss.cpu().detach().numpy(), data_y.cpu().numpy())
                loss = torch.sum(loss) / loss.size(0)

                acc = accuracy(output.data, data_y, topk=(1, 2,))
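                # note: with topk=(1, 2), acc[1] is top-2 accuracy despite the top5 meter name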
                losses.update(loss.data.item(), data_x.size(0))
                top1.update(acc[0].item(), data_x.size(0))
                top5.update(acc[1].item(), data_x.size(0))
                metrics.add_mini_batch(data_y, output)

                batch_time.update(time.time() - end)
                end = time.time()

                if i % self.args.print_freq == 0:
                    print('Test: [{0}/{1}]\t'
                          'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                          'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                          'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                          'Last best epoch {last_best_epoch}'
                          .format(i, len(val_loader), batch_time=batch_time, loss=losses, top1=top1,
                                  last_best_epoch=last_best_epochs))

        report = metrics.get_report(target_names=val_loader.dataset.dataset.classes)
        print(' * Acc@1 {top1.avg:.3f}\t * Prec {0}\t * Recall {1} * Acc@5 {top5.avg:.3f}\t'
              .format(report['macro avg']['precision'], report['macro avg']['recall'], top1=top1, top5=top5))

        return pd.DataFrame.from_dict({f'{k}-val-loss': losses_per_class.avg[i]
                                       for i, k in enumerate(val_loader.dataset.dataset.classes)}, orient='index').T, \
            pd.DataFrame.from_dict(report)
Example #12

    def validate_epoch(self):
        """Validate after training an epoch."""
        self.model.eval()  # Set model to evaluate mode
        losses = Metrics()

        with torch.no_grad():
            for idx, (x, y) in enumerate(self.val_data_loader):
                x = x.to(self.device)
                y = y.to(self.device)
                y_pred = self.model(x)
                loss = self.criterion(y_pred, y)
                losses.update(loss.item(), x.size(0))

                self.writer.add_scalar('val/current_loss', losses.val, self.val_step)
                self.writer.add_scalar('val/avg_loss', losses.avg, self.val_step)
                self.val_step += 1

        return losses.avg
Example #13
def test(model: nn.Module,
         device: torch.device,
         test_loader: DataLoader,
         criterion: nn.Module,
         text_transform: Callable,
         log_every=40):
    print('Evaluating...')
    model.eval()
    test_cer, test_wer, test_loss = [], [], []
    data_len = len(test_loader)

    with torch.no_grad():
        for i, _data in enumerate(test_loader):
            spectrograms, labels, input_lengths, label_lengths = _data
            spectrograms, labels = spectrograms.to(device), labels.to(device)

            output = model(spectrograms)  # (batch, time, n_class)
            output = F.log_softmax(output, dim=2)
            output = output.transpose(0, 1)  # (time, batch, n_class)

            loss = criterion(output, labels, input_lengths, label_lengths)
            test_loss.append(loss.item())

            decoded_preds, decoded_targets = greedy_decode(
                output.transpose(0, 1), labels, label_lengths, text_transform)
            test_cer.append(
                word_error_rate(decoded_targets, decoded_preds, use_cer=True))
            test_wer.append(word_error_rate(decoded_targets, decoded_preds))

            if i % log_every == 0:
                print(f'{i}/{data_len}')
                print(f'Test WER: {test_wer[-1]}; CER: {test_cer[-1]}')

                for p, t in zip(decoded_preds, decoded_targets):
                    print(f'Prediction: [{p}]\t Ground Truth: [{t}]')

    avg_cer = np.mean(test_cer)
    avg_wer = np.mean(test_wer)
    avg_loss = np.mean(test_loss)

    print(
        f'Test set: Average loss: {avg_loss}, Average CER: {avg_cer} Average WER: {avg_wer}'
    )
    return Metrics(loss=avg_loss, cer=avg_cer, wer=avg_wer)
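
Here Metrics acts as a plain record built from keyword arguments (loss, cer and wer in Example #13; only loss in Example #15). One possible minimal definition, assuming nothing beyond attribute access on the result:

class Metrics:
    """Hypothetical sketch: a bag of named result values."""

    def __init__(self, **kwargs):
        self.__dict__.update(kwargs)    # m = Metrics(loss=0.5); m.loss -> 0.5
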
Example #14
def train(models, optimizers, dataset, corpus, ckpts, params, args):
    epoch_num = params.epoch_num
    batch_epoch = params.batch_epoch
    autoencoder.noise_radius = params.noise_radius
    step = 0

    for e in range(epoch_num, params.max_epoch):
        for batch, (source, target) in islice(enumerate(dataset), batch_epoch, None):
            metrics = Metrics(
                epoch=e,
                max_epoch=params.max_epoch,
            )
            for p in range(params.epoch_ae):
                ae_metrics = train_autoencoder(models, optimizers, source, target, params)
                metrics.accum(ae_metrics)

            metrics['ae_loss'] /= params.epoch_ae
            metrics['acc'] /= params.epoch_ae

            batch_epoch += 1
            # anneal noise every 5 batch_epoch for now
            if batch_epoch % 5 == 0:
                autoencoder.noise_radius = autoencoder.noise_radius * 0.995
            if batch_epoch % params.print_every == 0:
                ckpts.save()
                logging.info('--- Epoch {}/{} Batch {} ---'.format(e + 1, metrics['max_epoch'], batch_epoch))
                logging.info('Loss {:.4f}'.format(float(metrics['ae_loss'])))

                params.batch_epoch = batch_epoch
                params.epoch_num = e
                params.noise_radius = autoencoder.noise_radius
                params.save(os.path.join(args.model_dir, 'params.json'))

                # Floydhub metrics
                print('{{"metric": "acc", "value": {}, "step": {}}}'.format(float(metrics['acc']), step))
                print('{{"metric": "ae_loss", "value": {}, "step": {}}}'.format(float(metrics['ae_loss']), step))


                step += 1
                tb_writer.add_scalar('train/acc', metrics['acc'], step)
                tb_writer.add_scalar('train/ae_loss', metrics['ae_loss'], step)

        batch_epoch = 0
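
Examples #14 and #26 treat Metrics as a dict-like accumulator: it is constructed with initial keys, merged with accum(), and indexed with []. A sketch of that contract (assumed, not the original source):

class Metrics(dict):
    """Hypothetical sketch: dict that sums another metrics dict into itself."""

    def accum(self, other):
        for key, value in other.items():
            self[key] = self.get(key, 0.0) + value
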
Example #15
def train(model: nn.Module,
          device: torch.device,
          train_loader: DataLoader,
          criterion: nn.Module,
          optimizer: torch.optim.Optimizer,
          scheduler,
          epoch: int,
          iter_meter,
          tb_writer: SummaryWriter,
          log_every=20) -> Metrics:
    model.train()
    data_len = len(train_loader)
    epoch_loss = []
    print('Training')

    for batch_idx, _data in enumerate(train_loader):
        spectrograms, labels, input_lengths, label_lengths = _data
        spectrograms, labels = spectrograms.to(device), labels.to(device)

        optimizer.zero_grad()

        output = model(spectrograms)  # (batch, time, n_class)
        output = F.log_softmax(output, dim=2)
        output = output.transpose(0, 1)  # (time, batch, n_class)

        loss = criterion(output, labels, input_lengths, label_lengths)
        loss.backward()
        loss_scalar = loss.item()

        optimizer.step()
        if scheduler:
            scheduler.step()
        iter_meter.step()

        if batch_idx % log_every == 0 or batch_idx == data_len - 1:  # data_len - 1 is the last batch index
            print(f'Train Epoch: {epoch} \t batch: {batch_idx}/{data_len}')
            print(f'Loss: {loss_scalar}')

        epoch_loss.append(loss_scalar)
        tb_writer.add_scalar('batch_loss', loss_scalar, iter_meter.get())

    return Metrics(loss=np.mean(epoch_loss))
Example #16

    def test(self):
        self.model.eval()
        losses = Metrics()
        # accuracy = Metrics()

        with torch.no_grad():
            for idx, (x, y) in enumerate(self.test_data_loader):
                x = x.to(self.device)
                y = y.to(self.device)
                y_pred = self.model(x)

                loss = self.criterion(y_pred, y)
                losses.update(loss.item(), x.size(0))

                # predict = 1 if get_mean_score(y_pred.cpu().numpy()[0]) > 5 else 0
                # target = 1 if get_mean_score(y.cpu().numpy()[0]) > 5 else 0
                #
                # accuracy.update(1 if predict == target else 0)

        logger.info(f'test loss={losses.avg}')
        print(losses.avg)
        return losses.avg
Example #17
def main():
    username = password = server = None
    parser = argparse.ArgumentParser(description='Show all boards in JIRA')
    cfg = None
    try:
        cf = ConfigFile('config.yaml')
        cfg = cf.config
        username = cfg['username']
        password = cfg['password']
        server = cfg['server']
    except FileNotFoundError as e:
        print("Config File does not exist, falling back to argument parsing")
        parser.add_argument('-u', help="Provide User Name")
        parser.add_argument('-p', help="Provide Password")
        parser.add_argument('-s', help="Provide Server URL")
    args = parser.parse_args()
    if cfg is None:
        username = args.u
        password = args.p
        server = args.s
    jc = JiraConn(username, password, server)
    m = Metrics(jc.jira)
    m.list_boards()
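
This Metrics is unrelated to model training: it wraps an authenticated JIRA client. Assuming the python jira library, whose JIRA object exposes boards(), list_boards could be as simple as the sketch below (the class body is guessed from the call site):

class Metrics:
    """Hypothetical sketch around a jira.JIRA client."""

    def __init__(self, jira):
        self.jira = jira

    def list_boards(self):
        # jira.JIRA.boards() returns the agile boards visible to the account
        for board in self.jira.boards():
            print(board.id, board.name)
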
Example #18
def valid(model, valid_dataloader, total_batch):
    model.eval()

    # Metrics_logger initialization
    metrics = Metrics([
        'recall', 'specificity', 'precision', 'F1', 'F2', 'ACC_overall',
        'IoU_poly', 'IoU_bg', 'IoU_mean'
    ])

    with torch.no_grad():
        bar = tqdm(enumerate(valid_dataloader), total=total_batch)
        for i, data in bar:
            img, gt = data['image'], data['label']

            if opt.use_gpu:
                img = img.cuda()
                gt = gt.cuda()

            output = model(img)
            (_recall, _specificity, _precision, _F1, _F2, _ACC_overall,
             _IoU_poly, _IoU_bg, _IoU_mean) = evaluate(output, gt)

            metrics.update(recall=_recall,
                           specificity=_specificity,
                           precision=_precision,
                           F1=_F1,
                           F2=_F2,
                           ACC_overall=_ACC_overall,
                           IoU_poly=_IoU_poly,
                           IoU_bg=_IoU_bg,
                           IoU_mean=_IoU_mean)

    metrics_result = metrics.mean(total_batch)
    model.train()

    return metrics_result
Example #19
def xval(data_path, adaptor, classifier, summ):

    input_ = Input(FLAGS.xval_batch_size, FLAGS.num_points)
    waves, labels = input_(data_path)

    # Calculate the loss of the model.
    if FLAGS.adp:
        logits = adaptor(waves)
        logits = classifier(logits)
    else:
        logits = classifier(waves, expand_dims=True)

    logits = tf.argmax(logits, axis=-1)

    metrics = Metrics("accuracy")
    with tf.control_dependencies(
        [tf.assert_equal(tf.rank(labels), tf.rank(logits))]):
        score, xval_accu_op = metrics(labels, logits)

    assert summ, "invalid summary helper object"
    summ.register('xval', 'accuracy', score)
    xval_summ_op = summ('xval')

    return xval_accu_op, xval_summ_op
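
In this TensorFlow 1.x example, Metrics("accuracy") is called on (labels, logits) and returns a (score, update_op) pair, mirroring the streaming tf.metrics API. A sketch under that assumption:

import tensorflow as tf

class Metrics:
    """Hypothetical sketch: thin dispatcher over tf.metrics streaming ops."""

    def __init__(self, name):
        self.name = name

    def __call__(self, labels, predictions):
        if self.name == "accuracy":
            # returns (accuracy_tensor, update_op); both are run in a session
            return tf.metrics.accuracy(labels=labels, predictions=predictions)
        raise ValueError("unsupported metric: " + self.name)
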
Example #20
def train_model(args,
                model,
                train,
                dev,
                teacher_model=None,
                save_path=None,
                maxsteps=None):

    if args.tensorboard and (not args.debug):
        from tensorboardX import SummaryWriter
        writer = SummaryWriter('./runs/{}'.format(args.prefix + args.hp_str))

    # optimizer
    if args.optimizer == 'Adam':
        opt = torch.optim.Adam(
            [p for p in model.parameters() if p.requires_grad],
            betas=(0.9, 0.98),
            eps=1e-9)
    else:
        raise NotImplementedError

    # if resume training
    if (args.load_from is not None) and (args.resume):
        with torch.cuda.device(args.gpu):  # very important.
            offset, opt_states = torch.load(
                './models/' + args.load_from + '.pt.states',
                map_location=lambda storage, loc: storage.cuda())
            opt.load_state_dict(opt_states)
    else:
        offset = 0

    # metrics
    if save_path is None:
        save_path = args.model_name

    best = Best(max,
                'corpus_bleu',
                'corpus_gleu',
                'gleu',
                'loss',
                'i',
                model=model,
                opt=opt,
                path=save_path,
                gpu=args.gpu)
    train_metrics = Metrics('train', 'loss', 'real', 'fake')
    dev_metrics = Metrics('dev', 'loss', 'gleu', 'real_loss', 'fake_loss',
                          'distance', 'alter_loss', 'distance2',
                          'fertility_loss', 'corpus_gleu')
    progressbar = tqdm(total=args.eval_every, desc='start training.')

    for iters, batch in enumerate(train):

        iters += offset

        if iters % args.save_every == 0:
            args.logger.info(
                'save (back-up) checkpoints at iter={}'.format(iters))
            with torch.cuda.device(args.gpu):
                torch.save(best.model.state_dict(),
                           '{}_iter={}.pt'.format(args.model_name, iters))
                torch.save([iters, best.opt.state_dict()],
                           '{}_iter={}.pt.states'.format(
                               args.model_name, iters))

        if iters % args.eval_every == 0:
            progressbar.close()
            dev_metrics.reset()

            if args.distillation:
                outputs_course = valid_model(args,
                                             model,
                                             dev,
                                             dev_metrics,
                                             distillation=True,
                                             teacher_model=None)

            outputs_data = valid_model(
                args,
                model,
                dev,
                None if args.distillation else dev_metrics,
                teacher_model=None,
                print_out=True)
            if args.tensorboard and (not args.debug):
                writer.add_scalar('dev/GLEU_sentence_', dev_metrics.gleu,
                                  iters)
                writer.add_scalar('dev/Loss', dev_metrics.loss, iters)
                writer.add_scalar('dev/GLEU_corpus_',
                                  outputs_data['corpus_gleu'], iters)
                writer.add_scalar('dev/BLEU_corpus_',
                                  outputs_data['corpus_bleu'], iters)

                if args.distillation:
                    writer.add_scalar('dev/GLEU_corpus_dis',
                                      outputs_course['corpus_gleu'], iters)
                    writer.add_scalar('dev/BLEU_corpus_dis',
                                      outputs_course['corpus_bleu'], iters)

            if not args.debug:
                best.accumulate(outputs_data['corpus_bleu'],
                                outputs_data['corpus_gleu'], dev_metrics.gleu,
                                dev_metrics.loss, iters)
                args.logger.info(
                    'the best model is achieved at {}, average greedy GLEU={}, corpus GLEU={}, corpus BLEU={}'
                    .format(best.i, best.gleu, best.corpus_gleu,
                            best.corpus_bleu))
            args.logger.info('model:' + args.prefix + args.hp_str)

            # --- set up a new progress bar ---
            progressbar = tqdm(total=args.eval_every, desc='start training.')

        if maxsteps is None:
            maxsteps = args.maximum_steps

        if iters > maxsteps:
            args.logger.info('reach the maximum updating steps.')
            break

        # --- training --- #
        model.train()

        def get_learning_rate(i, lr0=0.1, disable=False):
            if not disable:
                return lr0 * 10 / math.sqrt(args.d_model) * min(
                    1 / math.sqrt(i), i /
                    (args.warmup * math.sqrt(args.warmup)))
            return 0.00002

        opt.param_groups[0]['lr'] = get_learning_rate(
            iters + 1, disable=args.disable_lr_schedule)
        opt.zero_grad()

        # prepare the data
        (inputs, input_masks, targets, target_masks, sources, source_masks,
         encoding, batch_size) = model.quick_prepare(batch, args.distillation)
        input_reorder, fertility_cost, decoder_inputs = None, None, inputs
        batch_fer = batch.fer_dec if args.distillation else batch.fer

        #print(input_masks.size(), target_masks.size(), input_masks.sum())

        if type(model) is FastTransformer:
            inputs, input_reorder, input_masks, fertility_cost = model.prepare_initial(
                encoding, sources, source_masks, input_masks, batch_fer)

        # Maximum Likelihood Training
        if not args.finetuning:
            loss = model.cost(targets,
                              target_masks,
                              out=model(encoding, source_masks, inputs,
                                        input_masks))
            if args.fertility:
                loss += fertility_cost

        else:
            # finetuning:

            # loss_student (MLE)
            if not args.fertility:
                decoding, out, probs = model(encoding,
                                             source_masks,
                                             inputs,
                                             input_masks,
                                             return_probs=True,
                                             decoding=True)
                loss_student = model.batched_cost(targets, target_masks,
                                                  probs)  # student-loss (MLE)
                decoder_masks = input_masks

            else:  # Note: MLE and decoding produce different translations, so we run the same code twice
                # truth
                decoding, out, probs = model(encoding,
                                             source_masks,
                                             inputs,
                                             input_masks,
                                             decoding=True,
                                             return_probs=True)
                loss_student = model.cost(targets, target_masks, out=out)
                decoder_masks = input_masks

                # baseline
                decoder_inputs_b, _, decoder_masks_b, _, _ = model.prepare_initial(
                    encoding,
                    sources,
                    source_masks,
                    input_masks,
                    None,
                    mode='mean')
                decoding_b, out_b, probs_b = model(
                    encoding,
                    source_masks,
                    decoder_inputs_b,
                    decoder_masks_b,
                    decoding=True,
                    return_probs=True)  # decode again

                # reinforce
                decoder_inputs_r, _, decoder_masks_r, _, _ = model.prepare_initial(
                    encoding,
                    sources,
                    source_masks,
                    input_masks,
                    None,
                    mode='reinforce')
                decoding_r, out_r, probs_r = model(
                    encoding,
                    source_masks,
                    decoder_inputs_r,
                    decoder_masks_r,
                    decoding=True,
                    return_probs=True)  # decode again

            if args.fertility:
                loss_student += fertility_cost

            # loss_teacher (RKL+REINFORCE)
            teacher_model.eval()
            if not args.fertility:
                inputs_student_index, _, targets_student_soft, _, _, _, encoding_teacher, _ = model.quick_prepare(
                    batch, False, decoding, probs, decoder_masks,
                    decoder_masks, source_masks)
                out_teacher, probs_teacher = teacher_model(
                    encoding_teacher,
                    source_masks,
                    inputs_student_index.detach(),
                    decoder_masks,
                    return_probs=True)
                loss_teacher = teacher_model.batched_cost(
                    targets_student_soft, decoder_masks,
                    probs_teacher.detach())
                loss = (
                    1 - args.beta1
                ) * loss_teacher + args.beta1 * loss_student  # final results

            else:
                inputs_student_index, _, targets_student_soft, _, _, _, encoding_teacher, _ = model.quick_prepare(
                    batch, False, decoding, probs, decoder_masks,
                    decoder_masks, source_masks)
                out_teacher, probs_teacher = teacher_model(
                    encoding_teacher,
                    source_masks,
                    inputs_student_index.detach(),
                    decoder_masks,
                    return_probs=True)
                loss_teacher = teacher_model.batched_cost(
                    targets_student_soft, decoder_masks,
                    probs_teacher.detach())

                inputs_student_index, _ = model.prepare_inputs(
                    batch, decoding_b, False, decoder_masks_b)
                targets_student_soft, _ = model.prepare_targets(
                    batch, probs_b, False, decoder_masks_b)

                out_teacher, probs_teacher = teacher_model(
                    encoding_teacher,
                    source_masks,
                    inputs_student_index.detach(),
                    decoder_masks_b,
                    return_probs=True)

                _, loss_1 = teacher_model.batched_cost(targets_student_soft,
                                                       decoder_masks_b,
                                                       probs_teacher.detach(),
                                                       True)

                inputs_student_index, _ = model.prepare_inputs(
                    batch, decoding_r, False, decoder_masks_r)
                targets_student_soft, _ = model.prepare_targets(
                    batch, probs_r, False, decoder_masks_r)

                out_teacher, probs_teacher = teacher_model(
                    encoding_teacher,
                    source_masks,
                    inputs_student_index.detach(),
                    decoder_masks_r,
                    return_probs=True)
                _, loss_2 = teacher_model.batched_cost(targets_student_soft,
                                                       decoder_masks_r,
                                                       probs_teacher.detach(),
                                                       True)

                rewards = -(loss_2 - loss_1).data
                rewards = rewards - rewards.mean()
                rewards = rewards.expand_as(source_masks)
                rewards = rewards * source_masks

                model.predictor.saved_fertilities.reinforce(
                    0.1 * rewards.contiguous().view(-1, 1))
                loss = (
                    1 - args.beta1
                ) * loss_teacher + args.beta1 * loss_student  # detect reinforce

        # accumulate the training metrics
        train_metrics.accumulate(batch_size, loss, print_iter=None)
        train_metrics.reset()

        # train the student
        if args.finetuning and args.fertility:
            torch.autograd.backward(
                (loss, model.predictor.saved_fertilities),
                (torch.ones(1).cuda(loss.get_device()), None))
        else:
            loss.backward()
        opt.step()

        info = 'training step={}, loss={:.3f}, lr={:.5f}'.format(
            iters, export(loss), opt.param_groups[0]['lr'])
        if args.finetuning:
            info += '| NA:{:.3f}, AR:{:.3f}'.format(export(loss_student),
                                                    export(loss_teacher))
            if args.fertility:
                info += '| RL: {:.3f}'.format(export(rewards.mean()))

        if args.fertility:
            info += '| RE:{:.3f}'.format(export(fertility_cost))

        if args.tensorboard and (not args.debug):
            writer.add_scalar('train/Loss', export(loss), iters)

        progressbar.update(1)
        progressbar.set_description(info)
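
Examples #20 and #25 use a streaming Metrics registered with a stage name plus metric names; values are pushed with accumulate(count, *values) and read back as attributes (for example dev_metrics.gleu). A rough sketch of that contract; the real class also handles printing via print_iter, which this sketch only accepts and ignores:

class Metrics:
    """Hypothetical sketch: running per-example averages for named values."""

    def __init__(self, stage, *names, data_type="sum"):
        self.stage = stage
        self.names = names
        self.reset()

    def reset(self):
        self.count = 0
        self.totals = {name: 0.0 for name in self.names}

    def accumulate(self, count, *values, print_iter=None):
        self.count += count
        for name, value in zip(self.names, values):
            self.totals[name] += float(value)

    def __getattr__(self, name):
        # expose averaged metrics as attributes, e.g. dev_metrics.loss
        totals = self.__dict__.get("totals", {})
        if name in totals:
            return totals[name] / max(self.count, 1)
        raise AttributeError(name)
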
Example #21
def train(train_data,
          val_data,
          user_list_train_filtered,
          user_list_val_filtered,
          user_beta_train,
          user_beta_val,
          k,
          dataset,
          eta=0.1,
          lamb=0.1,
          tolerance=1e-4,
          num_iter_val=5,
          num_total_iter_training=6,
          random_seed=786,
          kU=None,
          cv_flag=True,
          verbose=False):

    np.random.seed(random_seed)

    user_feat = val_data.drop(['user', 'label'], axis=1).values
    user_feat_train = train_data.drop(['user', 'label'], axis=1).values
    w = np.random.normal(0, 1, user_feat.shape[1])

    metrics = Metrics()
    metrics.eta_lr = eta
    metrics.lamb_reg = lamb
    print("running for eta", eta, "and lambda", lamb)

    for i in range(num_total_iter_training):
        grad, loss = subgradient(w, train_data, user_list_train_filtered,
                                 user_beta_train, k)
        grad += lamb * w
        w = w - (eta / np.sqrt(i + 1)) * grad
        metrics.w_list.append(w)
        metrics.loss_opt_list_train.append(loss)

        y_scores = user_feat_train.dot(w)
        data_true = deepcopy(train_data)
        data_true['scores'] = y_scores
        data_true = data_true.sort_values(by='scores', ascending=False)
        data_true = data_true.reset_index(drop=True)
        metrics.micro_auc_rel_k_list_train.append(
            compute_micro(data_true, user_list_train_filtered, user_beta_train,
                          w, k))

        if verbose:
            print('Epoch', i + 1, 'completed out of', num_total_iter_training,
                  'for prec@k loss train:', metrics.loss_opt_list_train[-1])
            print('Epoch', i + 1, 'completed out of', num_total_iter_training,
                  'for prec@k grad train:', np.linalg.norm(grad))

        # evaluate combined weights
        if cv_flag:
            if i % num_iter_val == 0:
                y_scores = user_feat.dot(w)
                data_true = deepcopy(val_data)
                data_true['scores'] = y_scores
                data_true = data_true.sort_values(by='scores', ascending=False)
                data_true = data_true.reset_index(drop=True)
                metrics.micro_auc_rel_k_list_val.append(
                    compute_micro(data_true, user_list_val_filtered,
                                  user_beta_val, w, k))

                if verbose:
                    print("\n")
                    print('Epoch', i + 1, 'completed out of',
                          num_total_iter_training, 'for prec@k loss val:',
                          metrics.micro_auc_rel_k_list_val[-1])
                    print("\n")

    return metrics, None
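
Examples #21 and #24 expect Metrics to be a mutable history container whose attributes match what the training loop appends to (w_list, loss_opt_list_train, the micro-AUC lists) plus the two hyperparameter fields set right after construction. A minimal sketch matching those names:

class Metrics:
    """Hypothetical sketch: history container for the subgradient loop."""

    def __init__(self):
        self.eta_lr = None                       # learning rate used
        self.lamb_reg = None                     # regularization strength
        self.w_list = []                         # weight vector per iteration
        self.loss_opt_list_train = []            # training loss per iteration
        self.micro_auc_rel_k_list_train = []     # micro metric on train
        self.micro_auc_rel_k_list_val = []       # micro metric on validation
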
Example #22

model.compile(loss=WeightedBinaryCrossEntropy(POS_RATIO),
              optimizer='rmsprop',
              metrics=['binary_accuracy', f1])

logger.debug('Model summary: %s', model.summary())

# Set tensorboard callback

tb = TensorBoard(log_dir='./learn_embedding_logs',
                 histogram_freq=1,
                 write_graph=True,
                 write_images=False)

# Metrics is now defined in utils

metrics = Metrics(logger)

# Train model

# NOTE: The TensorBoard callback is disabled to reduce model run time from
# approx. 3 hours to 17 minutes

model.fit(
    x_train,
    y_train,
    validation_data=(x_dev, y_dev),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    #callbacks=[tb]
)
Example #23
#

# In[ ]:

from utils import Metrics

# In[ ]:

run_id = 'seg_model_gpu{}_n{}_bs{}_lr{}'.format(gpu_id, epochs, batch_size,
                                                learning_rate)
print('\n\nTraining', run_id)
save_path = run_id + '.pkl'

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

metrics = Metrics(train_loader.dataset.num_classes,
                  train_loader.dataset.class_names)


# Used to keep track of statistics
class AverageMeter(object):
    def __init__(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count
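
The meter is driven exactly like the loss meters in the earlier PyTorch loops; a short usage example with made-up batch losses:

meter = AverageMeter()
for batch_loss, batch_size in [(0.9, 32), (0.7, 32), (0.5, 16)]:
    meter.update(batch_loss, n=batch_size)
print(meter.avg)  # running mean weighted by the number of examples seen
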
Example #24
def train(train_data,
          val_data,
          user_list_train_filtered,
          user_list_val_filtered,
          user_beta_train,
          user_beta_val,
          k,
          eta=0.1,
          lamb=0.1,
          num_iter_val=5,
          num_total_iter_training=6,
          n_classifiers=5,
          random_seed=786,
          verbose=False):

    np.random.seed(random_seed)

    user_list_val_filtered = user_list_train_filtered[
        0:int(0.2 * len(user_list_train_filtered))]
    user_list_train_filtered = list(
        set(user_list_train_filtered) - set(user_list_val_filtered))
    val_data = train_data[train_data['user'].isin(user_list_val_filtered)]
    train_data = train_data[train_data['user'].isin(user_list_train_filtered)]

    metrics = Metrics()
    metrics.eta_lr = eta
    metrics.lamb_reg = lamb

    classifier_list = []

    kf = KFold(n_splits=n_classifiers, shuffle=True)
    features = train_data.drop(['user', 'label'], axis=1)
    labels = train_data['label']
    for _, split_indices in kf.split(features):
        split_features = features.iloc[split_indices].values
        split_labels = labels.iloc[split_indices].values
        num_examples = split_features.shape[0]

        w = np.random.normal(0, 1, (split_features.shape[1], ))
        w = w / np.linalg.norm(w)
        for num_iter in np.arange(num_total_iter_training):
            scores = sigmoid(np.dot(split_features, w))
            loss = -1 / num_examples * np.sum(split_labels * np.log(scores) +
                                              (1 - split_labels) *
                                              np.log(1 - scores))
            print("loss is ", loss)
            # dL/dz for sigmoid + binary cross-entropy is (p - y); sum the
            # per-example gradients over axis 0 to get a weight-shaped vector
            dLdwx = scores - split_labels
            grad = 1 / num_examples * np.sum(
                dLdwx.reshape(-1, 1) * split_features, axis=0)
            grad += lamb * w
            print("grad is ", np.linalg.norm(grad))
            print("\n")
            w = w - (eta / np.sqrt(num_iter + 1)) * grad
        accuracy = np.sum(split_labels * (scores > 0.5) + (1 - split_labels) *
                          (scores < 0.5))
        print('accuracy: {}'.format(accuracy / num_examples))
        classifier_list.append(w)
    print('eta is ', eta, 'and lambda is ', lamb)
    print('\n')

    classifiers_with_metrics = []
    for w in classifier_list:
        user_feat = val_data.drop(['user', 'label'], axis=1).values
        y_scores = user_feat.dot(w)
        data_true = deepcopy(val_data)
        data_true['scores'] = y_scores
        data_true = data_true.sort_values(by='scores', ascending=False)
        data_true = data_true.reset_index(drop=True)
        metric = compute_micro(data_true, user_list_val_filtered,
                               user_beta_train, w, k)
        classifiers_with_metrics.append((metric, w))
    classifiers_with_metrics.sort(reverse=True, key=lambda x: x[0])
    combined_w = classifiers_with_metrics[0][1]
    for _, w in classifiers_with_metrics[1:]:
        combined_w = merge_micro(val_data, combined_w, w,
                                 user_list_val_filtered, user_beta_train, k)

    # create dummy metrics
    # need weights and one validation loss for the "best iter" logic
    metrics = Metrics()
    metrics.w_list.append(combined_w)
    metrics.micro_auc_rel_k_list_val.append(0)
    metrics.micro_auc_rel_k_list_train.append(0)
    metrics.loss_opt_list_train.append(0)
    return metrics, None
Example #25
            progressbar.set_description(info)

    if use_prog_bar:
        progressbar.close()
    return model.save_fast_weights()


# training starts here
best = Best(max,
            'corpus_bleu',
            'i',
            model=model,
            opt=meta_opt,
            path=args.model_name,
            gpu=args.gpu)
train_metrics = Metrics('train', 'loss', 'real', 'fake')
dev_metrics = Metrics('dev', 'loss', 'gleu', 'real_loss', 'fake_loss',
                      'distance', 'alter_loss', 'distance2', 'fertility_loss',
                      'corpus_gleu')

# overall progress bar
progressbar = tqdm(total=args.eval_every, desc='start training')

while True:

    # ----- saving the checkpoint ----- #
    if iters % args.save_every == 0:
        args.logger.info('save (back-up) checkpoints at iter={}'.format(iters))
        with torch.cuda.device(args.gpu):
            torch.save(best.model.state_dict(),
                       '{}_iter={}.pt'.format(args.model_name, iters))
Example #26
def train(models, optimizers, dataset, corpus, ckpts, params, args):
    epoch_num = params.epoch_num
    epoch_gan = params.epoch_gan
    batch_epoch = params.batch_epoch
    autoencoder.noise_radius = params.noise_radius
    step = 0

    for e in range(epoch_num, params.max_epoch):
        for batch, (source, target) in islice(enumerate(dataset), batch_epoch, None):
            metrics = Metrics(
                epoch=e,
                max_epoch=params.max_epoch,
            )
            for p in range(params.epoch_ae):
                ae_metrics = train_autoencoder(models, optimizers, source, target, params)
                metrics.accum(ae_metrics)
            for q in range(params.epoch_gan):
                for r in range(params.epoch_disc):
                    disc_metrics = train_disc(models, optimizers, source)
                    metrics.accum(disc_metrics)
                for r in range(params.epoch_enc):
                    enc_metrics = train_encoder_by_disc(models, optimizers, source, params)
                    metrics.accum(enc_metrics)
                for t in range(params.epoch_gen):
                    gen_metrics = train_gen(models, optimizers, source)
                    metrics.accum(gen_metrics)

            metrics['ae_loss'] /= params.epoch_ae
            metrics['acc'] /= params.epoch_ae
            metrics['disc_loss'] /= (params.epoch_gan * params.epoch_disc)
            metrics['disc_fake_loss'] /= (params.epoch_gan * params.epoch_disc)
            metrics['disc_real_loss'] /= (params.epoch_gan * params.epoch_disc)
            metrics['real_norm'] /= (params.epoch_gan * params.epoch_disc)
            metrics['fake_norm'] /= (params.epoch_gan * params.epoch_disc)
            metrics['gen_loss'] /= (params.epoch_gan * params.epoch_gen)

            batch_epoch += 1
            # anneal noise every 5 batch_epoch for now
            if batch_epoch % 5 == 0:
                autoencoder.noise_radius = autoencoder.noise_radius * 0.995
            if batch_epoch % params.print_every == 0:
                ckpts.save()
                logging.info('--- Epoch {}/{} Batch {} ---'.format(e + 1, metrics['max_epoch'], batch_epoch))
                logging.info('Loss {:.4f}'.format(float(metrics['ae_loss'])))
                logging.info('Disc_Loss {:.4f}'.format(float(metrics['disc_loss'])))
                logging.info('Gen_Loss {:.4f}'.format(float(metrics['gen_loss'])))

                params.batch_epoch = batch_epoch
                params.epoch_num = e
                params.epoch_gan = epoch_gan
                params.noise_radius = autoencoder.noise_radius
                params.save(os.path.join(args.model_dir, 'params.json'))

                # Floydhub metrics
                print('{{"metric": "acc", "value": {}, "step": {}}}'.format(float(metrics['acc']), step))
                print('{{"metric": "ae_loss", "value": {}, "step": {}}}'.format(float(metrics['ae_loss']), step))
                print('{{"metric": "disc_loss", "value": {}, "step": {}}}'.format(float(metrics['disc_loss']), step))
                print('{{"metric": "disc_fake_loss", "value": {}, "step": {}}}'.format(float(metrics['disc_fake_loss']), step))
                print('{{"metric": "disc_real_loss", "value": {}, "step": {}}}'.format(float(metrics['disc_real_loss']), step))
                print('{{"metric": "real_norm", "value": {}, "step": {}}}'.format(float(metrics['real_norm']), step))
                print('{{"metric": "fake_norm", "value": {}, "step": {}}}'.format(float(metrics['fake_norm']), step))
                print('{{"metric": "gen_loss", "value": {}, "step": {}}}'.format(float(metrics['gen_loss']), step))

                step += 1
                tb_writer.add_scalar('train/acc', metrics['acc'], step)
                tb_writer.add_scalar('train/ae_loss', metrics['ae_loss'], step)
                tb_writer.add_scalar('train/disc_loss', metrics['disc_loss'], step)
                tb_writer.add_scalar('train/disc_fake_loss', metrics['disc_fake_loss'], step)
                tb_writer.add_scalar('train/disc_real_loss', metrics['disc_real_loss'], step)
                tb_writer.add_scalar('train/gen_loss', metrics['gen_loss'], step)

            if batch_epoch % (params.print_every * 2) == 0:
                step += 1
                generate_sentence(models, source, corpus, step, args)
        batch_epoch = 0
Example #27
def train_model(args,
                model,
                train,
                dev,
                src=None,
                trg=None,
                trg_len_dic=None,
                teacher_model=None,
                save_path=None,
                maxsteps=None):

    if args.tensorboard and (not args.debug):
        from tensorboardX import SummaryWriter
        writer = SummaryWriter(str(args.event_path / args.id_str))

    if type(model) is FastTransformer and args.denoising_prob > 0.0:
        denoising_weights = [
            args.denoising_weight for idx in range(args.train_repeat_dec)
        ]
        denoising_out_weights = [
            args.denoising_out_weight for idx in range(args.train_repeat_dec)
        ]

    if type(model) is FastTransformer and args.layerwise_denoising_weight:
        start, end = 0.9, 0.1
        diff = (start - end) / (args.train_repeat_dec - 1)
        denoising_weights = np.arange(start=end, stop=start,
                                      step=diff).tolist()[::-1] + [0.1]

    # optimizer
    for k, p in zip(model.state_dict().keys(), model.parameters()):
        # only finetune layers that are responsible for predicting the target length
        if args.finetune_trg_len:
            if "pred_len" not in k:
                p.requires_grad = False
        else:
            if "pred_len" in k:
                p.requires_grad = False

    params = [p for p in model.parameters() if p.requires_grad]
    if args.optimizer == 'Adam':
        opt = torch.optim.Adam(params, betas=(0.9, 0.98), eps=1e-9)
    else:
        raise NotImplementedError

    # if resume training
    if (args.load_from is not None) and (args.resume):
        with torch.cuda.device(args.gpu):  # very important.
            offset, opt_states = torch.load(
                str(args.model_path / args.load_from) + '.pt.states',
                map_location=lambda storage, loc: storage.cuda())
            opt.load_state_dict(opt_states)
    else:
        offset = 0

    if not args.finetune_trg_len:
        best = Best(max,
                    *[
                        'BLEU_dec{}'.format(ii + 1)
                        for ii in range(args.valid_repeat_dec)
                    ],
                    'i',
                    model=model,
                    opt=opt,
                    path=str(args.model_path / args.id_str),
                    gpu=args.gpu,
                    which=range(args.valid_repeat_dec))
    else:
        best = Best(max,
                    *['pred_target_len_correct'],
                    'i',
                    model=model,
                    opt=opt,
                    path=str(args.model_path / args.id_str),
                    gpu=args.gpu,
                    which=[0])
    train_metrics = Metrics(
        'train loss',
        *['loss_{}'.format(idx + 1) for idx in range(args.train_repeat_dec)],
        data_type="avg")
    dev_metrics = Metrics(
        'dev loss',
        *['loss_{}'.format(idx + 1) for idx in range(args.valid_repeat_dec)],
        data_type="avg")

    if "predict" in args.trg_len_option:
        train_metrics_trg = Metrics('train loss target',
                                    *[
                                        "pred_target_len_loss",
                                        "pred_target_len_correct",
                                        "pred_target_len_approx"
                                    ],
                                    data_type="avg")
        train_metrics_average = Metrics(
            'train loss average',
            *["average_target_len_correct", "average_target_len_approx"],
            data_type="avg")
        dev_metrics_trg = Metrics('dev loss target',
                                  *[
                                      "pred_target_len_loss",
                                      "pred_target_len_correct",
                                      "pred_target_len_approx"
                                  ],
                                  data_type="avg")
        dev_metrics_average = Metrics(
            'dev loss average',
            *["average_target_len_correct", "average_target_len_approx"],
            data_type="avg")
    else:
        train_metrics_trg = None
        train_metrics_average = None
        dev_metrics_trg = None
        dev_metrics_average = None

    if not args.no_tqdm:
        progressbar = tqdm(total=args.eval_every, desc='start training.')

    if maxsteps is None:
        maxsteps = args.maximum_steps

    #targetlength = TargetLength()
    for iters, train_batch in enumerate(train):
        #targetlength.accumulate( train_batch )
        #continue

        iters += offset

        if args.save_every > 0 and iters % args.save_every == 0:
            args.logger.info(
                'save (back-up) checkpoints at iter={}'.format(iters))
            with torch.cuda.device(args.gpu):
                torch.save(
                    best.model.state_dict(),
                    '{}_iter={}.pt'.format(str(args.model_path / args.id_str),
                                           iters))
                torch.save([iters, best.opt.state_dict()],
                           '{}_iter={}.pt.states'.format(
                               str(args.model_path / args.id_str), iters))

        if iters % args.eval_every == 0:
            torch.cuda.empty_cache()
            gc.collect()
            dev_metrics.reset()
            if dev_metrics_trg is not None:
                dev_metrics_trg.reset()
            if dev_metrics_average is not None:
                dev_metrics_average.reset()
            outputs_data = valid_model(args,
                                       model,
                                       dev,
                                       dev_metrics,
                                       dev_metrics_trg=dev_metrics_trg,
                                       dev_metrics_average=dev_metrics_average,
                                       teacher_model=None,
                                       print_out=True,
                                       trg_len_dic=trg_len_dic)
            #outputs_data = [0, [0,0,0,0], 0, 0]
            if args.tensorboard and (not args.debug):
                for ii in range(args.valid_repeat_dec):
                    writer.add_scalar('dev/single/Loss_{}'.format(ii + 1),
                                      getattr(dev_metrics,
                                              "loss_{}".format(ii + 1)),
                                      iters)  # NLL averaged over dev corpus
                    writer.add_scalar('dev/single/BLEU_{}'.format(ii + 1),
                                      outputs_data['real'][ii][0],
                                      iters)  # NOTE corpus bleu

                if "predict" in args.trg_len_option:
                    writer.add_scalar("dev/single/pred_target_len_loss",
                                      outputs_data["pred_target_len_loss"],
                                      iters)
                    writer.add_scalar("dev/single/pred_target_len_correct",
                                      outputs_data["pred_target_len_correct"],
                                      iters)
                    writer.add_scalar("dev/single/pred_target_len_approx",
                                      outputs_data["pred_target_len_approx"],
                                      iters)
                    writer.add_scalar(
                        "dev/single/average_target_len_correct",
                        outputs_data["average_target_len_correct"], iters)
                    writer.add_scalar(
                        "dev/single/average_target_len_approx",
                        outputs_data["average_target_len_approx"], iters)
                """
                writer.add_scalars('dev/total/BLEUs', {"iter_{}".format(idx+1):bleu for idx, bleu in enumerate(outputs_data['bleu']) }, iters)
                writer.add_scalars('dev/total/Losses',
                    { "iter_{}".format(idx+1):getattr(dev_metrics, "loss_{}".format(idx+1))
                     for idx in range(args.valid_repeat_dec) },
                     iters )
                """

            if not args.debug:
                if not args.finetune_trg_len:
                    best.accumulate(*[xx[0] for xx in outputs_data['real']],
                                    iters)

                    values = list(best.metrics.values())
                    args.logger.info("best model : {}, {}".format( "BLEU=[{}]".format(", ".join( [ str(x) for x in values[:args.valid_repeat_dec] ] ) ), \
                                                                  "i={}".format( values[args.valid_repeat_dec] ), ) )
                else:
                    best.accumulate(*[outputs_data['pred_target_len_correct']],
                                    iters)
                    values = list(best.metrics.values())
                    args.logger.info("best model : {}".format(
                        "pred_target_len_correct = {}".format(values[0])))

            args.logger.info('model:' + args.prefix + args.hp_str)

            # ---set-up a new progressor---
            if not args.no_tqdm:
                progressbar.close()
                progressbar = tqdm(total=args.eval_every,
                                   desc='start training.')

            if type(model) is FastTransformer and args.anneal_denoising_weight:
                for ii, bb in enumerate([xx[0]
                                         for xx in outputs_data['real']][:-1]):
                    denoising_weights[ii] = 0.9 - 0.1 * int(
                        math.floor(bb / 3.0))

        if iters > maxsteps:
            args.logger.info('reached the maximum updating steps.')
            break

        model.train()

        def get_lr_transformer(i, lr0=0.1):
            return lr0 * 10 / math.sqrt(args.d_model) * min(
                1 / math.sqrt(i), i / (args.warmup * math.sqrt(args.warmup)))

        def get_lr_anneal(iters, lr0=0.1):
            lr_end = 1e-5
            return max(0, (args.lr - lr_end) * (args.anneal_steps - iters) /
                       args.anneal_steps) + lr_end

        if args.lr_schedule == "fixed":
            opt.param_groups[0]['lr'] = args.lr
        elif args.lr_schedule == "anneal":
            opt.param_groups[0]['lr'] = get_lr_anneal(iters + 1)
        elif args.lr_schedule == "transformer":
            opt.param_groups[0]['lr'] = get_lr_transformer(iters + 1)

        opt.zero_grad()

        if args.dataset == "mscoco":
            decoder_inputs, decoder_masks, \
            targets, target_masks, \
            _, source_masks, \
            encoding, batch_size, rest = model.quick_prepare_mscoco(
                train_batch, all_captions=train_batch[1],
                fast=(type(model) is FastTransformer), inputs_dec=args.inputs_dec,
                trg_len_option=args.trg_len_option, max_len=args.max_offset,
                trg_len_dic=trg_len_dic, bp=args.bp)
        else:
            decoder_inputs, decoder_masks, \
            targets, target_masks, \
            sources, source_masks, \
            encoding, batch_size, rest = model.quick_prepare(
                train_batch, fast=(type(model) is FastTransformer),
                trg_len_option=args.trg_len_option, trg_len_ratio=args.trg_len_ratio,
                trg_len_dic=trg_len_dic, bp=args.bp)

        losses = []
        if type(model) is Transformer:
            loss = model.cost(targets,
                              target_masks,
                              out=model(encoding, source_masks, decoder_inputs,
                                        decoder_masks))
            losses.append(loss)

        elif type(model) is FastTransformer:
            all_logits = []
            all_denoising_masks = []
            for iter_ in range(args.train_repeat_dec):
                curr_iter = min(iter_, args.num_decs - 1)
                next_iter = min(curr_iter + 1, args.num_decs - 1)

                out = model(encoding,
                            source_masks,
                            decoder_inputs,
                            decoder_masks,
                            iter_=curr_iter,
                            return_probs=False)

                if args.self_distil > 0.0:
                    loss, logits_masked = model.cost(targets,
                                                     target_masks,
                                                     out=out,
                                                     iter_=curr_iter,
                                                     return_logits=True)
                else:
                    loss = model.cost(targets,
                                      target_masks,
                                      out=out,
                                      iter_=curr_iter)

                logits = model.decoder[curr_iter].out(out)
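
                # choose the next decoder input prediction: greedy argmax or
                # multinomial sampling from the softmax distribution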

                if args.use_argmax:
                    _, argmax = torch.max(logits, dim=-1)
                else:
                    probs = softmax(logits)
                    probs_sz = probs.size()
                    logits_ = Variable(probs.data, requires_grad=False)
                    argmax = torch.multinomial(
                        logits_.contiguous().view(-1, probs_sz[-1]),
                        1).view(*probs_sz[:-1])

                if args.self_distil > 0.0:
                    all_logits.append(logits_masked)

                losses.append(loss)

                decoder_inputs_ = 0
                denoising_mask = 1
                if args.next_dec_input in ["both", "emb"]:
                    if (args.denoising_prob > 0.0
                            and np.random.rand() < args.denoising_prob):
                        cor = corrupt_target(targets, decoder_masks,
                                             len(trg.vocab),
                                             denoising_weights[iter_],
                                             args.corruption_probs)

                        emb = F.embedding(
                            cor, model.decoder[next_iter].out.weight *
                            math.sqrt(args.d_model))
                        denoising_mask = 0
                    else:
                        emb = F.embedding(
                            argmax, model.decoder[next_iter].out.weight *
                            math.sqrt(args.d_model))

                    if args.denoising_out_weight > 0:
                        if denoising_out_weights[iter_] > 0.0:
                            corrupted_argmax = corrupt_target(
                                argmax, decoder_masks,
                                denoising_out_weights[iter_])
                        else:
                            corrupted_argmax = argmax
                        emb = F.embedding(
                            corrupted_argmax,
                            model.decoder[next_iter].out.weight *
                            math.sqrt(args.d_model))
                    decoder_inputs_ += emb
                all_denoising_masks.append(denoising_mask)

                if args.next_dec_input in ["both", "out"]:
                    decoder_inputs_ += out
                decoder_inputs = decoder_inputs_

            # self distillation loss if requested
            if args.self_distil > 0.0:
                self_distil_losses = []
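                # weight each pairwise MSE by 1/(j - i): nearby refinement
                # steps are matched more strongly than distant ones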

                for logits_i in range(1, len(all_logits) - 1):
                    self_distill_loss_i = 0.0
                    for logits_j in range(logits_i + 1, len(all_logits)):
                        self_distill_loss_i += \
                                all_denoising_masks[logits_j] * \
                                all_denoising_masks[logits_i] * \
                                (1/(logits_j-logits_i)) * args.self_distil * F.mse_loss(all_logits[logits_i], all_logits[logits_j].detach())

                    self_distil_losses.append(self_distill_loss_i)

                self_distil_loss = sum(self_distil_losses)

        loss = sum(losses)

        # accumulate the training metrics
        train_metrics.accumulate(batch_size, *losses, print_iter=None)
        if train_metrics_trg is not None:
            train_metrics_trg.accumulate(batch_size,
                                         *[rest[0], rest[1], rest[2]])
        if train_metrics_average is not None:
            train_metrics_average.accumulate(batch_size, *[rest[3], rest[4]])
        if type(model) is FastTransformer and args.self_distil > 0.0:
            (loss + self_distil_loss).backward()
        else:
            if "predict" in args.trg_len_option:
                if args.finetune_trg_len:
                    rest[0].backward()
                else:
                    loss.backward()
            else:
                loss.backward()

        if args.grad_clip > 0:
            total_norm = nn.utils.clip_grad_norm(params, args.grad_clip)
        opt.step()

        mid_str = ''
        if type(model) is FastTransformer and args.self_distil > 0.0:
            mid_str += 'distil={:.5f}, '.format(
                self_distil_loss.cpu().data.numpy()[0])
        if type(model) is FastTransformer and "predict" in args.trg_len_option:
            mid_str += 'pred_target_len_loss={:.5f}, '.format(
                rest[0].cpu().data.numpy()[0])
        if type(model) is FastTransformer and args.denoising_prob > 0.0:
            mid_str += "/".join(
                ["{:.1f}".format(ff) for ff in denoising_weights[:-1]]) + ", "

        info = 'update={}, loss={}, {}lr={:.1e}'.format(
            iters, "/".join(["{:.3f}".format(export(ll)) for ll in losses]),
            mid_str, opt.param_groups[0]['lr'])

        if args.no_tqdm:
            if iters % args.eval_every == 0:
                args.logger.info("update {} : {}".format(
                    iters, str(train_metrics)))
        else:
            progressbar.update(1)
            progressbar.set_description(info)

        if iters % args.eval_every == 0 and args.tensorboard and (
                not args.debug):
            for idx in range(args.train_repeat_dec):
                writer.add_scalar(
                    'train/single/Loss_{}'.format(idx + 1),
                    getattr(train_metrics, "loss_{}".format(idx + 1)), iters)
            if "predict" in args.trg_len_option:
                writer.add_scalar(
                    "train/single/pred_target_len_loss",
                    getattr(train_metrics_trg, "pred_target_len_loss"), iters)
                writer.add_scalar(
                    "train/single/pred_target_len_correct",
                    getattr(train_metrics_trg, "pred_target_len_correct"),
                    iters)
                writer.add_scalar(
                    "train/single/pred_target_len_approx",
                    getattr(train_metrics_trg, "pred_target_len_approx"),
                    iters)
                writer.add_scalar(
                    "train/single/average_target_len_correct",
                    getattr(train_metrics_average,
                            "average_target_len_correct"), iters)
                writer.add_scalar(
                    "train/single/average_target_len_approx",
                    getattr(train_metrics_average,
                            "average_target_len_approx"), iters)

            train_metrics.reset()
            if train_metrics_trg is not None:
                train_metrics_trg.reset()
            if train_metrics_average is not None:
                train_metrics_average.reset()
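
The two nested learning-rate helpers above are easy to sanity-check in isolation. Below is a minimal sketch; the d_model, warmup, lr, and anneal_steps values are illustrative assumptions, not the repository's defaults.

import math

D_MODEL, WARMUP = 512, 4000          # assumed values, for illustration only

def lr_transformer(step, lr0=0.1):
    # inverse-square-root decay after a linear warmup, mirroring get_lr_transformer above
    return lr0 * 10 / math.sqrt(D_MODEL) * min(
        1 / math.sqrt(step), step / (WARMUP * math.sqrt(WARMUP)))

def lr_anneal(step, lr=3e-4, anneal_steps=250000, lr_end=1e-5):
    # linear decay from lr down to lr_end over anneal_steps updates, mirroring get_lr_anneal above
    return max(0, (lr - lr_end) * (anneal_steps - step) / anneal_steps) + lr_end

for step in (1, 1000, 4000, 100000):
    print(step, round(lr_transformer(step), 6), round(lr_anneal(step), 6))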
Example No. 28
    path_figure = os.path.join(root_path, "figs")
    os.makedirs(model_path)
    os.makedirs(path_figure)
    shutil.copytree(os.path.abspath('config/'), os.path.join(root_path, 'config'))

print("Using", device_type)

number_agents = config.agents.number_predators + config.agents.number_preys
# Definition of the agents
agents = [AgentMADDPG("predator", "predator-{}".format(k), device, config.agents)
          for k in range(config.agents.number_predators)]
agents += [AgentMADDPG("prey", "prey-{}".format(k), device, config.agents)
           for k in range(config.agents.number_preys)]

metrics = []
collision_metric = Metrics()
actors_noise = []
# Define the metrics for all agents; the shared replay memory is created below
for agent in agents:
    metrics.append(Metrics())

    # Load a pretrained model if requested
    if config.learning.use_model:
        path = os.path.abspath(os.path.join(config.learning.model_path, agent.id + ".pth"))
        agent.load(path)

env = Env(config.env, config)
shared_memory = ReplayMemory(config.replay_memory.size)
# Add agents to the environment
for k in range(len(agents)):
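
The snippet is truncated here. For context, the shared ReplayMemory typically acts as a bounded transition buffer used by all MADDPG agents; the toy stand-in below uses the common push/sample convention, which is an assumption rather than the repository's actual API.

import random
from collections import deque

class ToyReplayMemory:
    # bounded FIFO of transitions; the oldest entries are evicted first
    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)

    def push(self, transition):
        self.buffer.append(transition)

    def sample(self, batch_size):
        return random.sample(self.buffer, batch_size)

memory = ToyReplayMemory(capacity=10000)
memory.push((0, 1, 0.5, 1, False))   # (state, action, reward, next_state, done)
print(memory.sample(1))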
Example No. 29
        p for p in model.get_parameters(type=args.finetune_params)
        if p.requires_grad
    ],
                                betas=(0.9, 0.98),
                                eps=1e-9)
    corpus_bleu = -1

    # training starts here
    best = Best(max,
                'corpus_bleu',
                'i',
                model=model,
                opt=self_opt,
                path=args.model_name,
                gpu=args.gpu)
    dev_metrics = Metrics('dev', 'loss', 'gleu')

    outputs_data = valid_model(args,
                               model,
                               dev_real,
                               dev_metrics,
                               print_out=False)
    corpus_bleu0 = outputs_data['corpus_bleu']
    fast_weights = [(weights, corpus_bleu0)]

    if args.tensorboard and (not args.debug):
        writer.add_scalar('dev/BLEU_corpus_', outputs_data['corpus_bleu'],
                          dev_iters)

    for j in range(args.valid_epochs):
        args.logger.info("Fine-tuning epoch: {}".format(j))
Example No. 30
        pair_features = load_pair_features(f_pair_feature_key, f_pair_feature_value)
        train_data = DirectionalTripletsWithPairFeature(options["embedding"], train_hyper2hypo, pair_features)
    else:
        train_data = DirectionalTriplets(options["embedding"], train_hyper2hypo)
    print("=== Finish constructing dataset ===")
    print("Number of training hyposets: {}".format(len(train_data)))
    kwargs = {'num_workers': 1, 'pin_memory': True} if options["device_id"] != -1 else {}
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=options["batch_size"], shuffle=True,
                                               drop_last=False, **kwargs)

    # Construct testing set
    f_test = options["test_pairs_file"]
    test_pairs = load_element_pairs(f_test, with_label=False)
    print("Number of testing term pairs: {}".format(len(test_pairs)))

    # Start model tuning
    results = Results('./results/tune_{}.txt'.format(args.comment))
    metrics = Metrics()
    for hp in sample_hyperparameters(num=200):
        for m in hp:
            options[m] = hp[m]  # update hyper-parameters

        options["pt"] = {
            "name": hp["pt_name"],
            "dropout": hp["edge_dropout"]
        }

        best_overall_metric, best_epoch, best_metrics = run(train_loader, test_pairs, options)
        metrics.metrics = best_metrics
        results.save_metrics(hp, metrics)
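
The tuning loop draws 200 random configurations from sample_hyperparameters. A hypothetical sketch of such a sampler follows; the search space below is invented for illustration and only mirrors the keys used above.

import random

def sample_hyperparameters_sketch(num):
    # hypothetical random search space; the real ranges live in the repository
    for _ in range(num):
        yield {
            "lr": 10 ** random.uniform(-4, -2),
            "batch_size": random.choice([32, 64, 128]),
            "edge_dropout": random.uniform(0.0, 0.5),
            "pt_name": random.choice(["mean", "max"]),
        }

for hp in sample_hyperparameters_sketch(num=3):
    print(hp)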