Ejemplo n.º 1
0
def validate_hvd(val_loader, model, epoch, writer, verbose, early_stopping,
                 hvd, start):
    """Run one Horovod validation pass and log rank-averaged metrics.

    Returns the averaged top-1 accuracy for the epoch. If the supplied
    ``early_stopping`` object signals a stop, the elapsed time is printed
    and the whole process is terminated via ``os._exit(0)``.
    """
    model.eval()

    # Rank-averaged running meters (reduced across Horovod workers).
    loss_meter = utils.Metric('val_loss', hvd)
    top1_meter = utils.Metric('val_top1', hvd)
    top5_meter = utils.Metric('val_top5', hvd)

    progress = tqdm(total=len(val_loader),
                    desc='Validate Epoch  #{}'.format(epoch + 1),
                    disable=not verbose)
    with progress as t:
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs = inputs.cuda()
                labels = labels.cuda()
                logits = model(inputs)

                loss_meter.update(F.cross_entropy(logits, labels))
                acc1, acc5 = utils.accuracy(logits, labels, topk=(1, 5))
                top1_meter.update(acc1)
                top5_meter.update(acc5)

                t.set_postfix({
                    'loss': loss_meter.avg.item(),
                    'top1': 100. * top1_meter.avg.item(),
                    'top5': 100. * top5_meter.avg.item()
                })
                t.update(1)

        # Early-stopping check uses the epoch-average validation loss.
        early_stopping(loss_meter.avg.item(), model, ckpt_dir=config.path)
        if early_stopping.early_stop:
            print("Early stopping")
            utils.time(time.time() - start)
            os._exit(0)

        for tag, meter in (('val/loss', loss_meter),
                           ('val/top1', top1_meter),
                           ('val/top5', top5_meter)):
            writer.add_scalar(tag, meter.avg, epoch)
    return top1_meter.avg
Ejemplo n.º 2
0
def train_hvd(train_loader, model, optimizer, epoch, config, writer, verbose,
              hvd):
    """Train ``model`` for one epoch under Horovod with sub-batch splitting.

    Each loaded batch is split into sub-batches of ``config.batch_size``;
    gradients are averaged over the sub-batches before a single
    ``optimizer.step()`` (which Horovod all-reduces across ranks).
    Epoch-average loss/top-1/top-5 are written to ``writer``.
    """
    train_loss = utils.Metric('train_loss', hvd)
    train_top1 = utils.Metric('train_top1', hvd)
    # BUG FIX: this meter was previously registered as 'train_top1',
    # colliding with the top-1 meter's reduction name.
    train_top5 = utils.Metric('train_top5', hvd)

    model.train()

    with tqdm(total=len(train_loader),
              desc='Train Epoch     #{}'.format(epoch + 1),
              disable=not verbose) as t:
        for batch_idx, (data, target) in enumerate(train_loader):
            utils.adjust_learning_rate_hvd(epoch, batch_idx, config,
                                           train_loader, hvd.size(), optimizer)

            data, target = data.cuda(), target.cuda()
            optimizer.zero_grad()
            # Number of sub-batches; loop-invariant, so hoisted out of the
            # inner loop (was recomputed per sub-batch).
            num_sub_batches = math.ceil(float(len(data)) / config.batch_size)
            # Split data into sub-batches of size batch_size
            for i in range(0, len(data), config.batch_size):
                data_batch = data[i:i + config.batch_size]
                target_batch = target[i:i + config.batch_size]
                output = model(data_batch)
                prec1, prec5 = utils.accuracy(output,
                                              target_batch,
                                              topk=(1, 5))
                train_top1.update(prec1)
                train_top5.update(prec5)
                loss = F.cross_entropy(output, target_batch)
                # NOTE(review): the meter is updated before the in-place
                # div_ below — assumes Metric.update snapshots the value
                # rather than keeping a reference; confirm in utils.Metric.
                train_loss.update(loss)
                # Average gradients among sub-batches
                loss.div_(num_sub_batches)
                loss.backward()
            # Gradient is applied across all ranks
            optimizer.step()
            t.set_postfix({
                'loss': train_loss.avg.item(),
                'top1': 100. * train_top1.avg.item(),
                'top5': 100. * train_top5.avg.item()
            })
            t.update(1)

        writer.add_scalar('train/loss', train_loss.avg, epoch)
        writer.add_scalar('train/top1', train_top1.avg, epoch)
        writer.add_scalar('train/top5', train_top5.avg, epoch)
Ejemplo n.º 3
0
    def train_classify(self, net, loss_fn, args, trainLoader, valLoader):
        """Train ``net`` as a plain single-input classifier.

        Runs one epoch over ``trainLoader`` with Adam, tracking a running
        loss and accuracy in a tqdm bar, and returns the trained network.
        ``valLoader`` is accepted for interface symmetry but unused here.
        """
        net.train()

        optimizer = torch.optim.Adam(net.parameters(), lr=args.lr_siamese)
        optimizer.zero_grad()

        train_losses = []
        time_start = time.time()
        queue = deque(maxlen=20)

        epochs = 1
        total_batch_id = 0
        metric = utils.Metric()

        for epoch in range(epochs):

            train_loss = 0
            metric.reset_acc()

            bar = tqdm(total=len(trainLoader),
                       desc=f'Epoch {epoch + 1}/{epochs}')
            with bar as t:
                for batch_id, (img, label) in enumerate(trainLoader, 1):

                    # Move inputs to GPU when requested, then wrap.
                    img = Variable(img.cuda() if args.cuda else img)
                    label = Variable(label.cuda() if args.cuda else label)

                    net.train()
                    optimizer.zero_grad()

                    output = net.forward(img)
                    metric.update_acc(output, label)
                    loss = loss_fn(output, label)
                    train_loss += loss.item()
                    loss.backward()

                    optimizer.step()
                    total_batch_id += 1
                    t.set_postfix(loss=f'{train_loss / batch_id:.4f}',
                                  train_acc=f'{metric.get_acc():.4f}')

                    train_losses.append(train_loss)

                    t.update()

        return net
Ejemplo n.º 4
0
Archivo: main.py Proyecto: jdddog/GTS
def eval(model, dataset, args):
    """Evaluate ``model`` on every batch of ``dataset``.

    Collects tag predictions (argmax over the last axis of the model's
    4-D output), scores them with ``utils.Metric``, prints aspect/opinion
    and union-tag P/R/F1, restores train mode, and returns
    ``(precision, recall, f1)`` for the union tags.
    """
    model.eval()
    with torch.no_grad():
        ids = []
        pred_batches = []
        label_batches = []
        length_batches = []
        sen_lengths = []
        ranges = []
        for batch_idx in range(dataset.batch_count):
            (sentence_ids, tokens, lengths, masks, sens_lens,
             token_ranges, aspect_tags, tags) = dataset.get_batch(batch_idx)
            batch_preds = torch.argmax(model(tokens, masks), dim=3)
            pred_batches.append(batch_preds)
            label_batches.append(tags)
            length_batches.append(lengths)
            sen_lengths.extend(sens_lens)
            ranges.extend(token_ranges)
            ids.extend(sentence_ids)

        all_preds = torch.cat(pred_batches, dim=0).cpu().tolist()
        all_labels = torch.cat(label_batches, dim=0).cpu().tolist()
        all_lengths = torch.cat(length_batches, dim=0).cpu().tolist()

        metric = utils.Metric(args, all_preds, all_labels, all_lengths,
                              sen_lengths, ranges, ignore_index=-1)
        precision, recall, f1 = metric.score_uniontags()
        aspect_results = metric.score_aspect()
        opinion_results = metric.score_opinion()
        print('Aspect term\tP:{:.5f}\tR:{:.5f}\tF1:{:.5f}'.format(
            aspect_results[0], aspect_results[1], aspect_results[2]))
        print('Opinion term\tP:{:.5f}\tR:{:.5f}\tF1:{:.5f}'.format(
            opinion_results[0], opinion_results[1], opinion_results[2]))
        print(args.task + '\tP:{:.5f}\tR:{:.5f}\tF1:{:.5f}\n'.format(
            precision, recall, f1))

    model.train()
    return precision, recall, f1
Ejemplo n.º 5
0
    drop_hidden=0.1,
    initer_stddev=0.02,
    loss=ks.losses.SparseCategoricalCrossentropy(from_logits=True),
    metric=ks.metrics.SparseCategoricalCrossentropy(from_logits=True),
    num_epochs=2,
    num_heads=3,
    num_rounds=2,
    num_shards=2,
    optimizer=ks.optimizers.Adam(),
    width_dec=40,
    width_enc=50,
)

# Swap in the project's own loss/metric implementations, overriding the
# Keras defaults set earlier in `params`.
params.update(
    loss=qu.Loss(),
    metric=qu.Metric(),
)


def main(ps, fn, root=None, groups=None, count=None):
    """Run ``fn`` over every (round, group) pair.

    For each of ``ps.num_rounds`` rounds and each group (defaulting to
    ``qs.groups``), builds the dataset and model and hands them to ``fn``.
    """
    qu.Config.runtime.is_training = True
    group_list = groups or qs.groups
    for r in range(ps.num_rounds):
        for g in group_list:
            print(f'\nRound {r + 1}, group {g}...\n=======================')
            dataset = qd.dset_for(ps, root, g, count=count)
            fn(ps, dataset, model_for(ps, g))


if __name__ == '__main__':
    ps = qu.Params(**params)
    root = f'/tmp/q/data/small'
Ejemplo n.º 6
0
def test(data,
         model,
         args,
         iteration,
         device,
         logger=None,
         num=None,
         plot=False):
    """Evaluate ``model`` on batches from ``data`` and report losses.

    For each batch the combined loss is the sum of the two per-branch BCE
    losses plus ``args.gamma`` times the detection BCE loss. Per-pixel
    metrics are accumulated in ``utils.Metric`` and finalized at the end.
    Optionally logs to a TensorBoard ``logger`` and saves side-by-side
    input/ground-truth/prediction plots under ``tmp_plot/<dataset>``.

    Returns ``(metric.final(), mean loss over processed batches)``.
    """

    model.eval()

    metric = utils.Metric()
    # metric_im = utils.Metric_image()
    loss_list = []

    if iteration is not None:
        print(f"{iteration}")

    # NOTE(review): no torch.no_grad() here — gradients are still tracked
    # during evaluation; confirm whether that is intentional.
    for i, ret in enumerate(data.load()):
        Xs, Xt, Ys, Yt, labels = ret
        labels = torch.from_numpy(np.array(labels,
                                           dtype=np.float32)).to(device)
        Xs, Xt, Ys, Yt = (
            Xs.to(device),
            Xt.to(device),
            Ys.to(device),
            Yt.to(device),
        )

        preds, predt, pred_det = model(Xs, Xt)

        # BCE_loss is a project helper; with_logits=True presumably applies
        # a sigmoid internally (matching the explicit sigmoids below).
        loss_p = BCE_loss(predt, Yt, with_logits=True)
        loss_q = BCE_loss(preds, Ys, with_logits=True)
        loss_det = F.binary_cross_entropy_with_logits(pred_det.squeeze(),
                                                      labels.squeeze())
        loss = loss_p + loss_q + args.gamma * loss_det
        loss_list.append(loss.data.cpu().numpy())
        print(f"{i}:")

        # Helper: tensor -> numpy array on host.
        def fnp(x):
            return x.data.cpu().numpy()

        # Convert logits to probabilities before metric/plotting.
        predt = torch.sigmoid(predt)
        preds = torch.sigmoid(preds)

        metric.update([fnp(Ys), fnp(Yt)], [fnp(preds), fnp(predt)])

        if logger:
            # NOTE(review): every batch is logged at the same `iteration`
            # step — later batches overwrite earlier points; confirm.
            logger.add_scalar("test_loss/total", loss, iteration)
        if plot:
            plot_dir = Path("tmp_plot") / args.dataset
            plot_dir.mkdir(exist_ok=True, parents=True)

            # One 3x2 figure per sample: inputs, ground truths, predictions.
            for ii in range(Xt.shape[0]):
                im1, im2 = torch_to_im(Xt[ii]), torch_to_im(Xs[ii])
                gt1, gt2 = torch_to_im(Yt[ii]), torch_to_im(Ys[ii])
                pred1, pred2 = torch_to_im(predt[ii]), torch_to_im(preds[ii])

                fig, axes = plt.subplots(nrows=3, ncols=2)
                axes[0, 0].imshow(im1)
                axes[0, 1].imshow(im2)
                axes[1, 0].imshow(gt1, cmap="jet")
                axes[1, 1].imshow(gt2, cmap="jet")
                axes[2, 0].imshow(pred1, cmap="jet")
                axes[2, 1].imshow(pred2, cmap="jet")

                fig.savefig(str(plot_dir / f"{i}_{ii}.jpg"))
                plt.close("all")

        # NOTE(review): breaks after processing batch index == num, i.e.
        # num + 1 batches in total — confirm the off-by-one is intended.
        if num is not None and i >= num:
            break

    out = metric.final()

    test_loss = np.mean(loss_list)
    print(f"\ntest loss : {test_loss:.4f}\n")

    return out, test_loss
Ejemplo n.º 7
0
    def train_fewshot(self, net, loss_fn, args, train_loader, val_loaders):
        """Train the siamese few-shot network with periodic validation.

        Trains ``net`` on image pairs for ``args.epochs`` epochs with Adam
        (separate learning rates for the siamese head and the ResNet
        backbone). Every ``args.test_freq`` epochs runs validation —
        either a single loader, or known/unknown loaders when
        ``self.new_split_type`` is set — saves the best model by
        validation accuracy, and early-stops after ``args.early_stopping``
        validations without improvement.

        Returns ``(net, best_model)`` where ``best_model`` is the path (or
        identifier) returned by ``self.save_model`` for the best
        checkpoint, '' if no checkpoint was ever saved.
        """
        net.train()
        val_tol = args.early_stopping
        # Two parameter groups: the siamese head trains at lr_siamese,
        # the ResNet feature extractor at its own lr_resnet.
        opt = torch.optim.Adam([{
            'params': net.sm_net.parameters()
        }, {
            'params': net.ft_net.parameters(),
            'lr': args.lr_resnet
        }],
                               lr=args.lr_siamese)

        opt.zero_grad()

        train_losses = []
        time_start = time.time()
        # Rolling window of the last 20 validation accuracies.
        queue = deque(maxlen=20)

        # print('steps:', args.max_steps)

        # epochs = int(np.ceil(args.max_steps / len(trainLoader)))
        epochs = args.epochs

        metric = utils.Metric()

        max_val_acc = 0
        max_val_acc_knwn = 0
        max_val_acc_unknwn = 0
        best_model = ''

        # The model graph is written to TensorBoard once, on the first batch.
        drew_graph = False

        # Counts consecutive validations without improvement (early stopping).
        val_counter = 0

        for epoch in range(epochs):

            train_loss = 0
            metric.reset_acc()

            with tqdm(total=len(train_loader),
                      desc=f'Epoch {epoch + 1}/{args.epochs}') as t:
                for batch_id, (img1, img2,
                               label) in enumerate(train_loader, 1):

                    # print('input: ', img1.size())

                    if args.cuda:
                        img1, img2, label = Variable(img1.cuda()), Variable(
                            img2.cuda()), Variable(label.cuda())
                    else:
                        img1, img2, label = Variable(img1), Variable(
                            img2), Variable(label)

                    if not drew_graph:
                        self.writer.add_graph(net, (img1, img2), verbose=True)
                        self.writer.flush()
                        drew_graph = True

                    net.train()
                    opt.zero_grad()

                    output = net.forward(img1, img2)
                    metric.update_acc(output, label)
                    loss = loss_fn(output, label)
                    # print('loss: ', loss.item())
                    train_loss += loss.item()
                    loss.backward()

                    opt.step()
                    t.set_postfix(loss=f'{train_loss / batch_id:.4f}',
                                  train_acc=f'{metric.get_acc():.4f}')

                    # if total_batch_id % args.log_freq == 0:
                    #     logger.info('epoch: %d, batch: [%d]\tacc:\t%.5f\tloss:\t%.5f\ttime lapsed:\t%.2f s' % (
                    #         epoch, batch_id, metric.get_acc(), train_loss / args.log_freq, time.time() - time_start))
                    #     train_loss = 0
                    #     metric.reset_acc()
                    #     time_start = time.time()

                    # NOTE(review): appends the *cumulative* epoch loss
                    # each batch, not the per-batch loss — confirm intended.
                    train_losses.append(train_loss)

                    t.update()

                self.writer.add_scalar('Train/Loss',
                                       train_loss / len(train_loader), epoch)
                self.writer.add_scalar('Train/Acc', metric.get_acc(), epoch)
                self.writer.flush()

                # ---- periodic validation ----
                if val_loaders is not None and epoch % args.test_freq == 0:
                    net.eval()

                    val_acc_unknwn, val_acc_knwn = -1, -1

                    if args.eval_mode == 'fewshot':
                        if not self.new_split_type:
                            # Single validation loader (legacy split).
                            val_rgt, val_err, val_acc = self.test_fewshot(
                                args,
                                net,
                                val_loaders[0],
                                loss_fn,
                                val=True,
                                epoch=epoch)
                        else:
                            # New split: validate separately on known and
                            # unknown classes.
                            val_rgt_knwn, val_err_knwn, val_acc_knwn = self.test_fewshot(
                                args,
                                net,
                                val_loaders[0],
                                loss_fn,
                                val=True,
                                epoch=epoch,
                                comment='known')
                            val_rgt_unknwn, val_err_unknwn, val_acc_unknwn = self.test_fewshot(
                                args,
                                net,
                                val_loaders[1],
                                loss_fn,
                                val=True,
                                epoch=epoch,
                                comment='unknown')

                    elif args.eval_mode == 'simple':  # todo not compatible with new data-splits
                        val_rgt, val_err, val_acc = self.test_simple(
                            args,
                            net,
                            val_loaders,
                            loss_fn,
                            val=True,
                            epoch=epoch)
                    else:
                        raise Exception('Unsupporeted eval mode')

                    if self.new_split_type:
                        self.logger.info(
                            'known val acc: [%f], unknown val acc [%f]' %
                            (val_acc_knwn, val_acc_unknwn))
                        self.logger.info('*' * 30)
                        if val_acc_knwn > max_val_acc_knwn:
                            self.logger.info(
                                'known val acc: [%f], beats previous max [%f]'
                                % (val_acc_knwn, max_val_acc_knwn))
                            self.logger.info(
                                'known rights: [%d], known errs [%d]' %
                                (val_rgt_knwn, val_err_knwn))
                            max_val_acc_knwn = val_acc_knwn

                        if val_acc_unknwn > max_val_acc_unknwn:
                            self.logger.info(
                                'unknown val acc: [%f], beats previous max [%f]'
                                % (val_acc_unknwn, max_val_acc_unknwn))
                            self.logger.info(
                                'unknown rights: [%d], unknown errs [%d]' %
                                (val_rgt_unknwn, val_err_unknwn))
                            max_val_acc_unknwn = val_acc_unknwn

                        # Combined accuracy over known + unknown samples.
                        val_acc = ((val_rgt_knwn + val_rgt_unknwn) *
                                   1.0) / (val_rgt_knwn + val_rgt_unknwn +
                                           val_err_knwn + val_err_unknwn)

                        self.writer.add_scalar('Total_Val/Acc', val_acc, epoch)
                        self.writer.flush()

                        val_rgt = (val_rgt_knwn + val_rgt_unknwn)
                        val_err = (val_err_knwn + val_err_unknwn)

                    if val_acc > max_val_acc:
                        val_counter = 0
                        self.logger.info(
                            'saving model... current val acc: [%f], previous val acc [%f]'
                            % (val_acc, max_val_acc))
                        best_model = self.save_model(args, net, epoch, val_acc)
                        max_val_acc = val_acc

                    else:
                        val_counter += 1
                        self.logger.info(
                            'Not saving, best val [%f], current was [%f]' %
                            (max_val_acc, val_acc))

                        if val_counter >= val_tol:  # early stopping
                            self.logger.info(
                                '*** Early Stopping, validation acc did not exceed [%f] in %d val accuracies ***'
                                % (max_val_acc, val_tol))
                            # Exits the epoch loop (the batch loop above
                            # has already finished at this point).
                            break

                    queue.append(val_rgt * 1.0 / (val_rgt + val_err))

            self._tb_draw_histograms(args, net, epoch)

        # Persist the per-batch cumulative loss history.
        with open('train_losses', 'wb') as f:
            pickle.dump(train_losses, f)

        # NOTE(review): `acc` sums the queued accuracies but is never used
        # or reported beyond the queue length print — confirm intended.
        acc = 0.0
        for d in queue:
            acc += d
        print("#" * 70)
        print('queue len: ', len(queue))

        if args.project_tb:
            print("Start projecting")
            # self._tb_project_embeddings(args, net.ft_net, train_loader, 1000)
            print("Projecting done")

        return net, best_model