Beispiel #1
0
def tester(cfg):
    """Run the saved model over the test split once and plot the results.

    Args:
        cfg: experiment config exposing TEST.MODEL (checkpoint path),
            VISUALIZATION_DIRECTORY and EXPERIMENT_NAME.
    """
    print('testing')
    dataloader_test, dataset_size_test = data.make_dataloader(cfg,
                                                              is_train=False)

    # NOTE(review): this built model is immediately replaced by torch.load
    # below; kept in case modeling.build has required side effects — confirm.
    model = modeling.build(cfg)

    if cfg.TEST.MODEL.startswith('.'):
        # Resolve a relative checkpoint path against the current directory.
        # (The original used str.replace('.', realpath('.')), which replaced
        # EVERY dot — including the one in the file extension — producing a
        # broken path for names like './model.pt'.)
        load_path = os.path.realpath(cfg.TEST.MODEL)
    else:
        load_path = cfg.TEST.MODEL

    model = torch.load(load_path)
    model.cuda()

    vis_test = Visualization(cfg, dataset_size_test, is_train=False)

    writer_path = os.path.join(cfg.VISUALIZATION_DIRECTORY,
                               cfg.EXPERIMENT_NAME)
    writer = SummaryWriter(writer_path)

    total_iterations_val = 0

    model.eval()
    epoch = 1
    for iteration, batch in enumerate(dataloader_test):
        # Batch layout: index, video features (+lengths), query tokens
        # (+lengths), start/end targets, localization mask (+lengths),
        # raw times, scaling factors, fps, frame bounds.
        index = batch[0]

        videoFeat = batch[1].cuda()
        videoFeat_lengths = batch[2].cuda()

        tokens = batch[3].cuda()
        tokens_lengths = batch[4].cuda()

        start = batch[5].cuda()
        end = batch[6].cuda()

        localiz = batch[7].cuda()
        localiz_lengths = batch[8]

        time_starts = batch[9]
        time_ends = batch[10]

        factors = batch[11]
        fps = batch[12]
        frame_start = batch[13]
        frame_end = batch[14]

        loss, individual_loss, pred_start, pred_end, attention, atten_loss = model(
            videoFeat, videoFeat_lengths, tokens, tokens_lengths, start, end,
            localiz, frame_start, frame_end)
        aux = vis_test.run(index, pred_start,
                           pred_end, start, end, videoFeat_lengths, epoch,
                           loss.detach(), individual_loss, attention,
                           atten_loss, time_starts, time_ends, factors, fps)
        total_iterations_val += 1
    a = vis_test.plot(epoch)
Beispiel #2
0
def main():
    """Prepare the evaluation experiment and run model evaluation."""
    cfg = None  # ensure the handlers below have something to pass if setup fails
    try:
        cfg = prepare_experiment(eval_cfg, 'e')
        model = YOLOv2Model(cfg, training=False)
        eval_dataloader = make_dataloader(cfg, training=False)
        model.eval(eval_dataloader)
    except KeyboardInterrupt:
        handle_keyboard_interruption(cfg)
    except Exception:
        # Narrowed from a bare `except:` so SystemExit/GeneratorExit still
        # propagate. Previously, a failure inside prepare_experiment also left
        # `cfg` unbound and crashed the handler with NameError.
        handle_other_exception(cfg)
Beispiel #3
0
def main():
    """Prepare the training experiment and run training + evaluation."""
    cfg = None  # ensure the handlers below have something to pass if setup fails
    try:
        cfg = prepare_experiment(train_cfg, 't')
        model = YOLOv2Model(cfg, training=True)
        train_dataloader, eval_dataloader = make_dataloader(cfg, training=True)
        model.train(train_dataloader, eval_dataloader)
    except KeyboardInterrupt:
        handle_keyboard_interruption(cfg)
    except Exception:
        # Narrowed from a bare `except:` so SystemExit/GeneratorExit still
        # propagate. Previously, a failure inside prepare_experiment also left
        # `cfg` unbound and crashed the handler with NameError.
        handle_other_exception(cfg)
Beispiel #4
0
 def __init__(self, dataset, n_epochs, epochs_per_line, lr, lr_schedule, batch_size, save_frequency, incompressible_flow, empirical_vars, data_root_dir='./', n_classes=None, n_data_points=None, init_identity=True):
     """Set up the experiment: hyperparameters, flow network and data loaders.

     `dataset` selects between the '10d' artificial-data experiment and the
     'EMNIST' experiment; any other value raises RuntimeError.
     """
     super().__init__()

     # Hyperparameters and run bookkeeping.
     self.dataset = dataset
     self.n_epochs = n_epochs
     self.epochs_per_line = epochs_per_line
     self.lr = lr
     self.lr_schedule = lr_schedule
     self.batch_size = batch_size
     self.save_frequency = min(save_frequency, n_epochs)
     self.incompressible_flow = bool(incompressible_flow)
     self.empirical_vars = bool(empirical_vars)
     self.init_identity = bool(init_identity)

     self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
     self.timestamp = str(int(time()))

     # 'gin' couplings give an incompressible (volume-preserving) flow,
     # 'glow' couplings do not.
     block_kind = 'gin' if self.incompressible_flow else 'glow'

     if self.dataset == '10d':
         self.net = construct_net_10d(coupling_block=block_kind, init_identity=init_identity)
         assert type(n_classes) is int
         self.n_classes = n_classes
         self.n_dims = 10
         self.save_dir = os.path.join('./artificial_data_save/', self.timestamp)
         self.latent, self.data, self.target = generate_artificial_data_10d(self.n_classes, n_data_points)
         self.train_loader = make_dataloader(self.data, self.target, self.batch_size)
     elif self.dataset == 'EMNIST':
         if not init_identity:
             raise RuntimeError('init_identity=False not implemented for EMNIST experiments')
         self.net = construct_net_emnist(coupling_block=block_kind)
         self.n_classes = 10
         self.n_dims = 28*28
         self.save_dir = os.path.join('./emnist_save/', self.timestamp)
         self.data_root_dir = data_root_dir
         self.train_loader = make_dataloader_emnist(batch_size=self.batch_size, train=True, root_dir=self.data_root_dir)
         self.test_loader = make_dataloader_emnist(batch_size=1000, train=False, root_dir=self.data_root_dir)
     else:
         raise RuntimeError("Check dataset name. Doesn't match.")

     if not empirical_vars:
         # Learnable per-class Gaussian parameters in latent space.
         self.mu = nn.Parameter(torch.zeros(self.n_classes, self.n_dims).to(self.device)).requires_grad_()
         self.log_sig = nn.Parameter(torch.zeros(self.n_classes, self.n_dims).to(self.device)).requires_grad_()
         # initialize these parameters to reasonable values
         self.set_mu_sig(init=True)

     self.to(self.device)
Beispiel #5
0
def train(args, model, optimizer, criterion, gids=None):
    """
    Training loop with periodic dataloader refresh, checkpointing and a
    final evaluation.

    Args:
        args: namespace with num_epochs, lr, lr_decay_start_epoch, cuda,
            loss_type, alpha, exp_root, dataset, dataset_root attributes.
        model: network to train (possibly wrapped in DataParallel).
        optimizer: optimizer over model parameters.
        criterion: loss callable, or a dict of losses for 'softmax-triplet'.
        gids: optional list of GPU ids; gids[0] hosts the tensors.
    """
    tb = SummaryWriter(comment='_{}'.format(args.loss_type))
    model.train()

    def save_model(path):
        # DataParallel wraps the real model in .module; save bare weights.
        if gids is not None and len(gids) > 1:
            torch.save(model.module.state_dict(), path)
        else:
            torch.save(model.state_dict(), path)

    train_loss = []
    t0 = int(time.time())

    for epoch in range(args.num_epochs):
        # Rebuild the dataloader every 10 epochs; epoch 0 is included, so
        # `dataloader` is always bound before first use.
        if epoch % 10 == 0:
            dataloader = make_dataloader(args, epoch)
        print('=== Epoch {}/{} ==='.format(epoch, args.num_epochs))
        adjust_lr_exp(optimizer, args.lr, epoch+1, args.num_epochs, args.lr_decay_start_epoch)

        for iteration, (image, label) in enumerate(dataloader):
            if args.cuda:
                image, label = image.cuda(gids[0]), label.cuda(gids[0])

            if args.loss_type == 'softmax':
                _, logits = model(image)
                loss = criterion(logits, label)
            elif args.loss_type == 'softmax-triplet':
                feat, logits = model(image)
                loss = args.alpha * criterion['softmax'](logits, label) \
                       + (1 - args.alpha) * criterion['triplet'](feat, label)
            else:
                feat = model(image)
                loss = criterion(feat, label)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # print training info
            train_loss.append(loss.item())

            if args.loss_type == 'dmml':
                print('Episode: {}, Loss: {:.6f}'.format(iteration, loss.item()))
            else:
                print('Batch: {}, Loss: {:.6f}'.format(iteration, loss.item()))

        avg_training_loss = np.mean(train_loss)
        print('Average loss: {:.6f}'.format(avg_training_loss))
        tb.add_scalar('Train loss', avg_training_loss, epoch+1)
        train_loss = []

        t = int(time.time())
        print('Time elapsed: {}h {}m'.format((t - t0) // 3600, ((t - t0) % 3600) // 60))

        # Checkpoint every 100 epochs in the second half of training.
        if epoch % 100 == 0 and epoch >= args.num_epochs // 2:
            model_save_path = os.path.join(args.exp_root, 'model_{}.pth'.format(epoch))
            save_model(model_save_path)
            print('Model {} saved.'.format(epoch))

    # Fixed: the original wrote 'model_last.pth'.format(epoch) — a no-op
    # format call on a string with no placeholders.
    model_save_path = os.path.join(args.exp_root, 'model_last.pth')
    save_model(model_save_path)
    print('Final model saved.')

    tb.close()

    # Fixed: `gids[0]` raised TypeError when gids is None, even though the
    # save logic above explicitly supports gids=None.
    eval(gid=gids[0] if gids is not None else None, dataset=args.dataset,
         dataset_root=args.dataset_root, which='last', exp_dir=args.exp_root)
def _train():
    """Fine-tune BertMouth using the module-level `args`.

    Relies on module globals: args, logger, BertTokenizer, BertMouth, AdamW,
    get_linear_schedule_with_warmup, make_dataloader, generate, save, trange.
    Writes TensorBoard scalars to ./logs and saves the model at the end.
    """

    device = torch.device(
        "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
    # Fixed: the original compared a torch.device against the string "cpu",
    # which is not a reliable equality; compare the device *type* instead.
    if device.type != "cpu":
        torch.cuda.manual_seed_all(args.seed)

    tokenizer = BertTokenizer.from_pretrained(
        'bert-base-japanese-whole-word-masking',
        do_lower_case=False,
        tokenize_chinese_chars=False)
    logger.info("loading data")
    logger.info(f"loading data {args.train_file}")

    train_dataloader = make_dataloader(args.train_file, args.max_seq_length,
                                       args.train_batch_size, tokenizer)
    valid_dataloader = make_dataloader(args.valid_file, args.max_seq_length,
                                       args.train_batch_size, tokenizer)

    logger.info("building model")
    model = BertMouth.from_pretrained(args.bert_model,
                                      num_labels=tokenizer.vocab_size)
    model.to(device)

    # Exclude pooler parameters: they are unused for this objective.
    param_optimizer = list(model.named_parameters())
    param_optimizer = [n for n in param_optimizer if 'pooler' not in n[0]]

    logger.info("setting optimizer")
    # Biases and LayerNorm weights get no weight decay.
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [{
        'params':
        [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
        'weight_decay':
        0.01
    }, {
        'params':
        [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
        'weight_decay':
        0.0
    }]

    optimization_steps = len(train_dataloader) * args.num_train_epochs
    optimizer = AdamW(optimizer_grouped_parameters,
                      lr=args.learning_rate,
                      eps=args.adam_epsilon)
    # Fixed: get_linear_schedule_with_warmup takes num_warmup_steps /
    # num_training_steps; the warmup_steps/t_total keywords belong to the
    # older WarmupLinearSchedule API and raise TypeError here. This also
    # matches the sibling train() in this file.
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=0,
        num_training_steps=optimization_steps)
    # ignore_index=0 skips padding positions in the loss.
    loss_fct = CrossEntropyLoss(ignore_index=0)

    def calc_batch_loss(batch):
        # Move the batch to the target device and compute token-level CE loss.
        batch = tuple(t.to(device) for t in batch)
        input_ids, y, input_mask, input_type_id, masked_pos = batch

        logits = model(input_ids, input_type_id, input_mask)
        logits = logits.view(-1, tokenizer.vocab_size)
        y = y.view(-1)
        loss = loss_fct(logits, y)
        return loss

    logger.info("train starts")
    model.train()
    summary_writer = SummaryWriter(log_dir="logs")
    generated_texts = []
    try:
        for epoch in trange(int(args.num_train_epochs), desc="Epoch"):
            train_loss = 0.
            running_num = 0
            for step, batch in enumerate(train_dataloader):
                loss = calc_batch_loss(batch)
                loss.backward()

                optimizer.step()
                scheduler.step()
                optimizer.zero_grad()

                # Accumulate loss and sample count for epoch-level logging.
                train_loss += loss.item()
                running_num += len(batch[0])
            logger.info("[{0} epochs] "
                        "train loss: {1:.3g} ".format(epoch + 1, train_loss /
                                                      running_num))
            summary_writer.add_scalar("train_loss", train_loss / running_num,
                                      epoch)

            # Validation pass (no parameter updates).
            model.eval()
            valid_loss = 0.
            valid_num = 0
            for batch in valid_dataloader:
                valid_loss += calc_batch_loss(batch).item()
                valid_num += len(batch[0])

            # Keep one generated sample per epoch for qualitative inspection.
            generated_texts.append(
                generate(tokenizer=tokenizer,
                         device=device,
                         length=25,
                         max_length=args.max_seq_length,
                         model=model))
            logger.info("[{0} epochs] valid loss: {1:.3g}".format(
                epoch + 1, valid_loss / valid_num))
            summary_writer.add_scalar("val_loss", valid_loss / valid_num,
                                      epoch)

            model.train()
    except KeyboardInterrupt:
        logger.info("KeyboardInterrupt")

    summary_writer.close()
    dt_now = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
    save(args, model, tokenizer, str(dt_now))
Beispiel #7
0
def train(args, tokenizer, device):
    """Fine-tune BertMouth with a masked-LM + next-sentence style objective.

    Args:
        args: namespace with train_file, valid_file, max_seq_length,
            train_batch_size, bert_model, num_train_epochs, learning_rate,
            adam_epsilon attributes.
        tokenizer: tokenizer providing vocab_size and encoding.
        device: torch device the model and batches are moved to.

    Side effects: writes TensorBoard scalars to ./logs and saves the model
    with a timestamp at the end.
    """
    logger.info("loading data")
    train_dataloader = make_dataloader(args.train_file, args.max_seq_length,
                                       args.train_batch_size, tokenizer)
    valid_dataloader = make_dataloader(args.valid_file, args.max_seq_length,
                                       args.train_batch_size, tokenizer)

    logger.info("building model")
    model = BertMouth.from_pretrained(args.bert_model,
                                      num_labels=tokenizer.vocab_size)
    model.to(device)

    # Exclude pooler parameters from optimization.
    param_optimizer = list(model.named_parameters())
    param_optimizer = [n for n in param_optimizer if 'pooler' not in n[0]]

    logger.info("setting optimizer")
    # Biases and LayerNorm weights get no weight decay.
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer
                    if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer
                    if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
    ]

    optimization_steps = len(train_dataloader) * args.num_train_epochs
    optimizer = AdamW(optimizer_grouped_parameters,
                      lr=args.learning_rate, eps=args.adam_epsilon)
    scheduler = get_linear_schedule_with_warmup(optimizer,
                                     num_warmup_steps=0,
                                     num_training_steps=optimization_steps)
    # ignore_index=0 skips padding positions in the loss.
    loss_fct = CrossEntropyLoss(ignore_index=0)
    def calc_batch_loss(batch):
        # Move tensors to the device and build auxiliary labels.
        batch = tuple(t.to(device) for t in batch)
        input_ids, y, input_mask, input_type_id, masked_pos = batch
        # NOTE(review): every pair gets label 1 — confirm this constant
        # next_sentence_label is intended.
        next_sentence_label = torch.tensor([1 for _ in range(len(input_ids))]).to(device)

        # Positions with token id 4 (presumably [MASK] — TODO confirm against
        # the tokenizer vocabulary) are ignored via the -100 label.
        masked_lm_labels = input_ids.clone()
        masked_lm_labels[masked_lm_labels == 4] = -100

        outputs = model(input_ids=input_ids, token_type_ids=input_type_id, attention_mask=input_mask, masked_lm_labels=masked_lm_labels, next_sentence_label=next_sentence_label)

        # NOTE(review): assumes outputs[0] are token logits and outputs[1]/
        # outputs[2] are scalar loss tensors. .item() detaches them, so only
        # loss_fct(logits, y) contributes gradients — confirm intended.
        logits = outputs[0].view(-1, tokenizer.vocab_size)
        y = y.view(-1)
        loss = loss_fct(logits, y) + outputs[1].item() + outputs[2].item()

        return loss

    logger.info("train starts")
    model.train()
    summary_writer = SummaryWriter(log_dir="logs")
    generated_texts = []
    try:
        for epoch in trange(int(args.num_train_epochs), desc="Epoch"):
            train_loss = 0.
            running_num = 0
            for step, batch in enumerate(train_dataloader):
                loss = calc_batch_loss(batch)
                loss.backward()

                optimizer.step()
                scheduler.step()
                optimizer.zero_grad()

                # Accumulate loss and sample count for epoch-level logging.
                train_loss += loss.item()
                running_num += len(batch[0])
            logger.info("[{0} epochs] "
                        "train loss: {1:.3g} ".format(epoch + 1,
                                                      train_loss / running_num))
            summary_writer.add_scalar("train_loss",
                                      train_loss / running_num, epoch)

            # Validation pass (no parameter updates).
            model.eval()
            valid_loss = 0.
            valid_num = 0
            for batch in valid_dataloader:
                valid_loss += calc_batch_loss(batch).item()
                valid_num += len(batch[0])

            # Keep one generated sample per epoch for qualitative inspection.
            generated_texts.append(generate(tokenizer=tokenizer,
                                            device=device,
                                            length=25,
                                            max_length=args.max_seq_length,
                                            model=model))
            logger.info("[{0} epochs] valid loss: {1:.3g}".format(epoch + 1,
                                                                  valid_loss / valid_num))
            summary_writer.add_scalar("val_loss",
                                      valid_loss / valid_num, epoch)

            model.train()
    except KeyboardInterrupt:
        logger.info("KeyboardInterrupt")

    summary_writer.close()
    dt_now = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
    save(args, model, tokenizer, str(dt_now))
Beispiel #8
0
def trainer(cfg):
    """Train the moment-localization model, validating and checkpointing
    each epoch.

    Side effects: writes TensorBoard scalars under
    cfg.VISUALIZATION_DIRECTORY/cfg.EXPERIMENT_NAME and saves the whole model
    object to ./checkpoints/<EXPERIMENT_NAME>/model_epoch_<n> every epoch.
    """
    print('trainer')
    dataloader_train, dataset_size_train = data.make_dataloader(cfg,
                                                                is_train=True)
    dataloader_test, dataset_size_test = data.make_dataloader(cfg,
                                                              is_train=False)

    model = modeling.build(cfg)
    model.cuda()
    #model = torch.load("/home/crodriguezo/projects/phd/moment-localization-with-NLP/mlnlp_lastversion/checkpoints/anet_config7/model_epoch_80")
    optimizer = solver.make_optimizer(cfg, model)

    # Separate metric trackers for the train and test splits.
    vis_train = Visualization(cfg, dataset_size_train)
    vis_test = Visualization(cfg, dataset_size_test, is_train=False)

    writer_path = os.path.join(cfg.VISUALIZATION_DIRECTORY,
                               cfg.EXPERIMENT_NAME)
    writer = SummaryWriter(writer_path)

    total_iterations = 0
    total_iterations_val = 0

    for epoch in range(cfg.EPOCHS):
        print("Epoch {}".format(epoch))
        model.train()
        for iteration, batch in enumerate(dataloader_train):
            # Batch layout: index, video features (+lengths), query tokens
            # (+lengths), start/end targets, localization mask (+lengths),
            # raw times, scaling factors and fps.
            index = batch[0]

            videoFeat = batch[1].cuda()
            videoFeat_lengths = batch[2].cuda()

            tokens = batch[3].cuda()
            tokens_lengths = batch[4].cuda()

            start = batch[5].cuda()
            end = batch[6].cuda()

            localiz = batch[7].cuda()
            localiz_lengths = batch[8]
            time_starts = batch[9]
            time_ends = batch[10]
            factors = batch[11]
            fps = batch[12]

            loss, individual_loss, pred_start, pred_end, attention, atten_loss = model(
                videoFeat, videoFeat_lengths, tokens, tokens_lengths, start,
                end, localiz)
            print("Loss :{}".format(loss))
            optimizer.zero_grad()
            loss.backward()
            # Clip gradients to stabilize training.
            torch.nn.utils.clip_grad_norm_(model.parameters(), 5)
            optimizer.step()

            vis_train.run(index, pred_start,
                          pred_end, start, end, videoFeat_lengths, epoch,
                          loss.detach(), individual_loss, attention,
                          atten_loss, time_starts, time_ends, factors, fps)

            writer.add_scalar(f'mlnlp/Progress_Loss', loss.item(),
                              total_iterations)

            writer.add_scalar(f'mlnlp/Progress_Attention_Loss',
                              atten_loss.item(), total_iterations)

            writer.add_scalar(f'mlnlp/Progress_Mean_IoU', vis_train.mIoU[-1],
                              total_iterations)

            total_iterations += 1.

        writer.add_scalar(f'mlnlp/Train_Loss', np.mean(vis_train.loss), epoch)

        writer.add_scalar(f'mlnlp/Train_Mean_IoU', np.mean(vis_train.mIoU),
                          epoch)

        vis_train.plot(epoch)
        # Save the entire model object (not just the state dict).
        torch.save(
            model,
            "./checkpoints/{}/model_epoch_{}".format(cfg.EXPERIMENT_NAME,
                                                     epoch))

        # Validation pass over the test split (no gradient updates).
        model.eval()
        for iteration, batch in enumerate(dataloader_test):
            index = batch[0]

            videoFeat = batch[1].cuda()
            videoFeat_lengths = batch[2].cuda()

            tokens = batch[3].cuda()
            tokens_lengths = batch[4].cuda()

            start = batch[5].cuda()
            end = batch[6].cuda()
            localiz = batch[7].cuda()
            localiz_lengths = batch[8]
            time_starts = batch[9]
            time_ends = batch[10]
            factors = batch[11]
            fps = batch[12]

            loss, individual_loss, pred_start, pred_end, attention, atten_loss = model(
                videoFeat, videoFeat_lengths, tokens, tokens_lengths, start,
                end, localiz)
            vis_test.run(index, pred_start,
                         pred_end, start, end, videoFeat_lengths, epoch,
                         loss.detach(), individual_loss, attention, atten_loss,
                         time_starts, time_ends, factors, fps)
            #print(loss)
            writer.add_scalar(f'mlnlp/Progress_Valid_Loss', loss.item(),
                              total_iterations_val)

            writer.add_scalar(f'mlnlp/Progress_Valid_Atten_Loss',
                              atten_loss.item(), total_iterations_val)

            writer.add_scalar(f'mlnlp/Progress_Valid_Mean_IoU',
                              vis_test.mIoU[-1], total_iterations_val)

            total_iterations_val += 1

        writer.add_scalar(f'mlnlp/Valid_Loss', np.mean(vis_test.loss), epoch)

        writer.add_scalar(f'mlnlp/Valid_Mean_IoU', np.mean(vis_test.mIoU),
                          epoch)

        # plot() returns per-threshold tIoU metrics for this epoch.
        a = vis_test.plot(epoch)
        writer.add_scalars(f'mlnlp/Valid_tIoU_th', a, epoch)
Beispiel #9
0
def tester(cfg):
    """Evaluate a saved checkpoint on the test split and collect the
    per-sample visualization results."""
    print('testing')
    dataloader_test, dataset_size_test = data.make_dataloader(cfg,
                                                              is_train=False)

    # Build the architecture, then replace it entirely with the serialized
    # checkpoint from cfg.TEST.MODEL.
    model = modeling.build(cfg)
    # torch.nn.Module.dump_patches = True
    model = torch.load(cfg.TEST.MODEL)
    # print(model)
    model.cuda()
    model.eval()

    vis_test = Visualization(cfg, dataset_size_test, is_train=False)

    writer = SummaryWriter(os.path.join(cfg.VISUALIZATION_DIRECTORY,
                                        cfg.EXPERIMENT_NAME))

    total_iterations = 0
    total_iterations_val = 0

    epoch = 1
    results_data = {}
    for step, batch in enumerate(dataloader_test):
        # Unpack the fixed batch layout; GPU tensors first, then CPU metadata.
        index = batch[0]

        videoFeat, videoFeat_lengths = batch[1].cuda(), batch[2].cuda()
        tokens, tokens_lengths = batch[3].cuda(), batch[4].cuda()
        start, end = batch[5].cuda(), batch[6].cuda()
        localiz = batch[7].cuda()
        localiz_lengths = batch[8]
        time_starts, time_ends = batch[9], batch[10]
        factors, fps = batch[11], batch[12]
        objects, objects_lengths = batch[13].cuda(), batch[14].cuda()
        humans, humans_lengths = batch[15].cuda(), batch[16].cuda()

        (loss, individual_loss, pred_start, pred_end, attention, atten_loss,
         attentionNodeQueryHO, attentionNodeQueryVH,
         attentionNodeQueryVO) = model(videoFeat, videoFeat_lengths,
                                       objects, objects_lengths,
                                       humans, humans_lengths,
                                       tokens, tokens_lengths,
                                       start, end, localiz)

        aux = vis_test.run(index, pred_start, pred_end, start, end,
                           videoFeat_lengths, epoch, loss.detach(),
                           individual_loss, attention, atten_loss,
                           time_starts, time_ends, factors, fps,
                           attentionNodeQueryHO, attentionNodeQueryVH,
                           attentionNodeQueryVO)
        total_iterations_val += 1
        # Merge this batch's per-sample results into the overall dict.
        results_data.update(aux)
Beispiel #10
0
def trainer(cfg):
    """Train the graph-attention localization model with a StepLR schedule,
    validating and checkpointing each epoch.

    Side effects: writes TensorBoard scalars under
    cfg.VISUALIZATION_DIRECTORY/cfg.EXPERIMENT_NAME and saves the whole model
    object to ./checkpoints/<EXPERIMENT_NAME>/model_epoch_<n> every epoch.
    """
    print('trainer')
    dataloader_train, dataset_size_train = data.make_dataloader(cfg,
                                                                is_train=True)
    dataloader_test, dataset_size_test = data.make_dataloader(cfg,
                                                              is_train=False)

    model = modeling.build(cfg)
    model.cuda()

    optimizer = solver.make_optimizer(cfg, model)
    # LR is multiplied by gamma every step_size epochs.
    scheduler = StepLR(optimizer, step_size=6, gamma=0.01)

    # Separate metric trackers for the train and test splits.
    vis_train = Visualization(cfg, dataset_size_train)
    vis_test = Visualization(cfg, dataset_size_test, is_train=False)

    writer_path = os.path.join(cfg.VISUALIZATION_DIRECTORY,
                               cfg.EXPERIMENT_NAME)
    writer = SummaryWriter(writer_path)

    total_iterations = 0
    total_iterations_val = 0

    for epoch in range(cfg.EPOCHS):
        # Decay Learning Rate
        # print("Epoch {}".format(epoch))
        # NOTE(review): get_lr() is deprecated in newer PyTorch in favor of
        # get_last_lr() and may warn here — confirm the installed version.
        print('Epoch:', epoch, 'LR:', scheduler.get_lr())
        model.train()
        for iteration, batch in enumerate(dataloader_train):
            # Batch layout: index, video features (+lengths), query tokens
            # (+lengths), start/end targets, localization mask (+lengths),
            # raw times, scaling factors, fps, then object and human node
            # features (+lengths) for the graph attention.
            index = batch[0]

            videoFeat = batch[1].cuda()
            videoFeat_lengths = batch[2].cuda()

            tokens = batch[3].cuda()
            tokens_lengths = batch[4].cuda()

            start = batch[5].cuda()
            end = batch[6].cuda()

            localiz = batch[7].cuda()
            localiz_lengths = batch[8]
            time_starts = batch[9]
            time_ends = batch[10]
            factors = batch[11]
            fps = batch[12]

            objects = batch[13].cuda()
            objects_lengths = batch[14].cuda()

            humans = batch[15].cuda()
            humans_lengths = batch[16].cuda()

            loss, individual_loss, pred_start, pred_end, attention, atten_loss, attentionNodeQueryHO, attentionNodeQueryVH, attentionNodeQueryVO = model(videoFeat, videoFeat_lengths, \
                                                                                      objects, objects_lengths, \
                                                                                      humans, humans_lengths, \
                                                                                      tokens, tokens_lengths, \
                                                                                      start, end, localiz)
            # print("Loss :{}".format(loss))
            optimizer.zero_grad()
            loss.backward()
            # Clip gradients to stabilize training.
            torch.nn.utils.clip_grad_norm_(model.parameters(), 5)
            optimizer.step()

            vis_train.run(index, pred_start,
                          pred_end, start, end, videoFeat_lengths, epoch,
                          loss.detach(), individual_loss, attention,
                          atten_loss, time_starts, time_ends, factors, fps,
                          attentionNodeQueryHO, attentionNodeQueryVH,
                          attentionNodeQueryVO)

            writer.add_scalar(f'mlnlp/Progress_Loss', loss.item(),
                              total_iterations)

            writer.add_scalar(f'mlnlp/Progress_Attention_Loss',
                              atten_loss.item(), total_iterations)

            writer.add_scalar(f'mlnlp/Progress_Mean_IoU', vis_train.mIoU[-1],
                              total_iterations)

            total_iterations += 1.

        writer.add_scalar(f'mlnlp/Train_Loss', np.mean(vis_train.loss), epoch)

        writer.add_scalar(f'mlnlp/Train_Mean_IoU', np.mean(vis_train.mIoU),
                          epoch)

        # Per-epoch LR decay, after the optimizer steps of this epoch.
        scheduler.step()
        vis_train.plot(epoch)
        # Save the entire model object (not just the state dict).
        torch.save(
            model,
            "./checkpoints/{}/model_epoch_{}".format(cfg.EXPERIMENT_NAME,
                                                     epoch))

        # Validation pass over the test split (no gradient updates).
        model.eval()
        for iteration, batch in enumerate(dataloader_test):
            index = batch[0]

            videoFeat = batch[1].cuda()
            videoFeat_lengths = batch[2].cuda()

            tokens = batch[3].cuda()
            tokens_lengths = batch[4].cuda()

            start = batch[5].cuda()
            end = batch[6].cuda()

            localiz = batch[7].cuda()
            localiz_lengths = batch[8]
            time_starts = batch[9]
            time_ends = batch[10]
            factors = batch[11]
            fps = batch[12]

            objects = batch[13].cuda()
            objects_lengths = batch[14].cuda()

            humans = batch[15].cuda()
            humans_lengths = batch[16].cuda()

            loss, individual_loss, pred_start, pred_end, attention,atten_loss, attentionNodeQueryHO, attentionNodeQueryVH, attentionNodeQueryVO = model(videoFeat, videoFeat_lengths, \
                                                                                     objects, objects_lengths, \
                                                                                     humans, humans_lengths, \
                                                                                     tokens, tokens_lengths, \
                                                                                     start, end, localiz)

            vis_test.run(index, pred_start,
                         pred_end, start, end, videoFeat_lengths, epoch,
                         loss.detach(), individual_loss, attention, atten_loss,
                         time_starts, time_ends, factors, fps,
                         attentionNodeQueryHO, attentionNodeQueryVH,
                         attentionNodeQueryVO)
            #print(index)
            writer.add_scalar(f'mlnlp/Progress_Valid_Loss', loss.item(),
                              total_iterations_val)

            writer.add_scalar(f'mlnlp/Progress_Valid_Atten_Loss',
                              atten_loss.item(), total_iterations_val)

            writer.add_scalar(f'mlnlp/Progress_Valid_Mean_IoU',
                              vis_test.mIoU[-1], total_iterations_val)

            total_iterations_val += 1

        writer.add_scalar(f'mlnlp/Valid_Loss', np.mean(vis_test.loss), epoch)

        writer.add_scalar(f'mlnlp/Valid_Mean_IoU', np.mean(vis_test.mIoU),
                          epoch)

        # plot() returns per-threshold tIoU metrics for this epoch.
        a = vis_test.plot(epoch)
        writer.add_scalars(f'mlnlp/Valid_tIoU_th', a, epoch)
Beispiel #11
0
def train(args, tokenizer, device):
    """Fine-tune the BertMouth masked-LM model on train/valid files.

    Args:
        args: namespace with train_file, valid_file, max_seq_length,
            train_batch_size, bert_model, num_train_epochs, learning_rate,
            adam_epsilon attributes.
        tokenizer: tokenizer providing vocab_size and encoding.
        device: torch device the model and batches are moved to.
    """
    logger.info("loading data")

    # Load the training data.
    train_dataloader = make_dataloader(args.train_file, args.max_seq_length,
                                       args.train_batch_size, tokenizer)
    # Load the validation data.
    valid_dataloader = make_dataloader(args.valid_file, args.max_seq_length,
                                       args.train_batch_size, tokenizer)

    logger.info("building model")

    # Load the pretrained BertMouth model (presumably the same architecture
    # as a standard BERT model — confirm against the BertMouth definition).
    model = BertMouth.from_pretrained(args.bert_model,
                                      num_labels=tokenizer.vocab_size)
    # Move the model to the GPU/CPU.
    model.to(device)

    # Collect parameters whose names do not contain 'pooler'.
    param_optimizer = list(model.named_parameters())
    param_optimizer = [n for n in param_optimizer if 'pooler' not in n[0]]

    logger.info("setting optimizer")

    # Split parameters into those with and without weight decay
    # (biases and LayerNorm weights get no decay).
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer
                    if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer
                    if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
    ]

    # Total optimization steps = number of training batches * epochs.
    optimization_steps = len(train_dataloader) * args.num_train_epochs
    # Configure the optimizer.
    optimizer = AdamW(optimizer_grouped_parameters,
                      lr=args.learning_rate, eps=args.adam_epsilon)
    # The scheduler linearly adjusts the learning rate over training.
    scheduler = WarmupLinearSchedule(optimizer,
                                     warmup_steps=0,
                                     t_total=optimization_steps)
    # ignore_index=0 skips padding positions in the loss.
    loss_fct = CrossEntropyLoss(ignore_index=0)

    def calc_batch_loss(batch):
        # Move batch tensors to the target device.
        batch = tuple(t.to(device) for t in batch)
        input_ids, y, input_mask, input_type_id, masked_pos = batch

        # Compute model outputs (token logits over the vocabulary).
        logits = model(input_ids, input_type_id, input_mask)
        logits = logits.view(-1, tokenizer.vocab_size)
        y = y.view(-1)

        # Compute the cross-entropy loss.
        loss = loss_fct(logits, y)
        return loss

    logger.info("train starts")
    # Put the model in training mode.
    model.train()
    # TensorBoard log directory.
    summary_writer = SummaryWriter(log_dir="logs")

    generated_texts = []
    try:
        # trange shows a progress bar over epochs.
        for epoch in trange(int(args.num_train_epochs), desc="Epoch"):
            train_loss = 0.
            running_num = 0

            # Iterate over training batches.
            for step, batch in enumerate(train_dataloader):

                # Compute the loss for this batch.
                loss = calc_batch_loss(batch)

                # Backpropagate and update parameters.
                loss.backward()
                optimizer.step()
                scheduler.step()

                # Reset gradients for the next step.
                optimizer.zero_grad()

                # Accumulate loss and sample count for logging.
                train_loss += loss.item()
                running_num += len(batch[0])
            logger.info("[{0} epochs] "
                        "train loss: {1:.3g} ".format(epoch + 1,
                                                      train_loss / running_num))
            summary_writer.add_scalar("train_loss",
                                      train_loss / running_num, epoch)

            # Switch to evaluation mode for validation.
            model.eval()
            valid_loss = 0.
            valid_num = 0
            for batch in valid_dataloader:
                # Evaluate loss on the validation batch.
                valid_loss += calc_batch_loss(batch).item()
                valid_num += len(batch[0])

            # Store a generated sample text for this epoch.
            generated_texts.append(generate(tokenizer=tokenizer,
                                            device=device,
                                            length=25,
                                            max_length=args.max_seq_length,
                                            model=model))
            logger.info("[{0} epochs] valid loss: {1:.3g}".format(epoch + 1,
                                                                  valid_loss / valid_num))
            summary_writer.add_scalar("val_loss",
                                      valid_loss / valid_num, epoch)

            # Switch back to training mode.
            model.train()
    except KeyboardInterrupt:
        logger.info("KeyboardInterrupt")

    # Close the TensorBoard writer.
    summary_writer.close()
    dt_now = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
    # Save the model with a timestamp.
    save(args, model, tokenizer, str(dt_now))
Beispiel #12
0
def trainer(cfg):
    print('trainer')
    dataloader_train, dataset_size_train = data.make_dataloader(cfg,
                                                                is_train=True)
    dataloader_test, dataset_size_test = data.make_dataloader(cfg,
                                                              is_train=False)
    print(dataset_size_train)
    print(dataset_size_test)

    model = modeling.build(cfg)
    if cfg.MODE_TRAIN == 'resume':
        model = torch.load("./checkpoints/{}/model_{}_epoch_{}".format(
            cfg.EXPERIMENT_NAME, cfg.MODEL_NAME, cfg.MODE_TRAIN_RESUME_EPOCH))

    model = torch.load("./checkpoints/{}/model_{}".format(
        cfg.EXPERIMENT_NAME, cfg.MODEL_NAME))

    model.cuda()
    optimizer = solver.make_optimizer(cfg, model)
    #model = torch.load("/home/crodriguezo/projects/phd/moment-localization-with-NLP/mlnlp_lastversion/checkpoints/anet_config7/model_epoch_80")

    vis_train = Visualization(cfg, dataset_size_train)
    vis_test = Visualization(cfg, dataset_size_test, is_train=False)

    writer_path = os.path.join(cfg.VISUALIZATION_DIRECTORY,
                               cfg.EXPERIMENT_NAME)
    writer = SummaryWriter(writer_path)

    total_iterations = 0
    total_iterations_val = 0
    cfg.EPOCHS = 1
    for epoch in range(cfg.EPOCHS):
        model.eval()
        sumloss = 0
        sumsample = 0
        with torch.no_grad():
            for iteration, batch in enumerate(dataloader_test):
                index = batch[0]

                videoFeat = batch[1].cuda()
                videoFeat_lengths = batch[2].cuda()

                tokens = batch[3].cuda()
                tokens_lengths = batch[4].cuda()
                if cfg.MODEL_NAME == 'TMLGA':
                    start = batch[5].cuda()
                    end = batch[6].cuda()
                    localiz = batch[7].cuda()
                    frame_start = batch[13]
                    frame_end = batch[14]
                else:
                    start = batch[5]
                    end = batch[6]
                    localiz = batch[7]
                    frame_start = batch[13].cuda()
                    frame_end = batch[14].cuda()

                localiz_lengths = batch[8]
                time_starts = batch[9]
                time_ends = batch[10]
                factors = batch[11]
                fps = batch[12]

                duration = batch[15]
                vid_names = batch[16]
                loss, individual_loss, pred_start, pred_end, attention, atten_loss = model(
                    videoFeat, videoFeat_lengths, tokens, tokens_lengths,
                    start, end, localiz, frame_start, frame_end)
                sumloss += loss.item() * float(videoFeat.shape[0])
                sumsample += videoFeat.shape[0]
                # print("Test_Loss :{}".format(loss))
                vis_test.run(index, pred_start, pred_end, start, end, videoFeat_lengths, epoch, loss.detach(), individual_loss, \
                    attention,atten_loss, time_starts, time_ends, factors, fps, duration,vid_names)
                #print(loss)
                writer.add_scalar(f'mlnlp/Progress_Valid_Loss', loss.item(),
                                  total_iterations_val)

                writer.add_scalar(f'mlnlp/Progress_Valid_Atten_Loss',
                                  atten_loss.item(), total_iterations_val)

                writer.add_scalar(f'mlnlp/Progress_Valid_Mean_IoU',
                                  vis_test.mIoU[-1], total_iterations_val)

                total_iterations_val += 1
                # del videoFeat,videoFeat_lengths,tokens,tokens_lengths,start,end,localiz
                # torch.cuda.empty_cache()
        print("Test_Loss :{}".format(sumloss / sumsample))
        writer.add_scalar(f'mlnlp/Valid_Loss', np.mean(vis_test.loss), epoch)

        writer.add_scalar(f'mlnlp/Valid_Mean_IoU', np.mean(vis_test.mIoU),
                          epoch)

        a = vis_test.plot(epoch)
        writer.add_scalars(f'mlnlp/Valid_tIoU_th', a, epoch)