Example #1
def valid(valid_loader, model, logger):
    model.eval()

    losses = AverageMeter()

    # Batches
    for data in valid_loader:
        # Move to GPU, if available
        padded_input, padded_target, input_lengths = data
        padded_input = padded_input.to(Config.device)
        padded_target = padded_target.to(Config.device)
        input_lengths = input_lengths.to(Config.device)

        with torch.no_grad():
            # Forward prop.
            pred, gold = model(padded_input, input_lengths, padded_target)
            loss, n_correct = cal_performance(pred,
                                              gold,
                                              smoothing=args.label_smoothing)
            # Skip batches that produce a NaN loss
            if math.isnan(loss.item()):
                print('n_correct: ' + str(n_correct))
                print('data: ' + str(data))
                continue

        # Keep track of metrics
        losses.update(loss.item())

    # Print status
    logger.info('\nValidation Loss {loss.val:.5f} ({loss.avg:.5f})\n'.format(
        loss=losses))

    return losses.avg
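
Every example updates an AverageMeter that is never defined in these snippets. Below is a minimal sketch consistent with the .update(value), .val and .avg accesses in the code above; the field names are inferred from the format strings, not taken from a shown definition.

class AverageMeter:
    """Track the most recent value and a running average."""

    def __init__(self):
        self.val = 0.0    # most recent value passed to update()
        self.sum = 0.0    # running sum of all values
        self.count = 0    # number of values seen so far
        self.avg = 0.0    # running average

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count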
Example #2
def valid(valid_loader, model, logger):
    model.eval()

    losses = AverageMeter()

    # Batches
    for data in tqdm(valid_loader):
        # Move to GPU, if available
        padded_input, padded_target, input_lengths = data
        padded_input = padded_input.to(device)
        padded_target = padded_target.to(device)
        input_lengths = input_lengths.to(device)

        with torch.no_grad():
            # Forward prop.
            pred, gold = model(padded_input, input_lengths, padded_target)
            loss, n_correct = cal_performance(pred,
                                              gold,
                                              smoothing=args.label_smoothing)

        # Keep track of metrics
        losses.update(loss.item())

    # Print status
    logger.info('\nValidation Loss {loss.val:.5f} ({loss.avg:.5f})\n'.format(
        loss=losses))

    return losses.avg
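
All six examples delegate the loss to cal_performance(pred, gold, smoothing=...), which is not shown here. The sketch below illustrates what such a helper typically computes in Transformer-style ASR code: cross-entropy over non-padding positions, with optional label smoothing. IGNORE_ID, the flattened (N*T, V) logit layout, and all other details are assumptions for illustration, not the original implementation.

import torch
import torch.nn.functional as F

IGNORE_ID = -1  # assumed padding label


def cal_performance(pred, gold, smoothing=0.0):
    """Return (loss, n_correct) over non-padding positions.

    pred: (N, T, V) raw logits; gold: (N, T) target indices.
    """
    pred = pred.view(-1, pred.size(-1))
    gold = gold.contiguous().view(-1)
    non_pad_mask = gold.ne(IGNORE_ID)

    if smoothing > 0.0:
        # Spread `smoothing` probability mass over the non-target classes
        n_class = pred.size(1)
        one_hot = torch.full_like(pred, smoothing / (n_class - 1))
        # clamp(min=0): padding targets are negative and would break scatter_;
        # those positions are masked out of the loss below anyway
        one_hot.scatter_(1, gold.clamp(min=0).unsqueeze(1), 1.0 - smoothing)
        log_prb = F.log_softmax(pred, dim=1)
        loss = -(one_hot * log_prb).sum(dim=1)
        loss = loss.masked_select(non_pad_mask).sum()
    else:
        loss = F.cross_entropy(pred, gold, ignore_index=IGNORE_ID,
                               reduction='sum')

    n_correct = (pred.max(1)[1].eq(gold)
                 .masked_select(non_pad_mask).sum().item())
    return loss, n_correct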
Example #3
def train(train_loader, model, optimizer, epoch, logger, writer):
    model.train()  # train mode (dropout and batch norm are active)

    losses = AverageMeter()
    times = AverageMeter()

    start = time.time()

    # Batches
    for i, data in enumerate(train_loader):
        # Move to GPU, if available
        padded_input, padded_target, input_lengths = data
        padded_input = padded_input.to(Config.device)
        padded_target = padded_target.to(Config.device)
        input_lengths = input_lengths.to(Config.device)

        # Forward prop.
        pred, gold = model(padded_input, input_lengths, padded_target)

        loss, n_correct = cal_performance(pred, gold, smoothing=args.label_smoothing)

        # Skip batches that produce a NaN loss
        if math.isnan(loss.item()):
            print('n_correct: ' + str(n_correct))
            print('data: ' + str(data))
            continue

        # Back prop.
        optimizer.zero_grad()
        loss.backward()

        # Clip gradients
        clip_gradient(optimizer.optimizer, Config.grad_clip)

        # Update weights
        optimizer.step()

        # Keep track of metrics
        elapsed = time.time() - start
        start = time.time()

        losses.update(loss.item())
        times.update(elapsed)

        # Print status
        if i % Config.print_freq == 0:
            logger.info('Epoch: [{0}][{1}/{2}]\t'
                        'Batch time {time.val:.5f} ({time.avg:.5f})\t'
                        'Loss {loss.val:.5f} ({loss.avg:.5f})'.format(epoch, i, len(train_loader), time=times,
                                                                      loss=losses))
            writer.add_scalar('step_num/train_loss', losses.avg, optimizer.step_num)
            writer.add_scalar('step_num/learning_rate', optimizer.lr, optimizer.step_num)

    return losses.avg
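
Example 3 calls clip_gradient(optimizer.optimizer, Config.grad_clip); the extra .optimizer hop, together with the step_num and lr attributes logged above, suggests a learning-rate-scheduling wrapper around a plain torch optimizer. Neither helper is shown; here is a sketch of clip_gradient under the assumption that it clamps each gradient element into [-grad_clip, grad_clip].

def clip_gradient(optimizer, grad_clip):
    """Clamp every gradient element to [-grad_clip, grad_clip] in place."""
    for group in optimizer.param_groups:
        for param in group['params']:
            if param.grad is not None:
                param.grad.data.clamp_(-grad_clip, grad_clip)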
Example #4
    def _run_one_epoch(self, cross_valid=False):
        total_loss = 0
        data_loader = self.tr_loader if not cross_valid else self.cv_loader
        for i, data in enumerate(data_loader):
            padded_input, input_lengths, padded_target = data
            padded_input = padded_input.cuda()
            input_lengths = input_lengths.cuda()
            padded_target = padded_target.cuda()
            pred, gold = self.model(padded_input, input_lengths, padded_target)
            loss, n_correct = cal_performance(pred,
                                              gold,
                                              smoothing=self.label_smoothing)
            if not cross_valid:
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

            total_loss += loss.item()
            non_pad_mask = gold.ne(IGNORE_ID)
            n_word = non_pad_mask.sum().item()  # non-padding tokens in this batch

        # Return outside the loop; returning inside it would end the
        # epoch after the first batch
        return total_loss / (i + 1)
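
Note that _run_one_epoch only skips the backward pass when cross_valid=True; the forward pass still builds autograd graphs, so a caller would normally wrap the validation call in torch.no_grad(). A sketch of an outer loop driving this method on the same (unseen) class; self.epochs and the log format are assumptions.

    def train(self):
        for epoch in range(self.epochs):
            self.model.train()
            tr_loss = self._run_one_epoch()

            self.model.eval()
            with torch.no_grad():  # no gradients needed for cross-validation
                cv_loss = self._run_one_epoch(cross_valid=True)

            print('Epoch {} | train loss {:.5f} | valid loss {:.5f}'.format(
                epoch, tr_loss, cv_loss))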
Example #5
def train(train_loader, model, optimizer, epoch, logger):
    model.train()  # train mode (dropout and batch norm are active)

    losses = AverageMeter()

    # Batches
    for i, data in enumerate(train_loader):
        # Move to GPU, if available
        padded_input, padded_target, input_lengths = data
        padded_input = padded_input.to(device)
        padded_target = padded_target.to(device)
        input_lengths = input_lengths.to(device)

        # Forward prop.
        pred, gold = model(padded_input, input_lengths, padded_target)
        loss, n_correct = cal_performance(pred,
                                          gold,
                                          smoothing=args.label_smoothing)

        # Back prop.
        optimizer.zero_grad()
        loss.backward()

        # Update weights
        optimizer.step()

        # Keep track of metrics
        losses.update(loss.item())

        # Print status
        if i % print_freq == 0:
            logger.info('Epoch: [{0}][{1}/{2}]\t'
                        'Loss {loss.val:.5f} ({loss.avg:.5f})'.format(
                            epoch, i, len(train_loader), loss=losses))

    return losses.avg
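
A sketch of the epoch loop that these train()/valid() functions imply, keeping the weights with the best validation loss; num_epochs and the checkpoint filename are assumptions, and torch is assumed to be imported.

best_loss = float('inf')
for epoch in range(num_epochs):
    train_loss = train(train_loader, model, optimizer, epoch, logger)
    valid_loss = valid(valid_loader, model, logger)

    # Keep the checkpoint that generalizes best
    if valid_loss < best_loss:
        best_loss = valid_loss
        torch.save(model.state_dict(), 'BEST_checkpoint.tar')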
Example #6
def train(model,
          total_batch_size,
          queue,
          optimizer,
          device,
          train_begin,
          train_loader_count,
          print_batch=5,
          teacher_forcing_ratio=1):
    total_loss = 0.
    total_num = 0
    total_dist = 0
    total_length = 0
    total_sent_num = 0
    batch = 0

    model.train()

    logger.info('train() start')

    begin = epoch_begin = time.time()

    while True:
        if queue.empty():
            logger.debug('queue is empty')

        feats, scripts, feat_lengths, script_lengths = queue.get()

        #print("///////////////////////////////////////////")
        #print("aaaaaaaaaaaaaaaaaaaaaaaaaaaa")
        #print(feat_lengths)
        #print(script_lengths)

        if feats.shape[0] == 0:
            # an empty batch signals that one loader has finished
            train_loader_count -= 1

            logger.debug('left train_loader: %d' % (train_loader_count))

            if train_loader_count == 0:
                break
            else:
                continue

        optimizer.zero_grad()

        feats = feats.to(device)
        scripts = scripts.to(device)
        # Shift targets one step to the left (presumably dropping a leading
        # SOS token); the last column keeps its previous value
        sh = scripts.shape
        scripts[:, :sh[1] - 1] = scripts[:, 1:]

        # pred holds the pre-softmax logits; gold holds the label data
        pred, gold = model(feats, feat_lengths, scripts)
        y_hat = pred.max(-1)[1]

        #print("1. input_script", input_scripts.shape, input_scripts)
        #print("2. feats", feats.shape, feats)
        #print("3. logit", logit.shape, logit)

        # print(logit.shape)
        # print(y_hat.shape)
        # print(y_hat)

        # loss pad
        #real_value_index = [scripts.contiguous().view(-1) != 0]
        #loss = criterion(logit.contiguous().view(-1, logit.size(-1)), output_scripts.contiguous().view(-1))

        loss, n_correct = cal_performance(pred, gold, smoothing=0.1)

        #print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
        #print(loss)
        #print(n_correct)
        #exit()

        # Accumulate plain Python numbers so no autograd graphs are kept alive
        total_loss += loss.item()
        total_num += feat_lengths.sum().item()

        display = random.randrange(0, 100) == 0  # print a decoded sample ~1% of the time
        dist, length = get_distance(scripts, y_hat, display=display)
        total_dist += dist
        total_length += length

        total_sent_num += scripts.size(0)

        loss.backward()
        optimizer.step()

        if batch % print_batch == 0:
            current = time.time()
            elapsed = current - begin
            epoch_elapsed = (current - epoch_begin) / 60.0
            train_elapsed = (current - train_begin) / 3600.0

            logger.info(
                'batch: {:4d}/{:4d}, loss: {:.6f}, cer: {:.2f}, elapsed: {:.2f}s {:.2f}m {:.2f}h lr:{:.6f}'
                .format(
                    batch,
                    # len(dataloader),
                    total_batch_size,
                    total_loss / total_num,
                    total_dist / total_length,
                    elapsed,
                    epoch_elapsed,
                    train_elapsed,
                    optimizer.lr))
            begin = time.time()

            nsml.report(False,
                        step=train.cumulative_batch_count,
                        train_step__loss=total_loss / total_num,
                        train_step__cer=total_dist / total_length)
        batch += 1
        train.cumulative_batch_count += 1

    logger.info('train() completed')
    return total_loss / total_num, total_dist / total_length
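
Example 6 reports a character error rate through get_distance(scripts, y_hat), which is not shown. A plausible sketch: strip padding from each reference/hypothesis pair, then accumulate the Levenshtein distance and the reference lengths over the batch. The padding id and the decoding details are assumptions.

def edit_distance(ref, hyp):
    """Levenshtein distance between two sequences (single-row DP)."""
    d = list(range(len(hyp) + 1))
    for i, r in enumerate(ref, 1):
        prev, d[0] = d[0], i
        for j, h in enumerate(hyp, 1):
            # deletion, insertion, substitution/match
            prev, d[j] = d[j], min(d[j] + 1, d[j - 1] + 1,
                                   prev + (r != h))
    return d[len(hyp)]


def get_distance(scripts, y_hat, display=False, pad_id=0):
    """Sum edit distances and reference lengths over a batch of tensors."""
    total_dist, total_length = 0, 0
    for ref_t, hyp_t in zip(scripts, y_hat):
        ref = [t for t in ref_t.tolist() if t != pad_id]
        hyp = [t for t in hyp_t.tolist() if t != pad_id]
        total_dist += edit_distance(ref, hyp)
        total_length += len(ref)
        if display:
            print('ref: {}\nhyp: {}'.format(ref, hyp))
    return total_dist, total_length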