# --- Example 1 ---
def test(args, model, device, test_loader, epoch):
    """Evaluate ``model`` on ``test_loader`` and report epoch metrics.

    Accumulates the summed NLL loss and the number of correct predictions
    over the whole test set, then logs the per-sample average loss and the
    accuracy to the current ClearML logger under the "test" title and
    prints a one-line summary.
    """
    model.eval()
    total_loss = 0
    n_correct = 0
    n_samples = len(test_loader.dataset)
    with torch.no_grad():
        for batch, labels in test_loader:
            batch, labels = batch.to(device), labels.to(device)
            logits = model(batch)
            # reduction="sum" so dividing by the dataset size afterwards
            # yields a true per-sample average.
            total_loss += F.nll_loss(logits, labels, reduction="sum").item()
            # Predicted class = index of the max log-probability.
            predictions = logits.argmax(dim=1, keepdim=True)
            n_correct += predictions.eq(labels.view_as(predictions)).sum().item()

    avg_loss = total_loss / n_samples

    logger = Logger.current_logger()
    logger.report_scalar("test", "loss", iteration=epoch, value=avg_loss)
    logger.report_scalar(
        "test", "accuracy", iteration=epoch, value=(n_correct / n_samples)
    )
    print(
        "Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)".format(
            avg_loss,
            n_correct,
            n_samples,
            100.0 * n_correct / n_samples,
        )
    )
# --- Example 2 ---
def train(args, model, device, train_loader, optimizer, epoch):
    """Run one training epoch, logging the loss to ClearML.

    :param args: namespace with at least ``log_interval``
    :param model: network to train (must produce log-probabilities for nll_loss)
    :param device: device the data is moved to
    :param train_loader: DataLoader yielding (data, target) batches
    :param optimizer: optimizer stepped once per batch
    :param epoch: current epoch index, used in the logged iteration number
    """
    save_loss = []

    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        # BUG FIX: appending the live `loss` tensor kept every batch's
        # autograd graph alive for the whole epoch (unbounded GPU/CPU
        # memory growth); store the detached Python scalar instead.
        save_loss.append(loss.item())

        optimizer.step()
        if batch_idx % args.log_interval == 0:
            Logger.current_logger().report_scalar(
                "train",
                "loss",
                iteration=(epoch * len(train_loader) + batch_idx),
                value=loss.item())
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
            # Add manual scalar reporting for loss metrics
            Logger.current_logger().report_scalar(
                title='Scalar example {} - epoch'.format(epoch),
                series='Loss',
                value=loss.item(),
                iteration=batch_idx)
# --- Example 3 ---
def train(args, model, device, train_loader, optimizer, epoch):
    """Run one training epoch and periodically report the batch loss.

    Every ``args.log_interval`` batches the current loss is sent to the
    ClearML logger (series "train"/"loss") and printed to the console.
    """
    model.train()
    dataset_size = len(train_loader.dataset)
    n_batches = len(train_loader)
    for step, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        log_probs = model(inputs)
        loss = F.nll_loss(log_probs, labels)
        loss.backward()
        optimizer.step()
        # Only log every `log_interval`-th batch.
        if step % args.log_interval != 0:
            continue
        Logger.current_logger().report_scalar(
            "train",
            "loss",
            iteration=(epoch * n_batches + step),
            value=loss.item(),
        )
        print(
            "Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
                epoch,
                step * len(inputs),
                dataset_size,
                100.0 * step / n_batches,
                loss.item(),
            )
        )
# --- Example 4 ---
def test(model, device, criterion, test_loader, epoch):
    """Evaluate the model for one epoch and report averaged metrics.

    :param model: network under evaluation
    :param device: device the batches are moved to
    :param criterion: loss function applied to (output, target)
    :param test_loader: DataLoader yielding (data, target) batches
    :param epoch: epoch index used as the scalar-report iteration
    """
    model.eval()
    epoch_loss = 0
    epoch_acc = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data.float())
            loss = criterion(output, target.float())

            acc = binary_acc(output, target.float())
            epoch_loss += loss.item()
            epoch_acc += acc.item()

        #print(f'Epoch(TEST) {epoch+0:03}: | Loss: {epoch_loss/len(test_loader):.8f} | Acc: {epoch_acc/len(test_loader):.3f}')

    # Average over the number of batches, matching the commented print above.
    num_batches = len(test_loader)
    Logger.current_logger().report_scalar("test",
                                          "loss",
                                          iteration=epoch,
                                          value=epoch_loss / num_batches)
    # BUG FIX: previously reported the *last batch's* `acc` divided by the
    # dataset size; report the accumulated per-batch average instead.
    Logger.current_logger().report_scalar("test",
                                          "accuracy",
                                          iteration=epoch,
                                          value=(epoch_acc / num_batches))
    def log_trajectories(model):
        """Report ground-truth and predicted 3-D positions as line scatters.

        Reads both trajectories from ``model.data_saver`` and sends each to
        the ClearML logger under the shared "trajectory" title.
        """
        logger = Logger.current_logger()
        trajectories = (
            ("truth_positions", model.data_saver["truth_position"]),
            ("predicted_position", model.data_saver["predicted_position"]),
        )
        for series_name, points in trajectories:
            logger.report_scatter3d(title="trajectory",
                                    series=series_name,
                                    iteration=1,
                                    scatter=points,
                                    xaxis="x",
                                    yaxis="y",
                                    zaxis="z",
                                    mode="lines")
# --- Example 6 ---
def test(args, model, device, test_loader, epoch):
    """Evaluate the model and report scalars, a histogram and a matrix.

    Tracks the running summed loss and running correct count after each
    batch, logs the averaged loss and accuracy as "test" scalars, a
    histogram of the running correct counts, and a 2-row matrix of the
    running (loss, correct) series, then prints a summary line.
    """
    running_losses = []
    running_correct = []

    model.eval()
    loss_sum = 0
    hits = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            logits = model(images)
            # Summed batch loss; averaged over the dataset after the loop.
            loss_sum += F.nll_loss(logits, labels, reduction='sum').item()
            # Predicted class = index of the max log-probability.
            guesses = logits.argmax(dim=1, keepdim=True)
            hits += guesses.eq(labels.view_as(guesses)).sum().item()

            # Cumulative (running) values, one entry per batch.
            running_losses.append(loss_sum)
            running_correct.append(hits)

    loss_sum /= len(test_loader.dataset)

    logger = Logger.current_logger()
    logger.report_scalar("test", "loss", iteration=epoch, value=loss_sum)
    logger.report_scalar("test",
                         "accuracy",
                         iteration=epoch,
                         value=(hits / len(test_loader.dataset)))

    logger.report_histogram(title='Histogram example',
                            series='correct',
                            iteration=1,
                            values=running_correct,
                            xaxis='Test',
                            yaxis='Correct')

    matrix = np.array([running_losses, running_correct])
    logger.report_confusion_matrix(title='Confusion matrix example',
                                   series='Test loss / correct',
                                   iteration=1,
                                   matrix=matrix)

    print(
        '\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
            loss_sum, hits, len(test_loader.dataset),
            100. * hits / len(test_loader.dataset)))
# --- Example 7 ---
def train(model, device, train_loader, criterion, optimizer, epoch):
    """Run one training epoch and log every batch's loss to ClearML.

    Also accumulates the per-batch loss and binary accuracy into epoch
    totals (``epoch_loss`` / ``epoch_acc``).
    """
    epoch_loss = 0
    epoch_acc = 0
    model.train()
    iteration_base = epoch * len(train_loader)
    for step, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs.float())
        batch_loss = criterion(outputs, labels.float())
        batch_loss.backward()
        optimizer.step()

        batch_acc = binary_acc(outputs, labels.float())
        epoch_loss += batch_loss.item()
        epoch_acc += batch_acc.item()

        Logger.current_logger().report_scalar(
            "train",
            "loss",
            iteration=(iteration_base + step),
            value=batch_loss.item())
# --- Example 8 ---
def main():
    """Parse CLI args, train a CNN on MNIST, and log runs through ClearML.

    Initializes a ClearML task (which re-launches the script on the "gpu"
    queue via ``execute_remotely``), builds the MNIST loaders, trains for
    ``--epochs`` epochs, and optionally saves the model snapshot.
    """
    # Connecting ClearML with the current process,
    # from here on everything is logged automatically
    import os
    os.environ["AWS_ACCESS_KEY_ID"] = "minioadmin"
    os.environ["AWS_SECRET_ACCESS_KEY"] = "minioadmin"
    # NOTE(review): these are literal strings, not the parsed CLI values —
    # Task.init must run before argparse so execute_remotely can re-launch
    # the script; confirm the placeholder names are intentional.
    task = Task.init(project_name="args.project_name",
                     task_name="args.task_name")
    task.set_base_docker("harbor.io/nvidia/cuda:10.1-devel-ubuntu18.04")
    task.execute_remotely(queue_name="gpu", exit_process=True)

    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument(
        '--project-name',
        type=str,
        default='MNIST',
        help='ML Task Name, such as Classification of numbers in MNIST')
    parser.add_argument('--task-name',
                        type=str,
                        default='2 layer CNN',
                        help='Technique to test, such as 2 layer CNN')
    parser.add_argument('--batch-size',
                        type=int,
                        default=64,
                        metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size',
                        type=int,
                        default=1000,
                        metavar='N',
                        help='input batch size for testing (default: 1000)')
    # BUG FIX: help text claimed "(default: 10)" but the default is 5.
    parser.add_argument('--epochs',
                        type=int,
                        default=5,
                        metavar='N',
                        help='number of epochs to train (default: 5)')
    parser.add_argument('--lr',
                        type=float,
                        default=0.01,
                        metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum',
                        type=float,
                        default=0.5,
                        metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed',
                        type=int,
                        default=1,
                        metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument(
        '--log-interval',
        type=int,
        default=10,
        metavar='N',
        help='how many batches to wait before logging training status')
    parser.add_argument('--save-model',
                        action='store_true',
                        default=True,
                        help='For Saving the current Model')
    args = parser.parse_args()

    use_cuda = not args.no_cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")

    kwargs = {'num_workers': 4, 'pin_memory': True} if use_cuda else {}
    # NOTE(review): the train set is read from './data' with download=False
    # while the test set uses '../data' — confirm both directories exist and
    # the mismatch is intended.
    train_loader = torch.utils.data.DataLoader(datasets.MNIST(
        os.path.join('.', 'data'),
        train=True,
        download=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               **kwargs)
    test_loader = torch.utils.data.DataLoader(datasets.MNIST(
        os.path.join('..', 'data'),
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])),
                                              batch_size=args.test_batch_size,
                                              shuffle=True,
                                              **kwargs)

    model = Net().to(device)
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum)

    for epoch in range(1, args.epochs + 1):
        train(args, model, device, train_loader, optimizer, epoch)
        test(args, model, device, test_loader, epoch)

    # BUG FIX: `model_snapshots_path` was never defined, so the final
    # report_text always raised NameError; define it as the directory the
    # snapshot is actually written to.
    model_snapshots_path = gettempdir()
    if args.save_model:
        torch.save(model.state_dict(),
                   os.path.join(model_snapshots_path, "mnist_cnn.pt"))
    Logger.current_logger().report_text(
        'The default output destination for model snapshots and artifacts is: {}'
        .format(model_snapshots_path))
# --- Example 9 ---
def validate(val_loader, encoder, decoder, criterion, epoch):
    """
    Performs one epoch's validation.

    :param val_loader: DataLoader for validation data.
    :param encoder: encoder model
    :param decoder: decoder model
    :param criterion: loss layer
    :param epoch: epoch number, used only to compute the scalar-report iteration
    :return: BLEU-4 score

    NOTE(review): relies on module-level names not defined here —
    ``device``, ``alpha_c``, ``word_map``, ``print_freq``, ``AverageMeter``,
    ``accuracy``, ``corpus_bleu`` and ``pack_padded_sequence``.
    """
    decoder.eval()  # eval mode (no dropout or batchnorm)
    if encoder is not None:
        encoder.eval()

    batch_time = AverageMeter()
    losses = AverageMeter()
    top5accs = AverageMeter()
    # print('during_validation')

    start = time.time()

    references = list(
    )  # references (true captions) for calculating BLEU-4 score
    hypotheses = list()  # hypotheses (predictions)

    # explicitly disable gradient calculation to avoid CUDA memory error
    # solves the issue #57
    with torch.no_grad():
        # Batches
        for i, (imgs, caps, caplens, allcaps) in enumerate(val_loader):

            # Move to device, if available
            imgs = imgs.to(device)
            caps = caps.to(device)
            caplens = caplens.to(device)

            # Forward prop.
            if encoder is not None:
                imgs = encoder(imgs)
            scores, caps_sorted, decode_lengths, alphas, sort_ind = decoder(
                imgs, caps, caplens)

            # Since we decoded starting with <start>, the targets are all words after <start>, up to <end>
            targets = caps_sorted[:, 1:]

            # Remove timesteps that we didn't decode at, or are pads
            # pack_padded_sequence is an easy trick to do this
            # Keep an unpacked copy for decoding the hypotheses below.
            scores_copy = scores.clone()
            scores = pack_padded_sequence(
                scores, decode_lengths, batch_first=True
            ).data  #Replace this with the below two options
            targets = pack_padded_sequence(
                targets, decode_lengths, batch_first=True
            ).data  #Replace this with the below two options

            # scores, _ = pack_padded_sequence(scores, decode_lengths, batch_first=True)
            # targets, _ = pack_padded_sequence(targets, decode_lengths, batch_first=True)

            # Calculate loss
            loss = criterion(scores, targets)

            # # Add doubly stochastic attention regularization This is on in the OG
            loss += alpha_c * ((1. - alphas.sum(dim=1))**2).mean()

            # Keep track of metrics (weighted by total decoded words)
            losses.update(loss.item(), sum(decode_lengths))
            top5 = accuracy(scores, targets, 5)
            top5accs.update(top5, sum(decode_lengths))
            batch_time.update(time.time() - start)

            start = time.time()

            if i % print_freq == 0:
                print(
                    'Validation: [{0}/{1}]\t'
                    'Batch Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                    'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                    'Top-5 Accuracy {top5.val:.3f} ({top5.avg:.3f})\t'.format(
                        i,
                        len(val_loader),
                        batch_time=batch_time,
                        loss=losses,
                        top5=top5accs))

            # Store references (true captions), and hypothesis (prediction) for each image
            # If for n images, we have n hypotheses, and references a, b, c... for each image, we need -
            # references = [[ref1a, ref1b, ref1c], [ref2a, ref2b], ...], hypotheses = [hyp1, hyp2, ...]

            # References
            allcaps = allcaps[
                sort_ind]  # because images were sorted in the decoder
            for j in range(allcaps.shape[0]):
                img_caps = allcaps[j].tolist()
                img_captions = list(
                    map(
                        lambda c: [
                            w for w in c if w not in
                            {word_map['<start>'], word_map['<pad>']}
                        ], img_caps))  # remove <start> and pads
                references.append(img_captions)

            # Hypotheses
            _, preds = torch.max(scores_copy, dim=2)
            preds = preds.tolist()
            temp_preds = list()
            for j, p in enumerate(preds):
                temp_preds.append(preds[j][:decode_lengths[j]])  # remove pads
            preds = temp_preds
            hypotheses.extend(preds)

            assert len(references) == len(hypotheses)

        # Calculate BLEU-4 scores
        bleu4 = corpus_bleu(references, hypotheses)

        # NOTE(review): `i`, `loss` and `top5` below carry the values from
        # the *last* batch only (and raise NameError on an empty loader);
        # the epoch-level figures are `losses.avg` / `top5accs.avg`.
        Logger.current_logger().report_scalar(
            "val",
            "loss",
            iteration=(epoch * len(val_loader) + i),
            value=loss.item())
        Logger.current_logger().report_scalar(
            title='val',
            series='top_5_accuracy',
            value=top5,
            iteration=(epoch * len(val_loader) + i))
        Logger.current_logger().report_scalar(
            title='val',
            series='bleu4',
            value=bleu4,
            iteration=(epoch * len(val_loader) + i))

        print(
            '\n * LOSS - {loss.avg:.3f}, TOP-5 ACCURACY - {top5.avg:.3f}, BLEU-4 - {bleu}\n'
            .format(loss=losses, top5=top5accs, bleu=bleu4))

    return bleu4
# --- Example 10 ---
def train(train_loader, encoder, decoder, criterion, encoder_optimizer,
          decoder_optimizer, epoch):
    """
    Performs one epoch's training.

    :param train_loader: DataLoader for training data
    :param encoder: encoder model
    :param decoder: decoder model
    :param criterion: loss layer
    :param encoder_optimizer: optimizer to update encoder's weights (if fine-tuning)
    :param decoder_optimizer: optimizer to update decoder's weights
    :param epoch: epoch number

    NOTE(review): relies on module-level names not defined here —
    ``device``, ``alpha_c``, ``grad_clip``, ``print_freq``,
    ``tensorboard_writer``, ``AverageMeter``, ``accuracy``,
    ``clip_gradient`` and ``pack_padded_sequence``.
    """

    decoder.train()  # train mode (dropout and batchnorm is used)
    encoder.train()

    batch_time = AverageMeter()  # forward prop. + back prop. time
    data_time = AverageMeter()  # data loading time
    losses = AverageMeter()  # loss (per word decoded)
    top5accs = AverageMeter()  # top5 accuracy

    start = time.time()

    # Batches
    for i, (imgs, caps, caplens) in enumerate(train_loader):
        data_time.update(time.time() - start)

        # Move to GPU, if available
        imgs = imgs.to(device)
        caps = caps.to(device)
        caplens = caplens.to(device)

        # Forward prop.
        imgs = encoder(imgs)
        scores, caps_sorted, decode_lengths, alphas, sort_ind = decoder(
            imgs, caps, caplens)
        # print(f'Alphas is {alphas}')
        # Since we decoded starting with <start>, the targets are all words after <start>, up to <end>
        targets = caps_sorted[:, 1:]

        # Remove timesteps that we didn't decode at, or are pads
        # pack_padded_sequence is an easy trick to do this
        scores = pack_padded_sequence(
            scores, decode_lengths,
            batch_first=True).data  #Replace this with the below two options
        targets = pack_padded_sequence(
            targets, decode_lengths,
            batch_first=True).data  #Replace this with the below two options

        # scores = pack_padded_sequence(scores, decode_lengths, batch_first=True)
        # targets = pack_padded_sequence(targets, decode_lengths, batch_first=True)

        # Calculate loss
        loss = criterion(scores, targets)

        # # Add doubly stochastic attention regularization this is on in the OG
        loss += alpha_c * ((1. - alphas.sum(dim=1))**2).mean()

        # Back prop. (encoder optimizer is None when not fine-tuning)
        decoder_optimizer.zero_grad()
        if encoder_optimizer is not None:
            encoder_optimizer.zero_grad()
        loss.backward()

        # Clip gradients
        if grad_clip is not None:
            clip_gradient(decoder_optimizer, grad_clip)
            if encoder_optimizer is not None:
                clip_gradient(encoder_optimizer, grad_clip)

        # Update weights
        decoder_optimizer.step()
        if encoder_optimizer is not None:
            encoder_optimizer.step()

        # Keep track of metrics (weighted by total decoded words)
        top5 = accuracy(scores, targets, 5)
        losses.update(loss.item(), sum(decode_lengths))
        top5accs.update(top5, sum(decode_lengths))
        batch_time.update(time.time() - start)

        start = time.time()

        # ClearML scalars every 5 batches (hard-coded, independent of print_freq)
        if i % 5 == 0:
            Logger.current_logger().report_scalar(
                "train",
                "loss",
                iteration=(epoch * len(train_loader) + i),
                value=loss.item())
            Logger.current_logger().report_scalar(
                title='train',
                series='top_5_accuracy',
                value=top5,
                iteration=(epoch * len(train_loader) + i))
            # NOTE(review): logs the full loss tensor at x=epoch, so values
            # within an epoch overwrite each other — confirm this is intended.
            tensorboard_writer.add_scalar('loss/epoch', loss, epoch)
        # Print status
        if i % print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Batch Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data Load Time {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Top-5 Accuracy {top5.val:.3f} ({top5.avg:.3f})'.format(
                      epoch,
                      i,
                      len(train_loader),
                      batch_time=batch_time,
                      data_time=data_time,
                      loss=losses,
                      top5=top5accs))
# --- Example 11 ---
#
import os
from clearml import Task, Logger

# Connecting ClearML with the current process,
# from here on everything is logged automatically
# Create/attach the ClearML task that receives the media samples below.
task = Task.init(project_name="examples",
                 task_name="Audio and video reporting")

print('reporting audio and video samples to the debug samples section')

# report video, an already uploaded video media (url)
# (ClearML links the URL; the file itself is not re-uploaded)
Logger.current_logger().report_media(
    'video',
    'big bunny',
    iteration=1,
    url=
    'https://test-videos.co.uk/vids/bigbuckbunny/mp4/h264/720/Big_Buck_Bunny_720_10s_1MB.mp4'
)

#  report audio, report an already uploaded audio media (url)
Logger.current_logger().report_media(
    'audio',
    'pink panther',
    iteration=1,
    url='https://www2.cs.uic.edu/~i101/SoundFiles/PinkPanther30.wav')

#  report audio, report local media audio file
Logger.current_logger().report_media('audio',
                                     'tada',
                                     iteration=1,