def test(model, test_loader, cuda):
    """Evaluate ``model`` on ``test_loader``, save its weights and report metrics.

    The summed negative log-likelihood loss and the number of correct
    predictions are accumulated over every batch, then divided by
    ``len(test_loader.dataset)``. The model's ``state_dict`` is written to
    ``model.dat`` inside the directory returned by ``get_outputs_path()``,
    and the average loss and accuracy are passed to ``send_metrics``.

    Args:
        model: Module to evaluate; called as ``model(data)``. Its output is
            fed to ``F.nll_loss``, so it is presumably log-probabilities
            with classes along dimension 1 -- confirm against the model.
        test_loader: Iterable of ``(data, target)`` batches that also
            exposes a ``dataset`` attribute supporting ``len()``.
        cuda: When truthy, each batch is moved to the GPU with ``.cuda()``.
    """
    model.eval()
    test_loss = 0.0
    correct = 0
    # Evaluation needs no gradients; no_grad() replaces the removed
    # ``Variable(..., volatile=True)`` idiom.
    with torch.no_grad():
        for data, target in test_loader:
            if cuda:
                data, target = data.cuda(), target.cuda()
            output = model(data)
            # Sum up batch loss. ``.item()`` yields a Python float, so the
            # accumulator never becomes a tensor.
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            # Get the index of the max log-probability.
            pred = output.max(1, keepdim=True)[1]
            # ``.item()`` keeps ``correct`` a Python int, so the accuracy
            # division below is ordinary true division rather than an
            # integer-tensor division.
            correct += pred.eq(target.view_as(pred)).sum().item()

    num_samples = len(test_loader.dataset)
    test_loss /= num_samples
    accuracy = correct / num_samples
    logging.info(
        'Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
            test_loss, correct, num_samples, 100. * accuracy))
    output_path = get_outputs_path()
    model_path = os.path.join(output_path, "model.dat")
    torch.save(model.state_dict(), model_path)

    # Both values are already plain Python numbers here.
    send_metrics(loss=test_loss, accuracy=accuracy)
Example #2
0
def main(argv=None):
    """Run the cartpole client with its output directed to the outputs path.

    ``-f <outputs path>`` is appended to the argument list before it is
    handed to ``cartpole_client.main``. Afterwards the ``'score'`` entry of
    the first element of ``cartpole_client.RESULTS`` is sent as a metric.

    Args:
        argv: Command-line arguments to forward. Defaults to
            ``sys.argv[1:]``, resolved at call time. The list passed in is
            copied and never modified.
    """
    # A default of ``sys.argv[1:]`` in the signature is created once at
    # definition time; extending it would accumulate another ``-f <path>``
    # pair on every call. Resolve the default here and work on a copy.
    argv = list(sys.argv[1:] if argv is None else argv)
    argv.extend(['-f', get_outputs_path()])

    cartpole_client.main(argv)

    send_metrics(score=cartpole_client.RESULTS[0]['score'])
Example #3
0
def report_progress(epoch, best, losses, scores):
    """Print one row of text-categorizer results and send them as metrics.

    Args:
        epoch: Value forwarded to ``send_metrics`` as ``epoch``.
        best: Value forwarded to ``send_metrics`` as ``best_acc``.
        losses: Mapping that contains a ``"textcat"`` entry.
        scores: Mapping that contains ``"textcat_p"``, ``"textcat_r"`` and
            ``"textcat_f"`` entries.
    """
    textcat_loss = losses["textcat"]
    precision = scores["textcat_p"]
    recall = scores["textcat_r"]
    f_score = scores["textcat_f"]

    # Print a simple table row: loss, P, R, F separated by tabs.
    row_template = "{0:.3f}\t{1:.3f}\t{2:.3f}\t{3:.3f}"
    print(row_template.format(textcat_loss, precision, recall, f_score))

    metrics = {
        "epoch": epoch,
        "best_acc": best,
        "loss": textcat_loss,
        "P": precision,
        "R": recall,
        "F": f_score,
    }
    send_metrics(**metrics)
Example #4
0
def distributed_train(model, train_set, epoch, optimizer, rank, num_batches,
                      log_interval):
    """Distributed Synchronous SGD Example.

    Runs one pass over ``train_set``. For every batch the negative
    log-likelihood loss is back-propagated, ``average_gradients(model)`` is
    called, and the optimizer takes a step. The summed batch loss divided by
    ``num_batches`` is logged and sent through ``send_metrics``.

    Args:
        model: Module being trained; called as ``model(data)``.
        train_set: Iterable of ``(data, target)`` batches that also exposes
            a ``dataset`` attribute supporting ``len()``.
        epoch: Epoch number, used only in log messages.
        optimizer: Optimizer providing ``zero_grad()`` and ``step()``.
        rank: Identifier of this worker, used only in log messages.
        num_batches: Divisor applied to the summed loss for the epoch.
        log_interval: Progress is logged whenever the batch index is a
            multiple of this value.
    """
    epoch_loss = 0.0
    # Materialize the batches once so their count is available for the
    # percentage shown in the progress log.
    train_dataset = list(train_set)
    for batch_idx, (data, target) in enumerate(train_dataset):
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        # ``loss`` is a 0-dim tensor; ``.item()`` is the supported way to
        # read it as a Python float (``loss.data[0]`` raises on current
        # PyTorch releases).
        batch_loss = loss.item()
        epoch_loss += batch_loss
        loss.backward()
        # Runs between backward() and step(); presumably averages gradients
        # across workers -- see average_gradients for the details.
        average_gradients(model)
        optimizer.step()
        if batch_idx % log_interval == 0:
            logging.info(
                'Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(data), len(train_set.dataset),
                    100. * batch_idx / len(train_dataset), batch_loss))
    logging.info('Rank {}, epoch: {}, loss: {}'.format(
        rank, epoch, epoch_loss / num_batches))
    send_metrics(loss=epoch_loss / num_batches)
Example #5
0
    for epoch in range(start_epoch, args.epochs):
        # Train for one epoch, then evaluate on the test loader.
        train_model(model=model,
                    optimizer=optimizer,
                    train_loader=train_loader,
                    train_dataset=train_dataset,
                    loss_fn=loss_fn,
                    num_epochs=args.epochs,
                    epoch=epoch,
                    batch_size=args.batch_size,
                    notify=args.notify,
                    cuda=cuda)
        accuracy = eval_model(model=model, test_loader=test_loader, cuda=cuda)
        # Scale the evaluation result to a percentage of the dataset size
        # (presumably eval_model returns the count of correct predictions --
        # verify against eval_model).
        accuracy = 100. * accuracy / len(test_loader.dataset)

        logging.info('Test Accuracy: {:.2f}%'.format(accuracy))
        send_metrics(accuracy=accuracy)

        # Save checkpoint logic
        accuracy = torch.FloatTensor([accuracy])
        # Compare against the previous best BEFORE updating it. Taking the
        # max first makes ``accuracy > best_accuracy`` always False, so no
        # checkpoint would ever be written.
        if bool(accuracy.numpy() > best_accuracy.numpy()):
            best_accuracy = accuracy
            logging.info('Saving new state for epoch {}'.format(epoch))
            state = {
                'epoch': epoch + 1,
                'state': model.state_dict(),
                'accuracy': best_accuracy
            }
            torch.save(state, get_weight_filename())
        else:
            logging.info('State did not change for epoch {}'.format(epoch))
Example #6
0
    # ResNet20  | 3 (2)| 92.16     | 91.25     | -----     | -----     | 35 (---)
    # ResNet32  | 5(NA)| 92.46     | 92.49     | NA        | NA        | 50 ( NA)
    # ResNet44  | 7(NA)| 92.50     | 92.83     | NA        | NA        | 70 ( NA)
    # ResNet56  | 9 (6)| 92.71     | 93.03     | 93.01     | NA        | 90 (100)
    # ResNet110 |18(12)| 92.65     | 93.39+-.16| 93.15     | 93.63     | 165(180)
    # ResNet164 |27(18)| -----     | 94.07     | -----     | 94.54     | ---(---)
    # ResNet1001| (111)| -----     | 92.39     | -----     | 95.08+-.14| ---(---)
    # ---------------------------------------------------------------------------

    # Model name, depth and version: the depth is derived from the requested
    # version and depth parameter, and both are embedded in the model name
    # (e.g. 'ResNet20v1').
    depth = get_depth(version=args.version,
                      model_depth_param=args.model_depth_param)
    model_type = 'ResNet%dv%d' % (depth, args.version)

    # Subtracting pixel mean improves accuracy
    subtract_pixel_mean = True
    # Data: train/test splits (each indexed by 'x' and 'y' below) plus the
    # input shape needed to build the model.
    train_data, test_data, input_shape = get_data(subtract_pixel_mean,
                                                  args.num_classes)

    # Build the model, train it, then score the trained model on the test data.
    model = get_model(version=args.version,
                      input_shape=input_shape,
                      depth=depth)
    train(model, model_type, train_data['x'], train_data['y'], test_data['x'],
          test_data['y'], args.data_augmentation, args.batch_size, args.epochs)
    # scores[0] is reported as the test loss and scores[1] as the test
    # accuracy (presumably a Keras model compiled with an accuracy metric --
    # confirm in get_model/train).
    scores = model.evaluate(test_data['x'], test_data['y'], verbose=1)
    print('Test loss:', scores[0])
    print('Test accuracy:', scores[1])
    send_metrics(loss=scores[0], accuracy=scores[1])