Example 1
# Assumed imports (pycarbon's reader API; DataLoader from petastorm.pytorch,
# as in the petastorm-based pycarbon examples):
from petastorm.pytorch import DataLoader
from pycarbon.reader import make_reader, make_data_loader


def pytorch_hello_world(dataset_url='file:///tmp/carbon_pycarbon_dataset'):
    with DataLoader(make_reader(dataset_url, is_batch=False)) as train_loader:
        sample = next(iter(train_loader))
        print(sample['id'])

    with make_data_loader(make_reader(dataset_url,
                                      is_batch=False)) as train_loader:
        sample = next(iter(train_loader))
        print(sample['id'])
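Both loaders above assume the pycarbon dataset already exists at dataset_url; pycarbon ships a separate dataset-generation example for that. As a minimal sketch of the same non-batch API used above (make_reader with is_batch=False wrapped in make_data_loader), here is how a few rows could be iterated:

def show_first_ids(dataset_url='file:///tmp/carbon_pycarbon_dataset', n=5):
    # is_batch=False yields one row per read; each row behaves like a dict
    # keyed by field name, so sample['id'] is a single value.
    with make_data_loader(make_reader(dataset_url, is_batch=False)) as loader:
        for i, sample in enumerate(loader):
            print(sample['id'])
            if i + 1 >= n:
                break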
Example 2
# Same assumed imports as in Example 1.
from petastorm.pytorch import DataLoader
from pycarbon.reader import make_reader, make_data_loader


def pytorch_hello_world(dataset_url='file:///tmp/carbon_external_dataset'):
    with DataLoader(make_reader(dataset_url)) as train_loader:
        sample = next(iter(train_loader))
        # Because make_reader() defaults to batch mode (Example 1 had to pass
        # is_batch=False), each read returns a batch of rows instead of a single row
        print("id batch: {0}".format(sample['id']))

    with make_data_loader(make_reader(dataset_url)) as train_loader:
        sample = next(iter(train_loader))
        # As above, the reader's default batch mode means each read returns a
        # batch of rows instead of a single row
        print("id batch: {0}".format(sample['id']))
Example 3

# Assumed imports: torch and petastorm's TransformSpec; pytorch_example,
# pytorch_example_unified, mnist_data_to_pycarbon_dataset and the
# large_mock_mnist_data fixture come from pycarbon's test suite.
import torch
from petastorm.transform import TransformSpec
from pycarbon.reader import make_reader


def test_full_pytorch_example_unified(large_mock_mnist_data, tmpdir):
    # First, generate mock dataset
    dataset_url = 'file://{}'.format(tmpdir)
    mnist_data_to_pycarbon_dataset(tmpdir,
                                   dataset_url,
                                   mnist_data=large_mock_mnist_data,
                                   spark_master='local[1]',
                                   carbon_files_count=1)

    # Next, run a round of training using the pytorch-adapting data loader
    from pycarbon.reader import make_data_loader

    torch.manual_seed(1)
    device = torch.device('cpu')
    model = pytorch_example.Net().to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

    transform = TransformSpec(pytorch_example._transform_row,
                              removed_fields=['idx'])

    with make_data_loader(make_reader('{}/train'.format(dataset_url),
                                      is_batch=False,
                                      reader_pool_type='thread',
                                      num_epochs=1,
                                      transform_spec=transform),
                          batch_size=32) as train_loader:
        pytorch_example_unified.train(model, device, train_loader, 10,
                                      optimizer, 1)

    with make_data_loader(make_reader('{}/test'.format(dataset_url),
                                      is_batch=False,
                                      reader_pool_type='thread',
                                      num_epochs=1,
                                      transform_spec=transform),
                          batch_size=100) as test_loader:
        pytorch_example_unified.evaluation(model, device, test_loader)
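The body of pytorch_example._transform_row is not shown here. As a hypothetical stand-in only, a row transform for MNIST-style data typically rescales the image and passes the label through; the field names 'image' and 'digit' are assumptions:

import numpy as np

def _transform_row_sketch(row):
    # Hypothetical stand-in for pytorch_example._transform_row: scale the
    # image to float32 in [0, 1] and keep the label. The 'idx' field is
    # dropped via removed_fields, so it must not be returned.
    return {
        'image': (row['image'] / 255.0).astype(np.float32),
        'digit': row['digit'],
    }

transform = TransformSpec(_transform_row_sketch, removed_fields=['idx'])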
Example 4
# Assumed module-level context: argparse, jnius_config, torch and torch.optim
# as optim, petastorm's TransformSpec, pycarbon's make_reader and
# make_data_loader, plus Net, _transform_row, train, evaluation and the
# DEFAULT_* constants defined elsewhere in the example module.
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='Pycarbon MNIST Example')
    default_dataset_url = 'file://{}'.format(DEFAULT_MNIST_DATA_PATH)
    parser.add_argument(
        '--dataset-url',
        type=str,
        default=default_dataset_url,
        metavar='S',
        help='hdfs:// or file:/// URL to the MNIST pycarbon dataset '
        '(default: %s)' % default_dataset_url)
    parser.add_argument('--batch-size',
                        type=int,
                        default=64,
                        metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size',
                        type=int,
                        default=1000,
                        metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs',
                        type=int,
                        default=10,
                        metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--all-epochs',
                        action='store_true',
                        default=False,
                        help='train all epochs before testing accuracy/loss')
    parser.add_argument('--lr',
                        type=float,
                        default=0.01,
                        metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum',
                        type=float,
                        default=0.5,
                        metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed',
                        type=int,
                        default=1,
                        metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument(
        '--log-interval',
        type=int,
        default=10,
        metavar='N',
        help='how many batches to wait before logging training status')
    parser.add_argument('--carbon-sdk-path',
                        type=str,
                        default=DEFAULT_CARBONSDK_PATH,
                        help='carbon sdk path')
    args = parser.parse_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()

    jnius_config.set_classpath(args.carbon_sdk_path)

    torch.manual_seed(args.seed)

    device = torch.device('cuda' if use_cuda else 'cpu')

    model = Net().to(device)
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum)

    # Configure loop and Reader epoch for illustrative purposes.
    # Typical training usage would use the `all_epochs` approach.
    #
    if args.all_epochs:
        # Run training across all the epochs before testing for accuracy
        loop_epochs = 1
        reader_epochs = args.epochs
    else:
        # Test training accuracy after each epoch
        loop_epochs = args.epochs
        reader_epochs = 1

    transform = TransformSpec(_transform_row, removed_fields=['idx'])

    # Instantiate a fresh pycarbon Reader for each loop iteration, with the
    # appropriate num_epochs setting and the row transform applied
    for epoch in range(1, loop_epochs + 1):
        with make_data_loader(make_reader('{}/train'.format(args.dataset_url),
                                          is_batch=False,
                                          num_epochs=reader_epochs,
                                          transform_spec=transform),
                              batch_size=args.batch_size) as train_loader:
            train(model, device, train_loader, args.log_interval, optimizer,
                  epoch)

        with make_data_loader(make_reader('{}/test'.format(args.dataset_url),
                                          is_batch=False,
                                          num_epochs=reader_epochs,
                                          transform_spec=transform),
                              batch_size=args.test_batch_size) as test_loader:
            evaluation(model, device, test_loader)
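train and evaluation are defined elsewhere in the example module. As a hypothetical sketch of a train loop compatible with the call above, assuming the MNIST field names 'image' and 'digit' and a model that returns log-probabilities (as the usual MNIST Net ending in log_softmax does):

import torch.nn.functional as F

def train(model, device, train_loader, log_interval, optimizer, epoch):
    # Sketch only: one pass over the loader, one SGD step per batch,
    # logging every `log_interval` batches.
    model.train()
    for batch_idx, batch in enumerate(train_loader):
        data = batch['image'].to(device)
        target = batch['digit'].to(device)
        optimizer.zero_grad()
        loss = F.nll_loss(model(data), target)
        loss.backward()
        optimizer.step()
        if batch_idx % log_interval == 0:
            print('Train epoch {} batch {}: loss {:.6f}'.format(
                epoch, batch_idx, loss.item()))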