def pytorch_hello_world(dataset_url='file:///tmp/carbon_pycarbon_dataset'):
    """Read a single row from a pycarbon dataset and print its 'id' field.

    Demonstrates both loader flavors over the same dataset: the plain
    ``DataLoader`` wrapper and the unified ``make_data_loader`` API.

    :param dataset_url: ``file://`` (or HDFS) URL of the pycarbon dataset.
    """
    # Row-at-a-time reading (is_batch=False) via the DataLoader wrapper.
    with DataLoader(make_reader(dataset_url, is_batch=False)) as train_loader:
        print(next(iter(train_loader))['id'])

    # Same read performed through the unified data-loader entry point.
    with make_data_loader(make_reader(dataset_url, is_batch=False)) as train_loader:
        print(next(iter(train_loader))['id'])
def pytorch_hello_world(dataset_url='file:///tmp/carbon_external_dataset'):
    """Read one batch of rows from an external carbon dataset and print the 'id' column.

    Demonstrates both loader flavors: the plain ``DataLoader`` wrapper and the
    unified ``make_data_loader`` API.

    :param dataset_url: ``file://`` (or HDFS) URL of the external carbon dataset.
    """
    with DataLoader(make_reader(dataset_url)) as train_loader:
        sample = next(iter(train_loader))
        # make_reader() runs in batch mode here (is_batch defaults to True),
        # so each read returns a batch of rows instead of a single row.
        # NOTE(review): the original comment referenced make_batch_reader(),
        # which this code does not call.
        print("id batch: {0}".format(sample['id']))

    with make_data_loader(make_reader(dataset_url)) as train_loader:
        sample = next(iter(train_loader))
        # Same batch semantics as above: one read yields a batch of rows.
        print("id batch: {0}".format(sample['id']))
def test_full_pytorch_example_unified(large_mock_mnist_data, tmpdir):
    """End-to-end check: build a mock MNIST pycarbon dataset, then train and evaluate once."""
    # First, generate the mock dataset under the pytest-provided tmpdir.
    dataset_url = 'file://{}'.format(tmpdir)
    mnist_data_to_pycarbon_dataset(tmpdir, dataset_url,
                                   mnist_data=large_mock_mnist_data,
                                   spark_master='local[1]',
                                   carbon_files_count=1)

    # Next, run a round of training using the pytorch adapting data loader.
    from pycarbon.reader import make_data_loader

    torch.manual_seed(1)
    device = torch.device('cpu')
    model = pytorch_example.Net().to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

    # Drop the synthetic 'idx' column before feeding rows to the model.
    transform = TransformSpec(pytorch_example._transform_row, removed_fields=['idx'])

    train_reader = make_reader('{}/train'.format(dataset_url), is_batch=False,
                               reader_pool_type='thread', num_epochs=1,
                               transform_spec=transform)
    with make_data_loader(train_reader, batch_size=32) as train_loader:
        pytorch_example_unified.train(model, device, train_loader, 10, optimizer, 1)

    test_reader = make_reader('{}/test'.format(dataset_url), is_batch=False,
                              reader_pool_type='thread', num_epochs=1,
                              transform_spec=transform)
    with make_data_loader(test_reader, batch_size=100) as test_loader:
        pytorch_example_unified.evaluation(model, device, test_loader)
def main():
    """Parse CLI settings, then train and evaluate the pycarbon MNIST example.

    Builds the model and optimizer from the parsed arguments and iterates
    epochs either reader-side (``--all-epochs``) or loop-side (default),
    evaluating after training.
    """
    # Training settings
    parser = argparse.ArgumentParser(description='Pycarbon MNIST Example')
    default_dataset_url = 'file://{}'.format(DEFAULT_MNIST_DATA_PATH)
    parser.add_argument('--dataset-url', type=str,
                        default=default_dataset_url, metavar='S',
                        help='hdfs:// or file:/// URL to the MNIST pycarbon dataset '
                             '(default: %s)' % default_dataset_url)
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=10, metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--all-epochs', action='store_true', default=False,
                        help='train all epochs before testing accuracy/loss')
    parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--carbon-sdk-path', type=str, default=DEFAULT_CARBONSDK_PATH,
                        help='carbon sdk path')
    args = parser.parse_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()

    # The carbon SDK jars must be on the JVM classpath before any reader is built.
    jnius_config.set_classpath(args.carbon_sdk_path)

    torch.manual_seed(args.seed)

    device = torch.device('cuda' if use_cuda else 'cpu')

    model = Net().to(device)
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)

    # Configure loop and Reader epoch for illustrative purposes.
    # Typical training usage would use the `all_epochs` approach.
    # NOTE(review): in the mangled original this conditional appeared commented
    # out ('# if args.all_epochs:'), which would have orphaned the else branch;
    # restored here.
    if args.all_epochs:
        # Run training across all the epochs before testing for accuracy
        loop_epochs = 1
        reader_epochs = args.epochs
    else:
        # Test training accuracy after each epoch
        loop_epochs = args.epochs
        reader_epochs = 1

    transform = TransformSpec(_transform_row, removed_fields=['idx'])

    # Instantiate each pycarbon Reader with a single thread, shuffle enabled,
    # and appropriate epoch setting
    for epoch in range(1, loop_epochs + 1):
        with make_data_loader(make_reader('{}/train'.format(args.dataset_url),
                                          is_batch=False,
                                          num_epochs=reader_epochs,
                                          transform_spec=transform),
                              batch_size=args.batch_size) as train_loader:
            train(model, device, train_loader, args.log_interval, optimizer, epoch)
        with make_data_loader(make_reader('{}/test'.format(args.dataset_url),
                                          is_batch=False,
                                          num_epochs=reader_epochs,
                                          transform_spec=transform),
                              batch_size=args.test_batch_size) as test_loader:
            evaluation(model, device, test_loader)