def test_full_pytorch_example(large_mock_mnist_data, tmpdir):
    """Run the PyTorch MNIST example end to end against a mock dataset.

    A dataset is first written under ``tmpdir`` from
    ``large_mock_mnist_data``. The example's ``train`` and ``evaluation``
    functions are then each driven once through a petastorm ``DataLoader``
    wrapping a single-epoch carbon reader.
    """
    # Step 1: materialize the mock dataset.
    dataset_url = 'file://{}'.format(tmpdir)
    mnist_data_to_pycarbon_dataset(
        tmpdir,
        dataset_url,
        mnist_data=large_mock_mnist_data,
        spark_master='local[1]',
        carbon_files_count=1,
    )

    # Step 2: run a round of training using the PyTorch-adapting data loader.
    from petastorm.pytorch import DataLoader

    torch.manual_seed(1)
    cpu = torch.device('cpu')
    net = pytorch_example.Net().to(cpu)
    sgd = torch.optim.SGD(net.parameters(), lr=0.01, momentum=0.5)

    # The same row transform (dropping the 'idx' field) is shared by both
    # the train and the test readers.
    row_transform = TransformSpec(pytorch_example._transform_row,
                                  removed_fields=['idx'])

    def _open_loader(split, batch_size):
        # Build a one-epoch reader over the given sub-directory of the
        # dataset and wrap it in a DataLoader.
        reader = make_carbon_reader(dataset_url + '/' + split,
                                    reader_pool_type='dummy',
                                    num_epochs=1,
                                    transform_spec=row_transform)
        return DataLoader(reader, batch_size=batch_size)

    with _open_loader('train', 32) as train_loader:
        # NOTE(review): the positional 10 and 1 are presumably the log
        # interval and epoch number -- confirm against pytorch_example.train.
        pytorch_example.train(net, cpu, train_loader, 10, sgd, 1)

    with _open_loader('test', 100) as test_loader:
        pytorch_example.evaluation(net, cpu, test_loader)
def generate_mnist_dataset(small_mock_mnist_data, tmpdir_factory):
    """Write a mock MNIST dataset into a new temp directory.

    The directory is created via ``tmpdir_factory.mktemp('data')`` and
    populated from ``small_mock_mnist_data``.

    Returns:
        The path string of the directory the dataset was written to.
    """
    dataset_dir = tmpdir_factory.mktemp('data').strpath

    # carbon_files_count is set to speed up the test.
    mnist_data_to_pycarbon_dataset(
        dataset_dir,
        'file://{}'.format(dataset_dir),
        mnist_data=small_mock_mnist_data,
        spark_master='local[1]',
        carbon_files_count=1,
    )
    return dataset_dir
# Example no. 3
# 0
def test_full_mxnet_example(large_mock_mnist_data, tmpdir):
    """Run the MXNet MNIST iterator example against a mock dataset.

    A dataset is written under ``tmpdir`` from ``large_mock_mnist_data``,
    then ``mxnet_example.mnist_iter_test`` is invoked on its ``train``
    sub-path for one epoch with a batch size of 100.
    """
    # Step 1: materialize the mock dataset.
    dataset_url = 'file://{}'.format(tmpdir)
    mnist_data_to_pycarbon_dataset(
        tmpdir,
        dataset_url,
        mnist_data=large_mock_mnist_data,
        spark_master='local[1]',
        carbon_files_count=1,
    )

    # Step 2: iterate over the training split with the MXNet example.
    train_path = dataset_url + "/train"
    mxnet_example.mnist_iter_test(dataset_path=train_path,
                                  num_epoch=1,
                                  batch_size=100)
# Example no. 4
# 0
def test_full_tf_example_unifeid(large_mock_mnist_data, tmpdir):
    """Run the unified TensorFlow MNIST example against a mock dataset.

    A dataset is written under ``tmpdir`` from ``large_mock_mnist_data``,
    then ``tf_example_unified.train_and_test`` is called on it with 10
    training iterations, a batch size of 10 and an evaluation interval of 10.
    """
    # Step 1: materialize the mock dataset.
    dataset_url = 'file://{}'.format(tmpdir)
    mnist_data_to_pycarbon_dataset(
        tmpdir,
        dataset_url,
        mnist_data=large_mock_mnist_data,
        spark_master='local[1]',
        carbon_files_count=1,
    )

    # Step 2: TensorFlow train and test. The timestamp is taken after the
    # dataset has been generated and handed to the example as ``start``.
    started_at = time.time()
    run_settings = dict(
        dataset_url=dataset_url,
        training_iterations=10,
        batch_size=10,
        evaluation_interval=10,
        start=started_at,
    )
    tf_example_unified.train_and_test(**run_settings)