# Example 1
def test_mnist_examples():
    """Check the remote MNIST train/test TensorDataSets load with the expected names and sizes."""

    from dtoolai.data import TensorDataSet

    # (URI, expected dataset name, expected item count)
    expectations = [
        ("http://bit.ly/2uqXxrk", "mnist.train", 60000),
        ("http://bit.ly/2NVFGQd", "mnist.test", 10000),
    ]

    for uri, expected_name, expected_length in expectations:
        tds = TensorDataSet(uri)
        assert tds.name == expected_name
        assert len(tds) == expected_length
# Example 2
def main(dataset_uri):
    """Report the size and per-label counts of the tensor DataSet at *dataset_uri*."""
    dataset = TensorDataSet(dataset_uri)

    print(f"Dataset has {len(dataset)} items")
    # Counter gives the frequency of each distinct label.
    print(Counter(dataset.labels))
# Example 3
def main(tensor_dataset_uri, output_prefix):
    """Write item 30 of the tensor DataSet to ``<output_prefix>_<label>.png``."""
    dataset = TensorDataSet(tensor_dataset_uri)
    image, label = dataset[30]

    # Move the leading axis last (channels-first -> channels-last) so the
    # image writer receives (H, W, C) data.
    image_hwc = np.transpose(image, (1, 2, 0))
    imsave(f"{output_prefix}_{label}.png", image_hwc)
# Example 4
def test_tensor_dataset_functional():
    """Exercise TensorDataSet against the bundled example dataset fixture."""

    from dtoolai.data import TensorDataSet

    dataset_uri = os.path.join(TEST_SAMPLE_DATA, "example_tensor_dataset")
    tds = TensorDataSet(dataset_uri)

    # Dataset-level metadata.
    assert tds.name == "example_tensor_dataset"
    assert tds.uuid == "6b6f9a0e-8547-4903-9090-6dcfc6abdf83"
    assert len(tds) == 100

    # First item: data tensor and its label.
    first_item, first_label = tds[0]
    assert first_item.shape == (1, 9, 9)
    assert first_item[0][0][0] == 0
    assert first_label == 0

    # Geometry properties used when constructing models.
    assert tds.input_channels == 1
    assert tds.dim == 9
def main(model_ds_uri, test_ds_uri):
    """Evaluate a trained model DataSet on a test tensor DataSet and print accuracy."""
    tds = TensorDataSet(test_ds_uri)
    model = TrainedTorchModel(model_ds_uri)

    loader = DataLoader(tds, batch_size=128)

    model.model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for batch, targets in loader:
            outputs = model.model(batch)
            # Accumulate NLL loss across batches (currently only accuracy is reported).
            test_loss += F.nll_loss(outputs, targets).item()
            # argmax over the class dimension yields the predicted label per item.
            predicted = outputs.argmax(dim=1, keepdim=True)
            correct += predicted.eq(targets.view_as(predicted)).sum().item()

    print(f"{correct}/{len(tds)} correct")
def main(train_dataset_uri, output_base_uri, output_name, params, test_dataset_uri):
    """Train a classifier from a tensor DataSet.

    \b
    Required arguments:
    train_dataset_uri: The URI of the data that will be used to train the model.
    output_base_uri:   The base URI at which the output model DataSet will be created.
                       Local file URIs (file:///path/to/file) can be specified as absolute
                       or relative filesystem paths, which will be converted to full URIs.
    output_name:       The name for the output DataSet.

    \b
    Optional arguments:
    params:            A comma separated list of parameters that will override defaults for
                       training the model.
    test_dataset_uri   A URI with a test DataSet that will be used to evaluate the model
                       during and after training. As for output_base_uri, filesystem paths
                       can be used and will be converted to file URIs.
    """

    input_ds_train = TensorDataSet(train_dataset_uri)

    # Training defaults; any of these can be overridden via the `params` string.
    model_params = Parameters(
        batch_size=128,
        learning_rate=0.01,
        n_epochs=1
    )
    if params:
        # e.g. "batch_size=64,n_epochs=5" overrides the defaults above.
        model_params.update_from_comma_separated_string(params)

    train_cnn_from_tensor_dataset(
        input_ds_train,
        output_base_uri,
        output_name,
        model_params
    )
# Example 7
def test_end_to_end_tensors_with_mnist(tmp_dir_fixture):
    """End-to-end test: build a tensor DataSet from raw MNIST arrays, train a
    small CNN on it, and check accuracy on the remote MNIST test DataSet.
    """
    # Download (or reuse the cached copy of) MNIST and flatten each 28x28
    # image into a 784-element row, as create_tensor_dataset_from_arrays expects.
    data_cache_dirpath = os.path.join(TEST_SAMPLE_DATA, "torch")
    mnist_torch = torchvision.datasets.MNIST(data_cache_dirpath, download=True)
    data = mnist_torch.data.numpy().reshape(60000, -1)
    labels = mnist_torch.targets

    assert data.shape == (60000, 784)
    assert labels.shape == (60000,)

    from dtoolai.data import create_tensor_dataset_from_arrays

    # Persist the arrays as a dtool tensor DataSet; (1, 28, 28) is the
    # per-item (channels, height, width) image shape.
    create_tensor_dataset_from_arrays(
        tmp_dir_fixture,
        "mnist.train",
        data,
        labels,
        (1, 28, 28),
        ""
    )

    from dtoolai.data import TensorDataSet

    mnist_train_uri = os.path.join(tmp_dir_fixture, "mnist.train")
    tds = TensorDataSet(mnist_train_uri)
    assert len(tds) == 60000

    dl = DataLoader(tds, batch_size=128, shuffle=True)
    # Use distinct names for the batch tensors so the numpy arrays above
    # are not shadowed.
    batch_data, batch_labels = next(iter(dl))
    assert batch_data.shape == (128, 1, 28, 28)

    from dtoolai.models import GenNet
    init_params = dict(input_channels=tds.input_channels, input_dim=tds.dim)
    model = GenNet(**init_params)
    loss_fn = F.nll_loss
    optimiser = optim.SGD(model.parameters(), lr=0.01)

    from dtoolai.utils import train
    train(model, dl, optimiser, loss_fn, 3)

    model.eval()
    tds_test = TensorDataSet("http://bit.ly/2NVFGQd")
    dl_test = DataLoader(tds_test, batch_size=128)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # BUG FIX: the model must live on the same device as the evaluation
    # batches. Previously only data/label were moved, so this loop crashed
    # with a device-mismatch error on any CUDA-capable host.
    model.to(device)

    assert len(tds_test) == 10000
    correct = 0
    with torch.no_grad():
        for data, label in dl_test:
            data = data.to(device)
            label = label.to(device)
            Y_pred = model(data)
            pred = Y_pred.argmax(dim=1, keepdim=True)
            correct += pred.eq(label.view_as(pred)).sum().item()

    # Require at least 60% accuracy: far above chance (10%) yet loose enough
    # to tolerate run-to-run training variance after only 3 epochs.
    assert correct >= 6000