Example #1
def test_get_input_streams(use_test_config_dir):
    assert valohai.inputs("single_image").stream().read(10000)
    assert len(list(valohai.inputs("input_with_archive").streams())) == 2
    for stream in valohai.inputs("input_with_archive").streams():
        assert stream.read(10000)
    assert valohai.inputs("single_image").stream().read()
    assert not valohai.inputs("nonono").stream()

import json
import os
import sys

import valohai
from PIL import Image
from tensorflow.keras.models import load_model


def main():
    # valohai.prepare enables us to update the valohai.yaml configuration file with
    # the Valohai command-line client by running `valohai yaml step batch_inference.py`

    valohai.prepare(
        step='batch-inference',
        image='tensorflow/tensorflow:2.6.0',
        default_inputs={
            'model': {
                'default': None,
                'optional': False,
            },
            'images': [
                'https://valohaidemo.blob.core.windows.net/mnist/four-inverted.png',
                'https://valohaidemo.blob.core.windows.net/mnist/five-inverted.png',
                'https://valohaidemo.blob.core.windows.net/mnist/five-normal.jpg',
            ],
        },
    )

    print('Loading model')
    model_path = valohai.inputs('model').path()
    model = load_model(model_path)

    json_blob = {}
    for image_path in valohai.inputs('images').paths():
        filename = os.path.basename(image_path)

        extension = os.path.splitext(image_path)[1].lower()
        if extension not in ['.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tiff']:
            print(f'{filename} is not an image file')
            continue

        print(f'Running inference for {filename}')
        try:
            image, inverted = process_image(Image.open(image_path))
            prediction = predict_image(model, image, inverted)
            json_blob[filename] = prediction
            print(filename, prediction)
        except Exception as exc:
            json_blob[filename] = {'error': str(exc)}
            print(f'Unable to process {filename}: {exc}', file=sys.stderr)

    print('Saving predictions')
    suffix = ''
    try:
        suffix = f'-{model_path.split("model-")[1].split(".h5")[0]}'
    except IndexError:
        print(f'Unable to get suffix from {model_path}')

    json_path = valohai.outputs().path(f'predictions{suffix}.json')
    with open(json_path, 'w') as json_file:
        json.dump(json_blob, json_file, sort_keys=True)
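The helpers process_image and predict_image are defined elsewhere in the original file. A minimal sketch, assuming an MNIST-style 28x28 grayscale model and kept consistent with the prediction format that Example #12 below reads (the preprocessing heuristics are assumptions):

import numpy as np


def process_image(image):
    # Sketch: convert the PIL image to the 28x28 grayscale array the model expects.
    image = image.convert('L').resize((28, 28))
    data = np.array(image) / 255.0
    # Assumed heuristic: MNIST digits are light-on-dark, so a mostly bright
    # image is treated as inverted.
    return data, data.mean() > 0.5


def predict_image(model, image, inverted):
    # Sketch: run inference and return a JSON-friendly dict with the best
    # guess and per-label probabilities.
    if inverted:
        image = 1.0 - image
    probabilities = model.predict(np.expand_dims(image, axis=0))[0]
    return {
        'best_guess': int(np.argmax(probabilities)),
        'predictions': {str(i): float(p) for i, p in enumerate(probabilities)},
    }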
Example #3
def test_zip_no_mangling(use_test_config_dir):
    paths = set(valohai.inputs("input_with_archive").paths())
    for suffix in (
            "1hello.txt",
            "2world.txt",
            "blerp/3katt.txt",
            "blerp/blonk/4blöf.txt",
            "blerp/blonk/asdf.jpg",
    ):
        assert any(p.endswith(suffix) for p in paths)
Example #4
def test_get_input_paths(use_test_config_dir):
    assert valohai.inputs("single_image").path().endswith(
        "single_image/Example.jpg")
    assert os.path.exists(valohai.inputs("single_image").path())
    assert (valohai.inputs("single_image").path(
        default="unused_default").endswith("single_image/Example.jpg"))
    assert not valohai.inputs("nonono").path()
    assert valohai.inputs("nonono").path(
        default="default_123") == "default_123"
    assert os.path.exists(valohai.inputs("input_with_archive").path())
    for path in valohai.inputs("input_with_archive").paths():
        assert os.path.exists(path)
    assert len(list(valohai.inputs("input_with_archive").paths())) == 2
Example #5
def test_get_input_streams(use_test_config_dir):
    assert valohai.inputs("single_image").stream().read(10000)
    assert len(list(valohai.inputs("input_with_archive").streams())) == 5

    for stream in valohai.inputs("input_with_archive").streams():
        assert stream.read(10000)
    assert valohai.inputs("single_image").stream().read()
    assert not valohai.inputs("nonono").stream()
    images_in_subdirs = valohai.inputs("images_in_subdirs")
    assert len(list(images_in_subdirs.streams("hello/**/hello/*.jpg"))) == 2
    assert len(list(images_in_subdirs.streams("hello/**/*.jpg"))) == 2
    assert len(list(images_in_subdirs.streams("**/*.jpg"))) == 2
    for stream in valohai.inputs("images_in_subdirs").streams("**/*.jpg"):
        assert stream.read(10000)
Example #6
import numpy as np
import valohai


def main():
    # valohai.prepare enables us to update the valohai.yaml configuration file with
    # the Valohai command-line client by running `valohai yaml step preprocess_dataset.py`

    valohai.prepare(
        step='preprocess-dataset',
        image='python:3.9',
        default_inputs={
            'dataset':
            'https://valohaidemo.blob.core.windows.net/mnist/mnist.npz',
        },
    )

    # Read input files from Valohai inputs directory
    # This enables Valohai to version your training data
    # and cache the data for quick experimentation

    print('Loading data')
    with np.load(valohai.inputs('dataset').path(), allow_pickle=True) as file:
        x_train, y_train = file['x_train'], file['y_train']
        x_test, y_test = file['x_test'], file['y_test']

    print('Preprocessing data')
    x_train, x_test = x_train / 255.0, x_test / 255.0

    # Write output files to Valohai outputs directory
    # This enables Valohai to version your data
    # and upload the output to the default data store

    print('Saving preprocessed data')
    path = valohai.outputs().path('preprocessed_mnist.npz')
    np.savez_compressed(path,
                        x_train=x_train,
                        y_train=y_train,
                        x_test=x_test,
                        y_test=y_test)
Example #7
import numpy as np
import valohai as vh


vh.prepare(step='Preprocess data')

# Read input files from Valohai inputs directory
# This enables Valohai to version your training data
# and cache the data for quick experimentation

with np.load(vh.inputs('mnist').path(), allow_pickle=True) as file:
    x_train, y_train = file['x_train'], file['y_train']
    x_test, y_test = file['x_test'], file['y_test']

x_train, x_test = x_train / 255.0, x_test / 255.0


# Write output files to Valohai outputs directory
# This enables Valohai to version your data
# and upload the output to the default data store

path = vh.outputs('dataset').path('preprocessed_mnist.npz')
np.savez(path, x_train=x_train, y_train=y_train, x_test=x_test, y_test=y_test)
Example #8
import numpy as np
import tensorflow as tf
import valohai as vh

vh.prepare(step='Train model')

# Read input files from Valohai inputs directory
# This enables Valohai to version your training data
# and cache the data for quick experimentation

with np.load(vh.inputs('preprocessed_mnist').path(), allow_pickle=True) as f:
    x_train, y_train = f['x_train'], f['y_train']
    x_test, y_test = f['x_test'], f['y_test']

model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(10, activation='softmax')
])

optimizer = tf.keras.optimizers.Adam(
    learning_rate=vh.parameters('learning_rate').value)
model.compile(optimizer=optimizer,
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Print metrics out as JSON
# This enables Valohai to version your metadata
# and for you to use it to compare experiments
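The excerpt above ends before the training and logging code. A hedged continuation sketch, following the vh.logger() pattern that appears in Example #10 below (the metric keys and epoch count are assumptions):

def log_metadata(epoch, logs):
    # Sketch: print each epoch's Keras metrics as JSON via the Valohai logger.
    with vh.logger() as logger:
        logger.log('epoch', epoch)
        logger.log('accuracy', logs['accuracy'])
        logger.log('loss', logs['loss'])


callback = tf.keras.callbacks.LambdaCallback(on_epoch_end=log_metadata)
model.fit(x_train, y_train, epochs=5, callbacks=[callback])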
Example #9
def test_get_input_paths(use_test_config_dir):
    assert valohai.inputs("single_image").path().endswith(
        "single_image/Example.jpg")
    assert os.path.exists(valohai.inputs("single_image").path())
    assert os.path.exists(valohai.inputs("single_image").dir_path())
    assert os.path.isdir(valohai.inputs("single_image").dir_path())

    assert (valohai.inputs("single_image").path(
        default="unused_default").endswith("single_image/Example.jpg"))

    assert (valohai.inputs("single_image").path("Example.jpg").endswith(
        "single_image/Example.jpg"))
    assert (valohai.inputs("single_image").path("*.jpg").endswith(
        "single_image/Example.jpg"))
    assert (valohai.inputs("single_image").path("E*").endswith(
        "single_image/Example.jpg"))
    assert valohai.inputs("single_image").path("*").endswith(
        "single_image/Example.jpg")
    assert not valohai.inputs("single_image").path("notbefound*")
    assert next(valohai.inputs("single_image").paths("Example.jpg")).endswith(
        "single_image/Example.jpg")
    assert next(valohai.inputs("single_image").paths("*.jpg")).endswith(
        "single_image/Example.jpg")
    assert next(valohai.inputs("single_image").paths("E*")).endswith(
        "single_image/Example.jpg")
    assert next(valohai.inputs("single_image").paths("*")).endswith(
        "single_image/Example.jpg")
    assert len(list(valohai.inputs("single_image").paths("notbefound*"))) == 0

    assert not valohai.inputs("nonono").path()
    assert valohai.inputs("nonono").path(
        default="default_123") == "default_123"
    assert os.path.exists(valohai.inputs("input_with_archive").path())
    assert len(list(valohai.inputs("input_with_archive").paths())) == 5
    assert len(list(
        valohai.inputs("input_with_archive").paths("**/*.txt"))) == 2
    assert len(list(
        valohai.inputs("input_with_archive").paths("**/a*.jpg"))) == 1
    assert next(valohai.inputs("input_with_archive").paths(
        "**/a*.jpg")).endswith("blerp/blonk/asdf.jpg")
    assert next(valohai.inputs("input_with_archive").paths(
        "**/asdf.jpg")).endswith("blerp/blonk/asdf.jpg")
    assert next(
        valohai.inputs("input_with_archive").paths(
            "blerp/blonk/asdf.jpg")).endswith("blerp/blonk/asdf.jpg")

    for path in valohai.inputs("input_with_archive").paths():
        assert os.path.exists(path)

    assert len(list(valohai.inputs("input_with_archive").paths())) == 5

    for path in valohai.inputs("input_with_archive").paths("**/*.jpg"):
        assert os.path.exists(path)

    assert next(
        valohai.inputs("images_in_subdirs").paths(
            "hello/label1/hello/*.jpg")).endswith("label1/hello/foo.jpg")
    assert next(
        valohai.inputs("images_in_subdirs").paths(
            "hello/label2/hello/*.jpg")).endswith("label2/hello/foo.jpg")
    images_in_subdirs = valohai.inputs("images_in_subdirs")
    assert len(list(images_in_subdirs.paths("hello/**/hello/*.jpg"))) == 2
    assert len(list(images_in_subdirs.paths("hello/**/*.jpg"))) == 2
    assert len(list(images_in_subdirs.paths("**/*.jpg"))) == 2
    for path in valohai.inputs("images_in_subdirs").paths("**/*.jpg"):
        assert os.path.exists(path)
Example #10
import uuid

import numpy as np
import tensorflow as tf
import valohai


def main():
    # valohai.prepare enables us to update the valohai.yaml configuration file with
    # the Valohai command-line client by running `valohai yaml step train_model.py`

    valohai.prepare(
        step='train-model',
        image='tensorflow/tensorflow:2.6.0',
        default_inputs={
            'dataset':
            'https://valohaidemo.blob.core.windows.net/mnist/preprocessed_mnist.npz',
        },
        default_parameters={
            'learning_rate': 0.001,
            'epochs': 5,
        },
    )

    # Read input files from Valohai inputs directory
    # This enables Valohai to version your training data
    # and cache the data for quick experimentation

    input_path = valohai.inputs('dataset').path()
    with np.load(input_path, allow_pickle=True) as f:
        x_train, y_train = f['x_train'], f['y_train']
        x_test, y_test = f['x_test'], f['y_test']

    model = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(10),
    ])

    optimizer = tf.keras.optimizers.Adam(
        learning_rate=valohai.parameters('learning_rate').value)
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])

    # Print metrics out as JSON
    # This enables Valohai to version your metadata
    # and for you to use it to compare experiments

    callback = tf.keras.callbacks.LambdaCallback(on_epoch_end=log_metadata)
    model.fit(x_train,
              y_train,
              epochs=valohai.parameters('epochs').value,
              callbacks=[callback])

    # Evaluate the model and print out the test metrics as JSON

    test_loss, test_accuracy = model.evaluate(x_test, y_test, verbose=2)
    with valohai.logger() as logger:
        logger.log('test_accuracy', test_accuracy)
        logger.log('test_loss', test_loss)

    # Write output files to Valohai outputs directory
    # This enables Valohai to version your data
    # and upload the output to the default data store

    suffix = uuid.uuid4()
    output_path = valohai.outputs().path(f'model-{suffix}.h5')
    model.save(output_path)
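log_metadata is referenced above but not included in this excerpt. A minimal sketch, assuming the per-epoch Keras metrics dict and the valohai.logger() API shown in this same example (the exact keys are assumptions):

def log_metadata(epoch, logs):
    # Sketch: forward Keras epoch metrics to Valohai as JSON.
    with valohai.logger() as logger:
        logger.log('epoch', epoch)
        logger.log('accuracy', logs['accuracy'])
        logger.log('loss', logs['loss'])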
Example #11
def test_prepare(tmpdir, monkeypatch):
    inputs_dir = str(tmpdir.mkdir("inputs"))
    monkeypatch.setenv("VH_INPUTS_DIR", inputs_dir)
    local_file = tmpdir.mkdir("sub").join("hello.txt")
    local_file.write("tiku ja taku ja joku")

    data_dir = tmpdir.mkdir("data")
    local_data = data_dir.join("data1.dat")
    local_data.write("I'm a big data")
    local_data2 = data_dir.join("data2.dat")
    local_data2.write("I'm a huge data")

    parameters = {
        "iambool": True,
        "mestringy": "asdf",
        "integerboi": 123,
        "floaty": 0.0001,
        "makemetrue": False,
        "makemefalse": True,
        "makemeqwer": "asdf",
        "makeme321": 123,
        "makemenegative": 0.0001,
    }
    inputs = {
        "example": "https://valohai-mnist.s3.amazonaws.com/t10k-images-idx3-ubyte.gz",
        "overrideme": "https://valohai-mnist.s3.amazonaws.com/t10k-images-idx3-ubyte.gz",
        "myimages": [
            "https://upload.wikimedia.org/wikipedia/commons/8/84/Example.svg",
            "https://upload.wikimedia.org/wikipedia/commons/0/01/Example_Wikipedia_sandbox_move_UI.png",
        ],
        "localdata_as_list": [str(local_data), str(local_data2)],
        "localdata_with_wildcard": os.path.join(str(data_dir), "*.dat"),
    }

    with monkeypatch.context() as m:
        args = [
            "",
            "--makemetrue=true",
            "--makemefalse=false",
            "--makemeqwer=qwer",
            "--makeme321=321",
            "--makemenegative=-0.123",
            "--some_totally_random_parameter_to_ignore=666",
            f"--overrideme={str(local_file)}",
        ]
        m.setattr(sys, "argv", args)
        valohai.prepare(step="test",
                        default_parameters=parameters,
                        default_inputs=inputs)

    assert valohai.parameters("iambool").value is True
    assert valohai.parameters("mestringy").value == "asdf"
    assert valohai.parameters("integerboi").value == 123
    assert valohai.parameters("floaty").value == 0.0001
    assert valohai.parameters("makemetrue").value is True
    assert valohai.parameters("makemefalse").value is False
    assert valohai.parameters("makemeqwer").value == "qwer"
    assert valohai.parameters("makeme321").value == 321
    assert valohai.parameters("makemenegative").value < 0.0

    assert (get_input_info("example").files[0].uri ==
            "https://valohai-mnist.s3.amazonaws.com/t10k-images-idx3-ubyte.gz")
    assert (get_input_info("myimages").files[0].uri ==
            "https://upload.wikimedia.org/wikipedia/commons/8/84/Example.svg")
    assert (
        get_input_info("myimages").files[1].uri ==
        "https://upload.wikimedia.org/wikipedia/commons/0/01/Example_Wikipedia_sandbox_move_UI.png"
    )
    assert not get_input_info("overrideme").files[0].uri
    assert os.path.isfile(get_input_info("overrideme").files[0].path)

    assert sum(1 for _ in valohai.inputs("localdata_as_list").paths()) == 2
    assert sum(1 for _ in valohai.inputs("localdata_with_wildcard").paths()) == 2

    for p in valohai.inputs("localdata_as_list").paths():
        assert os.path.isfile(p)

    for p in valohai.inputs("localdata_with_wildcard").paths():
        assert os.path.isfile(p)
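The test above shows that valohai.prepare lets command-line flags of the form --name=value override the declared default parameters and inputs. A minimal usage sketch (the step and parameter names are illustrative):

import valohai

# Running e.g. `python train.py --learning_rate=0.01` overrides the default below.
valohai.prepare(
    step='train',
    default_parameters={'learning_rate': 0.001},
)
print(valohai.parameters('learning_rate').value)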
Example #12
import json
import os
import shutil
from collections import namedtuple

import valohai


def main():
    # valohai.prepare enables us to update the valohai.yaml configuration file with
    # the Valohai command-line client by running `valohai yaml step compare_predictions.py`

    valohai.prepare(
        step='compare-predictions',
        image='python:3.9',
        default_inputs={
            'predictions': {
                'default': None,
                'optional': False,
            },
            'models': [],
        },
    )

    # Here is some simple example logic that compares predictions to figure out
    # which model performed best; the details vary from use case to use case.
    BestModel = namedtuple('BestModel', 'prediction, average_best_guess, model')
    best_of_best = BestModel(prediction=None, average_best_guess=None, model=None)
    average_best_guesses = {}
    model_filename = ''

    for prediction_path in valohai.inputs('predictions').paths():
        filename = os.path.basename(prediction_path)

        extension = os.path.splitext(prediction_path)[1].lower()
        if extension != '.json':
            print(f'{filename} is not a JSON file')
            continue

        with open(prediction_path, 'r') as file:
            blob = json.load(file)

        best_guess_probabilities = []
        for sample_filename, prediction in blob.items():
            best_guess = str(prediction['best_guess'])
            probability = prediction['predictions'][best_guess]
            best_guess_probabilities.append(float(probability))

        average_best_guess = sum(best_guess_probabilities) / len(best_guess_probabilities)
        average_best_guesses[filename] = average_best_guess
        print(
            f'{filename} => {average_best_guess} (average best guess probability)'
        )

        suffix = filename.split('predictions-')[1].split('.json')[0]
        model_filename = f"model-{suffix}.h5"

        if not best_of_best.average_best_guess or average_best_guess > best_of_best.average_best_guess:
            best_of_best = BestModel(
                prediction=filename,
                average_best_guess=average_best_guess,
                model=model_filename,
            )

    print(
        f'The best model is the one that generated {best_of_best.prediction} ({best_of_best.average_best_guess})'
    )

    model_path = next((model for model in valohai.inputs('models').paths()
                       if model_filename in model), '')
    if model_path:
        shutil.copy(model_path, valohai.outputs().path(model_filename))
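For reference, the comparison logic above assumes each predictions file maps sample filenames to a dict with a best_guess label and per-label probabilities, matching what Example #1 produces. An illustrative blob (filenames and values are made up):

predictions_blob = {
    'five-normal.jpg': {
        'best_guess': 5,
        'predictions': {'0': 0.01, '1': 0.02, '5': 0.90, '9': 0.07},
    },
}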