コード例 #1
0
def test_download(tmpdir, monkeypatch, requests_mock):
    """Check that default inputs are requested once and not requested again on re-access."""
    example_url = "https://valohai-mnist.s3.amazonaws.com/t10k-images-idx3-ubyte.gz"
    train_images_url = "https://valohai-mnist.s3.amazonaws.com/train-images-idx3-ubyte.gz"
    train_labels_url = "https://valohai-mnist.s3.amazonaws.com/train-labels-idx1-ubyte.gz"

    # Point VH_INPUTS_DIR at a fresh directory created under tmpdir.
    download_root = str(tmpdir.mkdir("inputs"))
    monkeypatch.setenv("VH_INPUTS_DIR", download_root)

    # Register every URL with the requests_mock fixture.
    for url in (example_url, train_images_url, train_labels_url):
        requests_mock.get(url)

    default_inputs = {
        "example": example_url,
        "mnist": [train_images_url, train_labels_url],
    }

    monkeypatch.setattr(sys, "argv", ["myscript.py"])
    valohai.prepare(step="test", default_inputs=default_inputs)

    # These calls will trigger downloads
    get_input_vfs("example")
    get_input_vfs("mnist")

    assert get_input_info("example").files[0].uri == example_url
    assert get_input_info("mnist").files[0].uri == train_images_url
    assert get_input_info("mnist").files[1].uri == train_labels_url
    assert requests_mock.call_count == 3

    # Each file is expected at <inputs dir>/<input name>/<file name>.
    expected_files = (
        ("example", "t10k-images-idx3-ubyte.gz"),
        ("mnist", "train-images-idx3-ubyte.gz"),
        ("mnist", "train-labels-idx1-ubyte.gz"),
    )
    for input_name, file_name in expected_files:
        assert os.path.isfile(os.path.join(download_root, input_name, file_name))

    # Second time around, the file should be cached and not trigger any more downloads
    get_input_vfs("mnist")
    get_input_vfs("example")

    assert requests_mock.call_count == 3
コード例 #2
0
def main():
    """Run inference on every file of the 'images' input and save the results as JSON.

    The model is loaded from the 'model' input. Each image file is passed
    through ``process_image`` and ``predict_image``; the prediction (or, on
    failure, the error message) is stored under the image's file name. The
    collected results are written to ``predictions<suffix>.json`` in the
    outputs directory, where ``<suffix>`` is taken from the model file name
    when it has the form ``model-<suffix>.h5``.
    """
    # valohai.prepare enables us to update the valohai.yaml configuration file with
    # the Valohai command-line client by running `valohai yaml step batch_inference.py`

    valohai.prepare(
        step='batch-inference',
        image='tensorflow/tensorflow:2.6.0',
        default_inputs={
            'model': {
                'default': None,
                'optional': False,
            },
            'images': [
                'https://valohaidemo.blob.core.windows.net/mnist/four-inverted.png',
                'https://valohaidemo.blob.core.windows.net/mnist/five-inverted.png',
                'https://valohaidemo.blob.core.windows.net/mnist/five-normal.jpg',
            ],
        },
    )

    print('Loading model')
    model_path = valohai.inputs('model').path()
    model = load_model(model_path)

    # Built once instead of on every loop iteration.
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tiff')

    json_blob = {}
    for image_path in valohai.inputs('images').paths():
        filename = os.path.basename(image_path)

        extension = os.path.splitext(image_path)[1].lower()
        if extension not in image_extensions:
            print(f'{filename} is not an image file')
            continue

        print(f'Running inference for {filename}')
        try:
            image, inverted = process_image(Image.open(image_path))
            prediction = predict_image(model, image, inverted)
            json_blob[filename] = prediction
            print(filename, prediction)
        except Exception as exc:
            # Best effort: one bad image must not abort the whole batch.
            # Store the message text rather than the exception object, which
            # is not JSON serializable and would make json.dump() below raise
            # TypeError.
            json_blob[filename] = {'error': str(exc)}
            print(f'Unable to process {filename}: {exc}', file=sys.stderr)

    print('Saving predictions')
    suffix = ''
    try:
        # IndexError here means the path contains no "model-" part.
        suffix = f'-{model_path.split("model-")[1].split(".h5")[0]}'
    except IndexError:
        print(f'Unable to get suffix from {model_path}')

    json_path = valohai.outputs().path(f'predictions{suffix}.json')
    with open(json_path, 'w') as json_file:
        json.dump(json_blob, json_file, sort_keys=True)
コード例 #3
0
def test_download(tmpdir, monkeypatch, requests_mock):
    """Check that loading input info requests each default input once, then not again."""
    example_url = "https://valohai-mnist.s3.amazonaws.com/t10k-images-idx3-ubyte.gz"
    svg_url = "https://upload.wikimedia.org/wikipedia/commons/8/84/Example.svg"
    png_url = "https://upload.wikimedia.org/wikipedia/commons/0/01/Example_Wikipedia_sandbox_move_UI.png"

    # Point VH_INPUTS_DIR at a fresh directory created under tmpdir.
    download_root = str(tmpdir.mkdir("inputs"))
    monkeypatch.setenv("VH_INPUTS_DIR", download_root)

    # Register every URL with the requests_mock fixture.
    for url in (example_url, svg_url, png_url):
        requests_mock.get(url)

    default_inputs = {
        "example": example_url,
        "myimages": [svg_url, png_url],
    }

    monkeypatch.setattr(sys, "argv", ["myscript.py"])
    valohai.prepare(step="test", default_inputs=default_inputs)

    # These calls will trigger downloads
    assert load_input_info("example").files[0].uri == example_url
    assert load_input_info("myimages").files[0].uri == svg_url
    assert load_input_info("myimages").files[1].uri == png_url

    assert requests_mock.call_count == 3

    # Each file is expected at <inputs dir>/<input name>/<file name>.
    expected_files = (
        ("example", "t10k-images-idx3-ubyte.gz"),
        ("myimages", "Example.svg"),
        ("myimages", "Example_Wikipedia_sandbox_move_UI.png"),
    )
    for input_name, file_name in expected_files:
        assert os.path.isfile(os.path.join(download_root, input_name, file_name))

    # Second time around, the file should be cached and not trigger any more downloads
    load_input_info("myimages")
    assert requests_mock.call_count == 3
コード例 #4
0
def main():
    """Load the 'dataset' input, divide the image arrays by 255.0 and save the result.

    The four arrays are written as a compressed ``preprocessed_mnist.npz``
    file in the outputs directory.
    """
    # valohai.prepare enables us to update the valohai.yaml configuration file with
    # the Valohai command-line client by running `valohai yaml step preprocess_dataset.py`
    step_inputs = {
        'dataset':
        'https://valohaidemo.blob.core.windows.net/mnist/mnist.npz',
    }
    valohai.prepare(
        step='preprocess-dataset',
        image='python:3.9',
        default_inputs=step_inputs,
    )

    # Read input files from Valohai inputs directory
    # This enables Valohai to version your training data
    # and cache the data for quick experimentation
    print('Loading data')
    dataset_path = valohai.inputs('dataset').path()
    # NOTE(review): allow_pickle=True lets np.load unpickle object arrays;
    # only appropriate when the dataset file is trusted.
    with np.load(dataset_path, allow_pickle=True) as archive:
        x_train = archive['x_train']
        y_train = archive['y_train']
        x_test = archive['x_test']
        y_test = archive['y_test']

    print('Preprocessing data')
    x_train = x_train / 255.0
    x_test = x_test / 255.0

    # Write output files to Valohai outputs directory
    # This enables Valohai to version your data
    # and upload it to the default data store
    print('Saving preprocessed data')
    arrays = {
        'x_train': x_train,
        'y_train': y_train,
        'x_test': x_test,
        'y_test': y_test,
    }
    output_path = valohai.outputs().path('preprocessed_mnist.npz')
    np.savez_compressed(output_path, **arrays)
コード例 #5
0
def test_prepare(tmpdir, monkeypatch):
    """Check that valohai.prepare applies defaults and command-line overrides.

    Parameters named ``makeme*`` and the ``overrideme`` input are overridden
    through ``sys.argv``; the remaining parameters and inputs must keep their
    defaults. Input info is loaded with ``DownloadType.NEVER``.
    """
    inputs_dir = str(tmpdir.mkdir("inputs"))
    monkeypatch.setenv("VH_INPUTS_DIR", inputs_dir)
    local_file = tmpdir.mkdir("sub").join("hello.txt")
    local_file.write("tiku ja taku ja joku")

    parameters = {
        "iambool": True,
        "mestringy": "asdf",
        "integerboi": 123,
        "floaty": 0.0001,
        "makemetrue": False,
        "makemeqwer": "asdf",
        "makeme321": 123,
        "makemenegative": 0.0001,
    }
    inputs = {
        "example":
        "https://valohai-mnist.s3.amazonaws.com/t10k-images-idx3-ubyte.gz",
        "overrideme":
        "https://valohai-mnist.s3.amazonaws.com/t10k-images-idx3-ubyte.gz",
        "myimages": [
            "https://upload.wikimedia.org/wikipedia/commons/8/84/Example.svg",
            "https://upload.wikimedia.org/wikipedia/commons/0/01/Example_Wikipedia_sandbox_move_UI.png",
        ],
    }

    # sys.argv is patched only for the duration of the prepare() call.
    with monkeypatch.context() as m:
        args = [
            "",
            "--makemetrue=true",
            "--makemeqwer=qwer",
            "--makeme321=321",
            "--makemenegative=-0.123",
            "--some_totally_random_parameter_to_ignore=666",
            f"--overrideme={str(local_file)}",
        ]
        m.setattr(
            sys,
            "argv",
            args,
        )
        valohai.prepare(step="test",
                        default_parameters=parameters,
                        default_inputs=inputs)

    # Booleans are checked by identity so a merely truthy value (e.g. the
    # string "true" or the integer 1) does not pass.
    assert valohai.parameters("iambool").value is True
    assert valohai.parameters("mestringy").value == "asdf"
    assert valohai.parameters("integerboi").value == 123
    assert valohai.parameters("floaty").value == 0.0001
    assert valohai.parameters("makemetrue").value is True
    assert valohai.parameters("makemeqwer").value == "qwer"
    assert valohai.parameters("makeme321").value == 321
    assert valohai.parameters("makemenegative").value < 0.0

    assert (load_input_info("example",
                            download=DownloadType.NEVER).files[0].uri ==
            "https://valohai-mnist.s3.amazonaws.com/t10k-images-idx3-ubyte.gz")
    assert (load_input_info("myimages",
                            download=DownloadType.NEVER).files[0].uri ==
            "https://upload.wikimedia.org/wikipedia/commons/8/84/Example.svg")
    assert (
        load_input_info("myimages",
                        download=DownloadType.NEVER).files[1].uri ==
        "https://upload.wikimedia.org/wikipedia/commons/0/01/Example_Wikipedia_sandbox_move_UI.png"
    )
    # The overridden input points at a local file: it has no URI but an
    # existing path.
    assert not load_input_info("overrideme",
                               download=DownloadType.NEVER).files[0].uri
    assert os.path.isfile(
        load_input_info("overrideme",
                        download=DownloadType.NEVER).files[0].path)
コード例 #6
0
import numpy as np
import valohai as vh


# Declare this script as the 'Preprocess data' step (no default inputs or
# parameters are given here).
vh.prepare(step='Preprocess data')

# Read input files from Valohai inputs directory
# This enables Valohai to version your training data
# and cache the data for quick experimentation

# NOTE(review): allow_pickle=True lets np.load unpickle object arrays;
# only appropriate when the input file is trusted.
with np.load(vh.inputs('mnist').path(), allow_pickle=True) as file:
    x_train, y_train = file['x_train'], file['y_train']
    x_test, y_test = file['x_test'], file['y_test']

# Divide both image arrays by 255.0 (presumably scaling 8-bit pixel values
# into [0, 1] — confirm against the dataset's dtype).
x_train, x_test = x_train / 255.0, x_test / 255.0


# Write output files to Valohai outputs directory
# This enables Valohai to version your data
# and upload it to the default data store

path = vh.outputs('dataset').path('preprocessed_mnist.npz')
np.savez(path, x_train=x_train, y_train=y_train, x_test=x_test, y_test=y_test)
コード例 #7
0
ファイル: train.py プロジェクト: orasimus/tensorflow-example
import numpy as np
import tensorflow as tf
import valohai as vh

# Declare this script as the 'Train model' step (no default inputs or
# parameters are given here).
vh.prepare(step='Train model')

# Read input files from Valohai inputs directory
# This enables Valohai to version your training data
# and cache the data for quick experimentation

# NOTE(review): allow_pickle=True lets np.load unpickle object arrays;
# only appropriate when the input file is trusted.
with np.load(vh.inputs('preprocessed_mnist').path(), allow_pickle=True) as f:
    x_train, y_train = f['x_train'], f['y_train']
    x_test, y_test = f['x_test'], f['y_test']

# Flatten 28x28 inputs, one hidden ReLU layer with dropout, then a 10-way
# softmax output layer.
model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(10, activation='softmax')
])

# The learning rate is read from the 'learning_rate' step parameter.
optimizer = tf.keras.optimizers.Adam(
    learning_rate=vh.parameters('learning_rate').value)
model.compile(optimizer=optimizer,
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Print metrics out as JSON
# This enables Valohai to version your metadata
# and for you to use it to compare experiments
コード例 #8
0
def main():
    """Train a dense network on the 'dataset' input, log test metrics and save the model.

    The learning rate and epoch count come from the step parameters. The
    trained model is written to the outputs directory as
    ``model-<uuid4>.h5``.
    """
    # valohai.prepare enables us to update the valohai.yaml configuration file with
    # the Valohai command-line client by running `valohai yaml step train_model.py`
    step_inputs = {
        'dataset':
        'https://valohaidemo.blob.core.windows.net/mnist/preprocessed_mnist.npz',
    }
    step_parameters = {
        'learning_rate': 0.001,
        'epochs': 5,
    }
    valohai.prepare(
        step='train-model',
        image='tensorflow/tensorflow:2.6.0',
        default_inputs=step_inputs,
        default_parameters=step_parameters,
    )

    # Read input files from Valohai inputs directory
    # This enables Valohai to version your training data
    # and cache the data for quick experimentation
    # NOTE(review): allow_pickle=True lets np.load unpickle object arrays;
    # only appropriate when the dataset file is trusted.
    with np.load(valohai.inputs('dataset').path(), allow_pickle=True) as archive:
        x_train = archive['x_train']
        y_train = archive['y_train']
        x_test = archive['x_test']
        y_test = archive['y_test']

    # The final layer has no activation, so the loss below is configured
    # with from_logits=True.
    layers = [
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(10),
    ]
    model = tf.keras.models.Sequential(layers)

    learning_rate = valohai.parameters('learning_rate').value
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=['accuracy'],
    )

    # Print metrics out as JSON
    # This enables Valohai to version your metadata
    # and for you to use it to compare experiments
    metadata_callback = tf.keras.callbacks.LambdaCallback(
        on_epoch_end=log_metadata)
    epoch_count = valohai.parameters('epochs').value
    model.fit(x_train, y_train, epochs=epoch_count,
              callbacks=[metadata_callback])

    # Evaluate the model and print out the test metrics as JSON
    test_loss, test_accuracy = model.evaluate(x_test, y_test, verbose=2)
    with valohai.logger() as logger:
        logger.log('test_accuracy', test_accuracy)
        logger.log('test_loss', test_loss)

    # Write output files to Valohai outputs directory
    # This enables Valohai to version your data
    # and upload it to the default data store
    suffix = uuid.uuid4()
    model.save(valohai.outputs().path(f'model-{suffix}.h5'))
コード例 #9
0
ファイル: test_prepare.py プロジェクト: valohai/valohai-utils
def test_prepare(tmpdir, monkeypatch):
    """Check that valohai.prepare applies defaults, command-line overrides and local inputs.

    Parameters named ``makeme*`` and the ``overrideme`` input are overridden
    through ``sys.argv``. Two further inputs refer to local files, once as an
    explicit list and once as a ``*.dat`` wildcard.
    """
    mnist_url = "https://valohai-mnist.s3.amazonaws.com/t10k-images-idx3-ubyte.gz"
    svg_url = "https://upload.wikimedia.org/wikipedia/commons/8/84/Example.svg"
    png_url = "https://upload.wikimedia.org/wikipedia/commons/0/01/Example_Wikipedia_sandbox_move_UI.png"

    inputs_root = str(tmpdir.mkdir("inputs"))
    monkeypatch.setenv("VH_INPUTS_DIR", inputs_root)

    # Local file used to override the "overrideme" input from the command line.
    override_file = tmpdir.mkdir("sub").join("hello.txt")
    override_file.write("tiku ja taku ja joku")

    # Two local data files referenced by the list and wildcard inputs.
    data_dir = tmpdir.mkdir("data")
    first_data = data_dir.join("data1.dat")
    first_data.write("I'm a big data")
    second_data = data_dir.join("data2.dat")
    second_data.write("I'm a huge data")

    default_parameters = {
        "iambool": True,
        "mestringy": "asdf",
        "integerboi": 123,
        "floaty": 0.0001,
        "makemetrue": False,
        "makemefalse": True,
        "makemeqwer": "asdf",
        "makeme321": 123,
        "makemenegative": 0.0001,
    }
    default_inputs = {
        "example": mnist_url,
        "overrideme": mnist_url,
        "myimages": [svg_url, png_url],
        "localdata_as_list": [str(first_data), str(second_data)],
        "localdata_with_wildcard": os.path.join(str(data_dir), "*.dat"),
    }

    command_line = [
        "",
        "--makemetrue=true",
        "--makemefalse=false",
        "--makemeqwer=qwer",
        "--makeme321=321",
        "--makemenegative=-0.123",
        "--some_totally_random_parameter_to_ignore=666",
        f"--overrideme={str(override_file)}",
    ]
    # sys.argv is patched only for the duration of the prepare() call.
    with monkeypatch.context() as patched:
        patched.setattr(sys, "argv", command_line)
        valohai.prepare(
            step="test",
            default_parameters=default_parameters,
            default_inputs=default_inputs,
        )

    assert valohai.parameters("iambool").value is True
    assert valohai.parameters("mestringy").value == "asdf"
    assert valohai.parameters("integerboi").value == 123
    assert valohai.parameters("floaty").value == 0.0001
    assert valohai.parameters("makemetrue").value is True
    assert valohai.parameters("makemefalse").value is False
    assert valohai.parameters("makemeqwer").value == "qwer"
    assert valohai.parameters("makeme321").value == 321
    assert valohai.parameters("makemenegative").value < 0.0

    assert get_input_info("example").files[0].uri == mnist_url
    assert get_input_info("myimages").files[0].uri == svg_url
    assert get_input_info("myimages").files[1].uri == png_url

    # The overridden input points at a local file: no URI, but an existing path.
    assert not get_input_info("overrideme").files[0].uri
    assert os.path.isfile(get_input_info("overrideme").files[0].path)

    local_input_names = ("localdata_as_list", "localdata_with_wildcard")

    # Both local inputs must resolve to exactly the two data files...
    for input_name in local_input_names:
        assert len(list(valohai.inputs(input_name).paths())) == 2

    # ...and every resolved path must exist on disk.
    for input_name in local_input_names:
        assert all(os.path.isfile(p) for p in valohai.inputs(input_name).paths())
コード例 #10
0
import valohai

# Default parameter values, one per basic type (bool, str, int, float).
params = {
    "param1": True,
    "param2": "asdf",
    "param3": 123,
    "param4": 0.0001,
}

# Default inputs: one given as a single wildcard string, one as a list of two
# strings.
inputs = {"input1": "asdf/*", "input2": ["yolol", "yalala"]}


def prepare(a, b):
    """Print a marker line containing both arguments.

    This module-level function only shares its name with ``valohai.prepare``.
    """
    message = f"this is fake method {a} {b}"
    print(message)


# Calls the library's prepare() with the module-level defaults; the local
# prepare(a, b) defined in this file is a different function and is not
# invoked here.
valohai.prepare(step="foobar1",
                default_parameters=params,
                default_inputs=inputs)
コード例 #11
0
import valohai

# Declares step "foobar3" with the parameter and input defaults written
# inline as dict literals rather than via module-level variables.
valohai.prepare(
    step="foobar3",
    default_parameters={
        "param1": True,
        "param2": "asdf",
        "param3": 123,
        "param4": 0.0001,
    },
    default_inputs={
        "input1": "asdf",
        "input2": ["yolol", "yalala"]
    },
)
コード例 #12
0
ファイル: test5.py プロジェクト: valohai/valohai-utils
import valohai

# Default parameter values passed to valohai.prepare() below (both integers).
params = {
    "seq_length": 14,
    "num_epochs": 200,
}


def prepare(a, b):
    """Print a marker line containing both arguments.

    This module-level function only shares its name with ``valohai.prepare``.
    """
    message = f"this is fake method {a} {b}"
    print(message)


# Calls the library's prepare() with default parameters and a Docker image
# name but no inputs; the local prepare(a, b) defined in this file is a
# different function and is not invoked here.
valohai.prepare(step="mystep",
                default_parameters=params,
                image="valohai/keras")
コード例 #13
0
def main():
    """Compare prediction files and copy the best one's model to the outputs.

    Every ``.json`` file of the 'predictions' input is scored by the average
    probability of its best guesses. The prediction file with the highest
    average wins (the first one wins ties). If the 'models' input contains a
    file whose path includes the winner's ``model-<suffix>.h5`` name, that
    file is copied to the outputs directory under that name.
    """
    # valohai.prepare enables us to update the valohai.yaml configuration file with
    # the Valohai command-line client by running `valohai yaml step compare_predictions.py`

    valohai.prepare(
        step='compare-predictions',
        image='python:3.9',
        default_inputs={
            'predictions': {
                'default': None,
                'optional': False,
            },
            'models': [],
        },
    )

    # here we have some simple example logic to compare predictions to figure out which
    # predictions are the best, so this varies from use-case to use-case
    BestModel = namedtuple('BestModel',
                           'prediction, average_best_guess, model')
    best_of_best = BestModel(prediction=None,
                             average_best_guess=None,
                             model=None)

    for prediction_path in valohai.inputs('predictions').paths():
        filename = os.path.basename(prediction_path)

        extension = os.path.splitext(prediction_path)[1].lower()
        if extension != '.json':
            print(f'{filename} is not a JSON file')
            continue

        with open(prediction_path, 'r') as file:
            blob = json.load(file)

        best_guess_probabilities = []
        for prediction in blob.values():
            best_guess = str(prediction['best_guess'])
            probability = prediction['predictions'][best_guess]
            best_guess_probabilities.append(float32(probability))

        if not best_guess_probabilities:
            # An empty file has no average; skip it instead of dividing by zero.
            print(f'{filename} contains no predictions')
            continue

        average_best_guess = sum(best_guess_probabilities) / len(
            best_guess_probabilities)
        print(
            f'{filename} => {average_best_guess} (average best guess probability)'
        )

        # Assumes the file is named predictions-<suffix>.json; any other name
        # raises IndexError here.
        suffix = filename.split('predictions-')[1].split('.json')[0]
        model_filename = f"model-{suffix}.h5"

        # Compare against None explicitly so a legitimate 0.0 average is not
        # mistaken for "no best model yet".
        if (best_of_best.average_best_guess is None
                or average_best_guess > best_of_best.average_best_guess):
            best_of_best = BestModel(
                prediction=filename,
                average_best_guess=average_best_guess,
                model=model_filename,
            )

    print(
        f'The best model is the one that generated {best_of_best.prediction} ({best_of_best.average_best_guess})'
    )

    # Use the winner's model name, not the name derived from whichever
    # prediction file happened to be processed last.
    best_model_filename = best_of_best.model
    if not best_model_filename:
        # No usable prediction file: nothing to copy.
        return

    model_path = next((model for model in valohai.inputs('models').paths()
                       if best_model_filename in model), '')
    if model_path:
        shutil.copy(model_path, valohai.outputs().path(best_model_filename))
コード例 #14
0
ファイル: resize.py プロジェクト: valohai/utils-example
import valohai
from valohai.parameters import get_parameter
from valohai.inputs import get_input_file_paths
from valohai.outputs import get_output_path

# Default target size, passed to valohai.prepare() below.
parameters = {
    "width": 640,
    "height": 480,
}
# Default image URLs for the "images" input.
inputs = {
    "images": [
        "https://upload.wikimedia.org/wikipedia/en/a/a9/Example.jpg",
        "https://homepages.cae.wisc.edu/~ece533/images/airplane.png",
    ]
}
# NOTE(review): this call uses the keywords `parameters=` / `inputs=`, while
# other valohai.prepare() calls pass `default_parameters=` /
# `default_inputs=` — confirm which names the installed valohai-utils accepts.
valohai.prepare(step="resize", parameters=parameters, inputs=inputs)


def resize_image(in_path, out_path, width, height):
    """Resize the image at ``in_path`` to ``width`` x ``height`` and save it to ``out_path``.

    The original and target sizes are printed before resizing.
    """
    image = Image.open(in_path)
    print(
        f"Resizing {in_path} ({image.size[0]}x{image.size[1]}) to {out_path} ({width}x{height})"
    )
    target_size = (width, height)
    image.resize(target_size).save(out_path)


if __name__ == '__main__':
    for image_path in valohai.inputs.get_input_file_paths("images"):
        filename = os.path.basename(image_path)
        resize_image(
コード例 #15
0
ファイル: test8.py プロジェクト: valohai/valohai-utils
# Parameter defaults in three forms: a dict carrying "default" plus several
# extra properties, a dict carrying only "default", and a bare value.
params = {
    "batch_size": {
        "default": 32,
        "type": "integer",
        "description": "Size of the training batch",
        "pass-as": "--batch:{v}",
        "optional": True,
        "multiple-separator": "!",
    },
    "learning_rate": {
        "default": 0.001,
    },
    "dropout": 0.2,
}

# Input defaults in the same three forms: a dict with extra properties, a dict
# carrying only "default", and a bare URL string.
inputs = {
    "classes": {
        "default": "s3://special-bucket/foo/bar/**.txt",
        "optional": True,
        "filename": "asdf.txt",
        "keep-directories": "full",
    },
    "images": {
        "default": "s3://special-bucket/images/**.jpg",
    },
    "weights": "s3://special-bucket/weights/yolo.pb",
}

# NOTE(review): `valohai` is not imported in the visible part of this file —
# confirm the import exists above.
valohai.prepare(step="train", default_parameters=params, default_inputs=inputs)