def test_live_upload(outputs_path):
    """Check that a file handed to ``live_upload`` can no longer be written.

    A small file is written through the outputs path, passed to
    ``live_upload``, and then re-opening it for writing is expected to raise
    ``IOError``.
    """
    path = valohai.outputs().path("hello.txt")
    with open(path, "w") as fp:
        fp.write("hello")
    valohai.outputs().live_upload(path)

    with pytest.raises(IOError):
        # Test the file is set read-only.  The ``with`` makes sure the handle
        # is closed if the open unexpectedly succeeds, so a failing run does
        # not leave a file descriptor behind.
        with open(path, "w"):
            pass
def test_compress_wildcards(tmpdir, format, remove_originals, filter,
                            expected_files):
    """Compress files selected by a wildcard filter and inspect the archive.

    Files are created under ``tmpdir``, compressed through
    ``valohai.outputs("foo").compress`` using ``filter`` as the source
    pattern, and the resulting zip/tar package is checked for the expected
    number of entries and for ``folder/picture.jpg``.
    """
    root = tmpdir.strpath
    originals = create_files(root)

    archive_path = valohai.outputs("foo").compress(
        source=os.path.join(root, filter),
        filename=f"hello.{format}",
        remove_originals=remove_originals,
    )

    # picture.jpg should be always compressed with our filter(s), so it is the
    # original whose presence on disk must mirror remove_originals.
    for original in originals:
        if "picture.jpg" not in original:
            continue
        assert os.path.isfile(original) != remove_originals

    if format == "zip":
        with zipfile.ZipFile(archive_path) as archive:
            names = archive.namelist()
            assert len(names) == expected_files
            assert "folder/picture.jpg" in names
    elif "tar" in format:
        with tarfile.open(archive_path, "r:*") as archive:
            names = [member.name for member in archive]
            assert len(names) == expected_files
            assert "folder/picture.jpg" in names
def main():
    """Run the model over every input image and save the results as JSON.

    The model is loaded from the ``model`` input, each file of the ``images``
    input with a known image extension is processed and predicted, and the
    per-file results (or the error message for files that failed) are written
    to ``predictions<suffix>.json`` in the outputs directory.  The suffix is
    taken from the part of the model path between ``model-`` and ``.h5``
    when that pattern is present.
    """
    # valohai.prepare enables us to update the valohai.yaml configuration file with
    # the Valohai command-line client by running `valohai yaml step batch_inference.py`

    valohai.prepare(
        step='batch-inference',
        image='tensorflow/tensorflow:2.6.0',
        default_inputs={
            'model': {
                'default': None,
                'optional': False,
            },
            'images': [
                'https://valohaidemo.blob.core.windows.net/mnist/four-inverted.png',
                'https://valohaidemo.blob.core.windows.net/mnist/five-inverted.png',
                'https://valohaidemo.blob.core.windows.net/mnist/five-normal.jpg',
            ],
        },
    )

    print('Loading model')
    model_path = valohai.inputs('model').path()
    model = load_model(model_path)

    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tiff')
    json_blob = {}
    for image_path in valohai.inputs('images').paths():
        filename = os.path.basename(image_path)

        extension = os.path.splitext(image_path)[1].lower()
        if extension not in image_extensions:
            print(f'{filename} is not an image file')
            continue

        print(f'Running inference for {filename}')
        try:
            image, inverted = process_image(Image.open(image_path))
            prediction = predict_image(model, image, inverted)
            json_blob[filename] = prediction
            print(filename, prediction)
        except Exception as exc:
            # Best effort per image: one bad file must not stop the batch.
            # Store the message rather than the exception object, because an
            # exception instance is not JSON serializable and would make the
            # json.dump below raise TypeError and drop every prediction.
            json_blob[filename] = {'error': str(exc)}
            print(f'Unable to process {filename}: {exc}', file=sys.stderr)

    print('Saving predictions')
    suffix = ''
    try:
        suffix = f'-{model_path.split("model-")[1].split(".h5")[0]}'
    except IndexError:
        # No "model-" marker in the path: fall back to an unsuffixed file name.
        print(f'Unable to get suffix from {model_path}')

    json_path = valohai.outputs().path(f'predictions{suffix}.json')
    with open(json_path, 'w') as json_file:
        json.dump(json_blob, json_file, sort_keys=True)
def test_compress(output_files, format, remove_originals):
    """Compress ``output_files`` into one package and sanity-check the result.

    After compression each original file must exist exactly when
    ``remove_originals`` is false, and the produced zip/tar archive must
    contain at least one entry.
    """
    archive_path = valohai.outputs("morjes").compress(
        output_files,
        f"hello.{format}",
        remove_originals=remove_originals,
    )

    for original in output_files:
        still_on_disk = os.path.isfile(original)
        assert still_on_disk != remove_originals

    # Quick format smoke checks.
    if format == "zip":
        with zipfile.ZipFile(archive_path) as archive:
            assert archive.namelist()
    elif "tar" in format:
        with tarfile.open(archive_path, "r:*") as archive:
            assert len(list(archive))
Exemple #5
0
def main():
    """Scale the MNIST images and store the arrays as a compressed archive.

    Reads the ``dataset`` input (an ``.npz`` archive with ``x_train``,
    ``y_train``, ``x_test`` and ``y_test``), divides both image arrays by
    255.0 and writes all four arrays to ``preprocessed_mnist.npz`` in the
    outputs directory.
    """
    # valohai.prepare enables us to update the valohai.yaml configuration file with
    # the Valohai command-line client by running `valohai yaml step preprocess_dataset.py`

    valohai.prepare(
        step='preprocess-dataset',
        image='python:3.9',
        default_inputs={
            'dataset':
            'https://valohaidemo.blob.core.windows.net/mnist/mnist.npz',
        },
    )

    # Inputs are read from the Valohai inputs directory, which lets Valohai
    # version the training data and cache it for quick experimentation.

    print('Loading data')
    dataset_path = valohai.inputs('dataset').path()
    # NOTE(review): allow_pickle=True permits unpickling object arrays from
    # the archive -- confirm it is actually required for this dataset.
    with np.load(dataset_path, allow_pickle=True) as archive:
        x_train = archive['x_train']
        y_train = archive['y_train']
        x_test = archive['x_test']
        y_test = archive['y_test']

    print('Preprocessing data')
    x_train = x_train / 255.0
    x_test = x_test / 255.0

    # Outputs are written to the Valohai outputs directory, which lets Valohai
    # version the data and upload it to the default data store.

    print('Saving preprocessed data')
    arrays = {
        'x_train': x_train,
        'y_train': y_train,
        'x_test': x_test,
        'y_test': y_test,
    }
    np.savez_compressed(valohai.outputs().path('preprocessed_mnist.npz'),
                        **arrays)
Exemple #6
0
import numpy as np
import valohai as vh


vh.prepare(step='Preprocess data')

# Inputs are read from the Valohai inputs directory, which lets Valohai
# version the training data and cache it for quick experimentation.

mnist_path = vh.inputs('mnist').path()
# NOTE(review): allow_pickle=True permits unpickling object arrays from the
# archive -- confirm it is actually required for this dataset.
with np.load(mnist_path, allow_pickle=True) as file:
    x_train = file['x_train']
    y_train = file['y_train']
    x_test = file['x_test']
    y_test = file['y_test']

# Divide both image arrays by 255.0
x_train = x_train / 255.0
x_test = x_test / 255.0


# Outputs are written to the Valohai outputs directory, which lets Valohai
# version the data and upload it to the default data store.

path = vh.outputs('dataset').path('preprocessed_mnist.npz')
np.savez(
    path,
    x_train=x_train,
    y_train=y_train,
    x_test=x_test,
    y_test=y_test,
)
Exemple #7
0
# This enables Valohai to version your metadata
# and for you to use it to compare experiments


def log(epoch, logs):
    """Log the epoch number plus its accuracy and loss through ``vh.logger``.

    ``logs`` must provide the ``'accuracy'`` and ``'loss'`` keys.
    """
    with vh.logger() as logger:
        logger.log('epoch', epoch)
        for metric in ('accuracy', 'loss'):
            logger.log(metric, logs[metric])


# Call log() at the end of every epoch while the model trains
cb = tf.keras.callbacks.LambdaCallback(on_epoch_end=log)

epochs = vh.parameters('epochs').value
model.fit(x_train, y_train, epochs=epochs, callbacks=[cb])

# Evaluate the model and log the test metrics

test_loss, test_acc = model.evaluate(x_test, y_test, verbose=2)
with vh.logger() as logger:
    for metric, value in (('test_accuracy', test_acc),
                          ('test_loss', test_loss)):
        logger.log(metric, value)

# Outputs are written to the Valohai outputs directory, which lets Valohai
# version the data and upload it to the default data store.

model_path = vh.outputs('model').path('model.h5')
model.save(model_path)
Exemple #8
0
def main():
    """Train a small dense classifier and save it to the outputs directory.

    Loads the four arrays from the ``dataset`` input, fits a
    Flatten/Dense/Dropout/Dense Keras model using the ``learning_rate`` and
    ``epochs`` parameters, logs the test accuracy and loss, and saves the
    model as ``model-<uuid4>.h5``.
    """
    # valohai.prepare enables us to update the valohai.yaml configuration file with
    # the Valohai command-line client by running `valohai yaml step train_model.py`

    valohai.prepare(
        step='train-model',
        image='tensorflow/tensorflow:2.6.0',
        default_inputs={
            'dataset':
            'https://valohaidemo.blob.core.windows.net/mnist/preprocessed_mnist.npz',
        },
        default_parameters={
            'learning_rate': 0.001,
            'epochs': 5,
        },
    )

    # Inputs are read from the Valohai inputs directory, which lets Valohai
    # version the training data and cache it for quick experimentation.

    dataset_path = valohai.inputs('dataset').path()
    # NOTE(review): allow_pickle=True permits unpickling object arrays from
    # the archive -- confirm it is actually required for this dataset.
    with np.load(dataset_path, allow_pickle=True) as archive:
        x_train = archive['x_train']
        y_train = archive['y_train']
        x_test = archive['x_test']
        y_test = archive['y_test']

    layers = [
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(10),
    ]
    model = tf.keras.models.Sequential(layers)

    learning_rate = valohai.parameters('learning_rate').value
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        # The final Dense layer has no activation, hence from_logits=True.
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=['accuracy'],
    )

    # log_metadata is called at the end of every epoch; logging metrics this
    # way lets Valohai version the metadata so experiments can be compared.

    metadata_callback = tf.keras.callbacks.LambdaCallback(
        on_epoch_end=log_metadata)
    epochs = valohai.parameters('epochs').value
    model.fit(x_train,
              y_train,
              epochs=epochs,
              callbacks=[metadata_callback])

    # Evaluate the model and log the test metrics

    test_loss, test_accuracy = model.evaluate(x_test, y_test, verbose=2)
    with valohai.logger() as logger:
        logger.log('test_accuracy', test_accuracy)
        logger.log('test_loss', test_loss)

    # Outputs are written to the Valohai outputs directory, which lets Valohai
    # version the data and upload it to the default data store.

    model_name = f'model-{uuid.uuid4()}.h5'
    model.save(valohai.outputs().path(model_name))
def test_get_output_path(outputs_path, name, fragment):
    """Check the path returned for an output fragment.

    The path must equal ``outputs_path/name/fragment`` and its parent
    directory must exist after the call.
    """
    actual = valohai.outputs(name).path(fragment)
    expected = os.path.join(outputs_path, name, fragment)
    assert actual == expected

    parent_dir = os.path.dirname(actual)
    assert os.path.isdir(parent_dir)
Exemple #10
0
def main():
    """Pick the best prediction file and copy its model to the outputs.

    Every JSON file of the ``predictions`` input is scored by the average
    probability of each sample's best guess.  The file with the highest
    average wins; the matching ``model-<suffix>.h5`` is looked up among the
    ``models`` input paths and, when found, copied to the outputs directory.
    Files that are not JSON, contain no usable predictions, or whose name
    does not follow ``predictions-<suffix>.json`` are reported and skipped.
    """
    # valohai.prepare enables us to update the valohai.yaml configuration file with
    # the Valohai command-line client by running `valohai yaml step compare_predictions.py`

    valohai.prepare(
        step='compare-predictions',
        image='python:3.9',
        default_inputs={
            'predictions': {
                'default': None,
                'optional': False,
            },
            'models': [],
        },
    )

    # here we have some simple example logic to compare predictions to figure out which
    # predictions are the best, so this varies from use-case to use-case
    BestModel = namedtuple('BestModel',
                           'prediction, average_best_guess, model')
    best_of_best = BestModel(prediction=None,
                             average_best_guess=None,
                             model=None)

    for prediction_path in valohai.inputs('predictions').paths():
        filename = os.path.basename(prediction_path)

        extension = os.path.splitext(prediction_path)[1].lower()
        if extension != '.json':
            print(f'{filename} is not a JSON file')
            continue

        with open(prediction_path, 'r') as file:
            blob = json.load(file)

        best_guess_probabilities = []
        for sample_filename, prediction in blob.items():
            # Entries without a best guess (e.g. samples recorded as failed)
            # cannot be scored, so they are left out of the average.
            if 'best_guess' not in prediction:
                print(f'{filename}: skipping {sample_filename}, no best guess')
                continue
            best_guess = str(prediction['best_guess'])
            probability = prediction['predictions'][best_guess]
            best_guess_probabilities.append(float32(probability))

        # Guard the division below against files with nothing to average.
        if not best_guess_probabilities:
            print(f'{filename} contains no usable predictions')
            continue

        average_best_guess = sum(best_guess_probabilities) / len(
            best_guess_probabilities)
        print(
            f'{filename} => {average_best_guess} (average best guess probability)'
        )

        try:
            suffix = filename.split('predictions-')[1].split('.json')[0]
        except IndexError:
            # Without the suffix there is no way to name the matching model.
            print(f'Unable to get model suffix from {filename}')
            continue
        model_filename = f"model-{suffix}.h5"

        # Compare against None explicitly so an average of 0.0 still counts
        # as a recorded score.
        if (best_of_best.average_best_guess is None
                or average_best_guess > best_of_best.average_best_guess):
            best_of_best = BestModel(
                prediction=filename,
                average_best_guess=average_best_guess,
                model=model_filename,
            )

    if best_of_best.model is None:
        print('No usable predictions found, unable to pick the best model')
        return

    print(
        f'The best model is the one that generated {best_of_best.prediction} ({best_of_best.average_best_guess})'
    )

    # Look up the winner's model, not the model of whichever prediction file
    # happened to be processed last.
    best_model_filename = best_of_best.model
    model_path = next((model for model in valohai.inputs('models').paths()
                       if best_model_filename in model), '')
    if model_path:
        shutil.copy(model_path, valohai.outputs().path(best_model_filename))
Exemple #11
0
import matplotlib.pyplot as plt
import numpy as np
import valohai

# Fixed seed so the random sample is the same on every run
np.random.seed(19680801)
data = np.random.randn(2, 100)
xs, ys = data

# 2x2 grid: histogram and line plot on top, scatter and 2D histogram below
fig, axs = plt.subplots(2, 2, figsize=(5, 5))
(top_left, top_right), (bottom_left, bottom_right) = axs
top_left.hist(xs)
bottom_left.scatter(xs, ys)
top_right.plot(xs, ys)
bottom_right.hist2d(xs, ys)

# Save the figure into the Valohai outputs directory before showing it
save_path = valohai.outputs().path('myplot.png')
plt.savefig(save_path)

plt.show()