def test_download(tmpdir, monkeypatch, requests_mock):
    """Default inputs are fetched on first access and not fetched again afterwards.

    Three URLs are registered with ``requests_mock``; after the inputs are
    accessed the mock must have seen exactly three calls, the files must
    exist under ``VH_INPUTS_DIR``, and a second access must not add calls.
    """
    test_images_url = "https://valohai-mnist.s3.amazonaws.com/t10k-images-idx3-ubyte.gz"
    train_images_url = "https://valohai-mnist.s3.amazonaws.com/train-images-idx3-ubyte.gz"
    train_labels_url = "https://valohai-mnist.s3.amazonaws.com/train-labels-idx1-ubyte.gz"

    inputs_dir = str(tmpdir.mkdir("inputs"))
    monkeypatch.setenv("VH_INPUTS_DIR", inputs_dir)

    # Register every URL with the HTTP mock, in the same order as they are used.
    for url in (test_images_url, train_images_url, train_labels_url):
        requests_mock.get(url)

    monkeypatch.setattr(sys, "argv", ["myscript.py"])
    valohai.prepare(
        step="test",
        default_inputs={
            "example": test_images_url,
            "mnist": [train_images_url, train_labels_url],
        },
    )

    # First access: these calls trigger the downloads.
    for input_name in ("example", "mnist"):
        get_input_vfs(input_name)

    expected_uris = (
        ("example", 0, test_images_url),
        ("mnist", 0, train_images_url),
        ("mnist", 1, train_labels_url),
    )
    for input_name, file_index, uri in expected_uris:
        assert get_input_info(input_name).files[file_index].uri == uri

    assert requests_mock.call_count == 3

    expected_files = (
        ("example", "t10k-images-idx3-ubyte.gz"),
        ("mnist", "train-images-idx3-ubyte.gz"),
        ("mnist", "train-labels-idx1-ubyte.gz"),
    )
    for input_name, basename in expected_files:
        assert os.path.isfile(os.path.join(inputs_dir, input_name, basename))

    # Second access: files should be cached, so the call count must not grow.
    for input_name in ("mnist", "example"):
        get_input_vfs(input_name)
    assert requests_mock.call_count == 3
def main():
    """Run the model on every input image and write the predictions as JSON.

    The model path and image paths come from the Valohai inputs ``model`` and
    ``images``. Files whose extension is not a known image extension are
    skipped. A failure on a single image is recorded in the output under that
    image's filename instead of aborting the whole batch. The result is
    written to ``predictions<suffix>.json`` in the Valohai outputs directory,
    where ``<suffix>`` is taken from the ``model-<suffix>.h5`` part of the
    model path when present.
    """
    # valohai.prepare enables us to update the valohai.yaml configuration file with
    # the Valohai command-line client by running `valohai yaml step batch_inference.py`
    valohai.prepare(
        step='batch-inference',
        image='tensorflow/tensorflow:2.6.0',
        default_inputs={
            'model': {
                'default': None,
                'optional': False,
            },
            'images': [
                'https://valohaidemo.blob.core.windows.net/mnist/four-inverted.png',
                'https://valohaidemo.blob.core.windows.net/mnist/five-inverted.png',
                'https://valohaidemo.blob.core.windows.net/mnist/five-normal.jpg',
            ],
        },
    )

    print('Loading model')
    model_path = valohai.inputs('model').path()
    model = load_model(model_path)

    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tiff')
    json_blob = {}
    for image_path in valohai.inputs('images').paths():
        filename = os.path.basename(image_path)
        extension = os.path.splitext(image_path)[1].lower()
        if extension not in image_extensions:
            print(f'{filename} is not an image file')
            continue

        print(f'Running inference for {filename}')
        try:
            image, inverted = process_image(Image.open(image_path))
            prediction = predict_image(model, image, inverted)
            json_blob[filename] = prediction
            print(filename, prediction)
        except Exception as exc:
            # Deliberately broad: one bad image must not abort the batch.
            # Store the message as text; an exception object is not JSON
            # serializable and would make json.dump() below raise TypeError.
            json_blob[filename] = {'error': str(exc)}
            print(f'Unable to process {filename}: {exc}', file=sys.stderr)

    print('Saving predictions')
    suffix = ''
    try:
        suffix = f'-{model_path.split("model-")[1].split(".h5")[0]}'
    except IndexError:
        # The model path has no "model-" part; fall back to an empty suffix.
        print(f'Unable to get suffix from {model_path}')

    json_path = valohai.outputs().path(f'predictions{suffix}.json')
    with open(json_path, 'w') as json_file:
        json.dump(json_blob, json_file, sort_keys=True)
def test_download(tmpdir, monkeypatch, requests_mock):
    """Loading input info downloads each default input once and then reuses it.

    Three URLs are registered with ``requests_mock``; after the input info is
    loaded the mock must have seen exactly three calls, the files must exist
    under ``VH_INPUTS_DIR``, and loading again must not add calls.
    """
    archive_url = "https://valohai-mnist.s3.amazonaws.com/t10k-images-idx3-ubyte.gz"
    svg_url = "https://upload.wikimedia.org/wikipedia/commons/8/84/Example.svg"
    png_url = "https://upload.wikimedia.org/wikipedia/commons/0/01/Example_Wikipedia_sandbox_move_UI.png"

    inputs_dir = str(tmpdir.mkdir("inputs"))
    monkeypatch.setenv("VH_INPUTS_DIR", inputs_dir)

    # Register every URL with the HTTP mock, in the same order as they are used.
    for url in (archive_url, svg_url, png_url):
        requests_mock.get(url)

    monkeypatch.setattr(sys, "argv", ["myscript.py"])
    valohai.prepare(
        step="test",
        default_inputs={
            "example": archive_url,
            "myimages": [svg_url, png_url],
        },
    )

    # These calls will trigger downloads.
    expected_uris = (
        ("example", 0, archive_url),
        ("myimages", 0, svg_url),
        ("myimages", 1, png_url),
    )
    for input_name, file_index, uri in expected_uris:
        assert load_input_info(input_name).files[file_index].uri == uri

    assert requests_mock.call_count == 3

    expected_files = (
        ("example", "t10k-images-idx3-ubyte.gz"),
        ("myimages", "Example.svg"),
        ("myimages", "Example_Wikipedia_sandbox_move_UI.png"),
    )
    for input_name, basename in expected_files:
        assert os.path.isfile(os.path.join(inputs_dir, input_name, basename))

    # Second time around the files should be cached: no further requests.
    load_input_info("myimages")
    assert requests_mock.call_count == 3
def main():
    """Load the ``dataset`` input, divide the image arrays by 255.0 and save them.

    Reads ``x_train``, ``y_train``, ``x_test`` and ``y_test`` from the input
    ``.npz`` archive and writes them, with the two ``x_*`` arrays divided by
    255.0, to ``preprocessed_mnist.npz`` in the Valohai outputs directory.
    """
    # valohai.prepare lets the Valohai command-line client update valohai.yaml
    # for this step via `valohai yaml step preprocess_dataset.py`.
    valohai.prepare(
        step='preprocess-dataset',
        image='python:3.9',
        default_inputs={
            'dataset': 'https://valohaidemo.blob.core.windows.net/mnist/mnist.npz',
        },
    )

    # Inputs are read from the Valohai inputs directory so that Valohai can
    # version the training data and cache it for quick experimentation.
    print('Loading data')
    dataset_path = valohai.inputs('dataset').path()
    with np.load(dataset_path, allow_pickle=True) as archive:
        x_train = archive['x_train']
        y_train = archive['y_train']
        x_test = archive['x_test']
        y_test = archive['y_test']

    print('Preprocessing data')
    x_train = x_train / 255.0
    x_test = x_test / 255.0

    # Outputs are written to the Valohai outputs directory so that Valohai can
    # version them and upload them to the default data store.
    print('Saving preprocessed data')
    output_path = valohai.outputs().path('preprocessed_mnist.npz')
    np.savez_compressed(
        output_path,
        x_train=x_train,
        y_train=y_train,
        x_test=x_test,
        y_test=y_test,
    )
def test_prepare(tmpdir, monkeypatch):
    """Check that ``valohai.prepare`` merges defaults with command-line overrides.

    Parameters given on the command line must replace their defaults and be
    coerced to the default's type, unknown command-line parameters must be
    ignored, and an input overridden with a local file path must resolve to
    that file with no URI.
    """
    inputs_dir = str(tmpdir.mkdir("inputs"))
    monkeypatch.setenv("VH_INPUTS_DIR", inputs_dir)

    local_file = tmpdir.mkdir("sub").join("hello.txt")
    local_file.write("tiku ja taku ja joku")

    parameters = {
        "iambool": True,
        "mestringy": "asdf",
        "integerboi": 123,
        "floaty": 0.0001,
        "makemetrue": False,
        "makemeqwer": "asdf",
        "makeme321": 123,
        "makemenegative": 0.0001,
    }
    inputs = {
        "example": "https://valohai-mnist.s3.amazonaws.com/t10k-images-idx3-ubyte.gz",
        "overrideme": "https://valohai-mnist.s3.amazonaws.com/t10k-images-idx3-ubyte.gz",
        "myimages": [
            "https://upload.wikimedia.org/wikipedia/commons/8/84/Example.svg",
            "https://upload.wikimedia.org/wikipedia/commons/0/01/Example_Wikipedia_sandbox_move_UI.png",
        ],
    }

    with monkeypatch.context() as m:
        args = [
            "",
            "--makemetrue=true",
            "--makemeqwer=qwer",
            "--makeme321=321",
            "--makemenegative=-0.123",
            "--some_totally_random_parameter_to_ignore=666",
            f"--overrideme={str(local_file)}",
        ]
        m.setattr(
            sys,
            "argv",
            args,
        )
        valohai.prepare(step="test", default_parameters=parameters, default_inputs=inputs)

    # Identity checks rather than `== True`: the latter also passes for 1,
    # which would hide a failure to coerce "true" into a real bool.
    assert valohai.parameters("iambool").value is True
    assert valohai.parameters("mestringy").value == "asdf"
    assert valohai.parameters("integerboi").value == 123
    assert valohai.parameters("floaty").value == 0.0001
    assert valohai.parameters("makemetrue").value is True
    assert valohai.parameters("makemeqwer").value == "qwer"
    assert valohai.parameters("makeme321").value == 321
    assert valohai.parameters("makemenegative").value < 0.0

    # DownloadType.NEVER: only the input metadata is inspected here.
    assert (
        load_input_info("example", download=DownloadType.NEVER).files[0].uri
        == "https://valohai-mnist.s3.amazonaws.com/t10k-images-idx3-ubyte.gz"
    )
    assert (
        load_input_info("myimages", download=DownloadType.NEVER).files[0].uri
        == "https://upload.wikimedia.org/wikipedia/commons/8/84/Example.svg"
    )
    assert (
        load_input_info("myimages", download=DownloadType.NEVER).files[1].uri
        == "https://upload.wikimedia.org/wikipedia/commons/0/01/Example_Wikipedia_sandbox_move_UI.png"
    )

    # The overridden input points at a local file: no URI, but a real path.
    assert not load_input_info("overrideme", download=DownloadType.NEVER).files[0].uri
    assert os.path.isfile(
        load_input_info("overrideme", download=DownloadType.NEVER).files[0].path)
import numpy as np
import valohai as vh

vh.prepare(step='Preprocess data')

# The input archive is read from the Valohai inputs directory, which lets
# Valohai version the training data and cache it for quick experimentation.
with np.load(vh.inputs('mnist').path(), allow_pickle=True) as file:
    x_train = file['x_train']
    y_train = file['y_train']
    x_test = file['x_test']
    y_test = file['y_test']

# Divide both image arrays by 255.0.
x_train = x_train / 255.0
x_test = x_test / 255.0

# The result is written to the Valohai outputs directory, which lets Valohai
# version the data and upload it to the default data store.
path = vh.outputs('dataset').path('preprocessed_mnist.npz')
np.savez(
    path,
    x_train=x_train,
    y_train=y_train,
    x_test=x_test,
    y_test=y_test,
)
import numpy as np
import tensorflow as tf
import valohai as vh

vh.prepare(step='Train model')

# The input archive is read from the Valohai inputs directory, which lets
# Valohai version the training data and cache it for quick experimentation.
with np.load(vh.inputs('preprocessed_mnist').path(), allow_pickle=True) as f:
    x_train = f['x_train']
    y_train = f['y_train']
    x_test = f['x_test']
    y_test = f['y_test']

# Flatten -> Dense(128, relu) -> Dropout(0.2) -> Dense(10, softmax).
model = tf.keras.models.Sequential(
    [
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(10, activation='softmax'),
    ]
)

# The learning rate comes from the Valohai parameter of the same name.
optimizer = tf.keras.optimizers.Adam(
    learning_rate=vh.parameters('learning_rate').value,
)
model.compile(
    optimizer=optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Metrics are meant to be printed out as JSON so that Valohai can version the
# metadata and experiments can be compared; no such code follows in this snippet.
def main():
    """Train a small Keras classifier on the ``dataset`` input and save it.

    The learning rate and epoch count are read from the Valohai parameters
    ``learning_rate`` and ``epochs``. ``log_metadata`` is invoked at the end
    of every epoch, test loss and accuracy are logged through
    ``valohai.logger()``, and the model is saved as ``model-<uuid4>.h5`` in
    the Valohai outputs directory.
    """
    # valohai.prepare lets the Valohai command-line client update valohai.yaml
    # for this step via `valohai yaml step train_model.py`.
    valohai.prepare(
        step='train-model',
        image='tensorflow/tensorflow:2.6.0',
        default_inputs={
            'dataset': 'https://valohaidemo.blob.core.windows.net/mnist/preprocessed_mnist.npz',
        },
        default_parameters={
            'learning_rate': 0.001,
            'epochs': 5,
        },
    )

    # Inputs are read from the Valohai inputs directory so that Valohai can
    # version the training data and cache it for quick experimentation.
    input_path = valohai.inputs('dataset').path()
    with np.load(input_path, allow_pickle=True) as archive:
        x_train = archive['x_train']
        y_train = archive['y_train']
        x_test = archive['x_test']
        y_test = archive['y_test']

    layers = [
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(10),
    ]
    model = tf.keras.models.Sequential(layers)

    learning_rate = valohai.parameters('learning_rate').value
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    # The last layer has no activation, so the loss is told it receives logits.
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])

    # Metrics are printed out as JSON after each epoch so that Valohai can
    # version the metadata and experiments can be compared.
    epoch_logger = tf.keras.callbacks.LambdaCallback(on_epoch_end=log_metadata)
    epochs = valohai.parameters('epochs').value
    model.fit(x_train, y_train, epochs=epochs, callbacks=[epoch_logger])

    # Evaluate on the test split and log the resulting metrics.
    test_loss, test_accuracy = model.evaluate(x_test, y_test, verbose=2)
    with valohai.logger() as logger:
        logger.log('test_accuracy', test_accuracy)
        logger.log('test_loss', test_loss)

    # Outputs are written to the Valohai outputs directory so that Valohai can
    # version them and upload them to the default data store.
    model_id = uuid.uuid4()
    output_path = valohai.outputs().path(f'model-{model_id}.h5')
    model.save(output_path)
def test_prepare(tmpdir, monkeypatch):
    """``valohai.prepare`` applies command-line overrides and resolves local inputs.

    Covers parameter overriding and type coercion, ignoring of unknown
    command-line parameters, overriding an input with a local file, and local
    inputs given both as a list of paths and as a wildcard pattern.
    """
    inputs_dir = str(tmpdir.mkdir("inputs"))
    monkeypatch.setenv("VH_INPUTS_DIR", inputs_dir)

    override_file = tmpdir.mkdir("sub").join("hello.txt")
    override_file.write("tiku ja taku ja joku")

    data_dir = tmpdir.mkdir("data")
    first_data = data_dir.join("data1.dat")
    first_data.write("I'm a big data")
    second_data = data_dir.join("data2.dat")
    second_data.write("I'm a huge data")

    archive_url = "https://valohai-mnist.s3.amazonaws.com/t10k-images-idx3-ubyte.gz"
    svg_url = "https://upload.wikimedia.org/wikipedia/commons/8/84/Example.svg"
    png_url = "https://upload.wikimedia.org/wikipedia/commons/0/01/Example_Wikipedia_sandbox_move_UI.png"

    default_parameters = {
        "iambool": True,
        "mestringy": "asdf",
        "integerboi": 123,
        "floaty": 0.0001,
        "makemetrue": False,
        "makemefalse": True,
        "makemeqwer": "asdf",
        "makeme321": 123,
        "makemenegative": 0.0001,
    }
    default_inputs = {
        "example": archive_url,
        "overrideme": archive_url,
        "myimages": [svg_url, png_url],
        "localdata_as_list": [str(first_data), str(second_data)],
        "localdata_with_wildcard": os.path.join(str(data_dir), "*.dat"),
    }
    command_line = [
        "",
        "--makemetrue=true",
        "--makemefalse=false",
        "--makemeqwer=qwer",
        "--makeme321=321",
        "--makemenegative=-0.123",
        "--some_totally_random_parameter_to_ignore=666",
        f"--overrideme={str(override_file)}",
    ]

    with monkeypatch.context() as m:
        m.setattr(sys, "argv", command_line)
        valohai.prepare(
            step="test",
            default_parameters=default_parameters,
            default_inputs=default_inputs,
        )

    # Booleans are checked by identity so that 1/0 would not be accepted.
    for name, expected in (("iambool", True), ("makemetrue", True), ("makemefalse", False)):
        assert valohai.parameters(name).value is expected

    expected_values = (
        ("mestringy", "asdf"),
        ("integerboi", 123),
        ("floaty", 0.0001),
        ("makemeqwer", "qwer"),
        ("makeme321", 321),
    )
    for name, expected in expected_values:
        assert valohai.parameters(name).value == expected
    assert valohai.parameters("makemenegative").value < 0.0

    expected_uris = (
        ("example", 0, archive_url),
        ("myimages", 0, svg_url),
        ("myimages", 1, png_url),
    )
    for input_name, file_index, uri in expected_uris:
        assert get_input_info(input_name).files[file_index].uri == uri

    # The overridden input points at a local file: no URI, but a real path.
    assert not get_input_info("overrideme").files[0].uri
    assert os.path.isfile(get_input_info("overrideme").files[0].path)

    # Both ways of declaring local data must yield exactly the two files.
    local_inputs = ("localdata_as_list", "localdata_with_wildcard")
    for input_name in local_inputs:
        assert len(list(valohai.inputs(input_name).paths())) == 2
    for input_name in local_inputs:
        assert all(os.path.isfile(p) for p in valohai.inputs(input_name).paths())
import valohai

# Default parameter values for the step, one each of bool, str, int and float.
params = {
    "param1": True,
    "param2": "asdf",
    "param3": 123,
    "param4": 0.0001,
}

# Default inputs: "input1" is a single string containing a wildcard,
# "input2" is a list of two strings.
inputs = {"input1": "asdf/*", "input2": ["yolol", "yalala"]}


def prepare(a, b):
    """Print a placeholder message; this is not ``valohai.prepare``."""
    print(f"this is fake method {a} {b}")


# The real call: the attribute-qualified valohai.prepare, not the local
# prepare() defined above.
valohai.prepare(step="foobar1", default_parameters=params, default_inputs=inputs)
import valohai

# Declares step "foobar3" with its default parameters and inputs given
# inline as literals in the call itself.
valohai.prepare(
    step="foobar3",
    # One default each of bool, str, int and float.
    default_parameters={
        "param1": True,
        "param2": "asdf",
        "param3": 123,
        "param4": 0.0001,
    },
    # "input1" is a single string, "input2" a list of two strings.
    default_inputs={
        "input1": "asdf",
        "input2": ["yolol", "yalala"]
    },
)
import valohai

# Default parameter values for the step; both are integers.
params = {
    "seq_length": 14,
    "num_epochs": 200,
}


def prepare(a, b):
    """Print a placeholder message; this is not ``valohai.prepare``."""
    print(f"this is fake method {a} {b}")


# The real call: the attribute-qualified valohai.prepare, not the local
# prepare() defined above. This step declares an image and no inputs.
valohai.prepare(step="mystep", default_parameters=params, image="valohai/keras")
def main():
    """Pick the prediction file with the highest average confidence and export its model.

    Every ``.json`` file in the ``predictions`` input is read. For each
    sample in a file the probability of its ``best_guess`` class is taken,
    and the file is scored by the average of those probabilities. The model
    file named ``model-<suffix>.h5`` that matches the best-scoring
    ``predictions-<suffix>.json`` is then looked up among the ``models``
    input paths and, when found, copied to the Valohai outputs directory.
    """
    # valohai.prepare enables us to update the valohai.yaml configuration file with
    # the Valohai command-line client by running `valohai yaml step compare_predictions.py`
    valohai.prepare(
        step='compare-predictions',
        image='python:3.9',
        default_inputs={
            'predictions': {
                'default': None,
                'optional': False,
            },
            'models': [],
        },
    )

    # here we have some simple example logic to compare predictions to figure out which
    # predictions are the best, so this varies from use-case to use-case
    BestModel = namedtuple('BestModel', 'prediction, average_best_guess, model')
    best_of_best = BestModel(prediction=None, average_best_guess=None, model=None)
    average_best_guesses = dict()

    for prediction_path in valohai.inputs('predictions').paths():
        filename = os.path.basename(prediction_path)
        extension = os.path.splitext(prediction_path)[1].lower()
        if extension != '.json':
            print(f'{filename} is not a JSON file')
            continue

        with open(prediction_path, 'r') as file:
            blob = json.load(file)

        best_guess_probabilities = []
        for prediction in blob.values():
            best_guess = str(prediction['best_guess'])
            probability = prediction['predictions'][best_guess]
            best_guess_probabilities.append(float32(probability))

        if not best_guess_probabilities:
            # An empty file has no average; skip it instead of dividing by zero.
            print(f'{filename} contains no predictions')
            continue

        average_best_guess = sum(best_guess_probabilities) / len(
            best_guess_probabilities)
        average_best_guesses[filename] = average_best_guess
        print(
            f'{filename} => {average_best_guess} (average best guess probability)'
        )

        suffix = filename.split('predictions-')[1].split('.json')[0]
        model_filename = f"model-{suffix}.h5"

        # `is None` rather than truthiness so that an average of 0.0 counts
        # as a real score and is not overwritten by an equal one.
        if (best_of_best.average_best_guess is None
                or average_best_guess > best_of_best.average_best_guess):
            best_of_best = BestModel(
                prediction=filename,
                average_best_guess=average_best_guess,
                model=model_filename,
            )

    if best_of_best.model is None:
        print('No usable prediction files were found; no model selected')
        return

    print(
        f'The best model is the one that generated {best_of_best.prediction} ({best_of_best.average_best_guess})'
    )

    # Use the winner's model name, not the name left over from the last loop
    # iteration, which belongs to whichever file happened to be read last.
    best_model_filename = best_of_best.model
    model_path = next((model for model in valohai.inputs('models').paths()
                       if best_model_filename in model), '')
    if model_path:
        shutil.copy(model_path, valohai.outputs().path(best_model_filename))
import valohai from valohai.parameters import get_parameter from valohai.inputs import get_input_file_paths from valohai.outputs import get_output_path parameters = { "width": 640, "height": 480, } inputs = { "images": [ "https://upload.wikimedia.org/wikipedia/en/a/a9/Example.jpg", "https://homepages.cae.wisc.edu/~ece533/images/airplane.png", ] } valohai.prepare(step="resize", parameters=parameters, inputs=inputs) def resize_image(in_path, out_path, width, height): image = Image.open(in_path) print( f"Resizing {in_path} ({image.size[0]}x{image.size[1]}) to {out_path} ({width}x{height})" ) new_image = image.resize((width, height)) new_image.save(out_path) if __name__ == '__main__': for image_path in valohai.inputs.get_input_file_paths("images"): filename = os.path.basename(image_path) resize_image(
# Parameter defaults for the step. A parameter is given either as a dict of
# properties ("batch_size", "learning_rate") or as a bare default value
# ("dropout").
# NOTE(review): the property keys presumably mirror valohai.yaml parameter
# properties - confirm against the valohai-utils documentation.
params = {
    "batch_size": {
        "default": 32,
        "type": "integer",
        "description": "Size of the training batch",
        "pass-as": "--batch:{v}",
        "optional": True,
        "multiple-separator": "!",
    },
    "learning_rate": {
        "default": 0.001,
    },
    "dropout": 0.2,
}

# Input defaults for the step. As with the parameters, an input is given
# either as a dict of properties ("classes", "images") or as a bare URI
# string ("weights").
inputs = {
    "classes": {
        "default": "s3://special-bucket/foo/bar/**.txt",
        "optional": True,
        "filename": "asdf.txt",
        "keep-directories": "full",
    },
    "images": {
        "default": "s3://special-bucket/images/**.jpg",
    },
    "weights": "s3://special-bucket/weights/yolo.pb",
}

# Declares step "train" with the defaults above.
valohai.prepare(step="train", default_parameters=params, default_inputs=inputs)