def test_live_upload(outputs_path):
    path = valohai.outputs().path("hello.txt")
    with open(path, "w") as fp:
        fp.write("hello")
    valohai.outputs().live_upload(path)
    # Test that the file was set read-only by the live upload
    with pytest.raises(IOError):
        open(path, "w")
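# The tests in this section depend on an `outputs_path` fixture that is not
# shown here. A minimal sketch of what such a fixture could look like,
# assuming valohai-utils resolves the outputs directory from the
# VH_OUTPUTS_DIR environment variable that the Valohai platform sets inside
# an execution (the fixture body is an assumption, not the project's code):
import pytest


@pytest.fixture
def outputs_path(tmpdir, monkeypatch):
    # Point valohai-utils at a temporary outputs directory for the test
    path = tmpdir.mkdir("outputs").strpath
    monkeypatch.setenv("VH_OUTPUTS_DIR", path)
    return path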
def test_compress_wildcards(tmpdir, format, remove_originals, filter, expected_files):
    source_dir = tmpdir.strpath
    created_files = create_files(source_dir)
    filename = f"hello.{format}"
    package_path = valohai.outputs("foo").compress(
        source=os.path.join(source_dir, filter),
        filename=filename,
        remove_originals=remove_originals,
    )
    for path in created_files:
        # picture.jpg should always match our filter(s), so it is compressed
        # (and removed from disk only when remove_originals is set)
        if "picture.jpg" in path:
            assert os.path.isfile(path) != remove_originals
    if format == "zip":
        with zipfile.ZipFile(package_path) as zf:
            assert len(zf.namelist()) == expected_files
            # picture.jpg should always be compressed with our filter(s)
            assert "folder/picture.jpg" in zf.namelist()
    elif "tar" in format:
        with tarfile.open(package_path, "r:*") as tf:
            assert len(list(tf)) == expected_files
            # picture.jpg should always be compressed with our filter(s)
            assert "folder/picture.jpg" in [tarinfo.name for tarinfo in list(tf)]
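# `create_files` above is a helper that is not shown in this section. A
# hypothetical sketch of what it might do; only "folder/picture.jpg" is known
# from the assertions above, the other file names are made up for illustration:
def create_files(source_dir):
    paths = []
    for relative in ("folder/picture.jpg", "folder/notes.txt", "readme.md"):
        path = os.path.join(source_dir, relative)
        os.makedirs(os.path.dirname(path), exist_ok=True)
        with open(path, "w") as fp:
            fp.write("dummy content")
        paths.append(path)
    return paths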
def main():
    # valohai.prepare enables us to update the valohai.yaml configuration file with
    # the Valohai command-line client by running `valohai yaml step batch_inference.py`
    valohai.prepare(
        step='batch-inference',
        image='tensorflow/tensorflow:2.6.0',
        default_inputs={
            'model': {
                'default': None,
                'optional': False,
            },
            'images': [
                'https://valohaidemo.blob.core.windows.net/mnist/four-inverted.png',
                'https://valohaidemo.blob.core.windows.net/mnist/five-inverted.png',
                'https://valohaidemo.blob.core.windows.net/mnist/five-normal.jpg',
            ],
        },
    )

    print('Loading model')
    model_path = valohai.inputs('model').path()
    model = load_model(model_path)

    json_blob = {}
    for image_path in valohai.inputs('images').paths():
        filename = os.path.basename(image_path)
        extension = os.path.splitext(image_path)[1].lower()
        if extension not in ['.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tiff']:
            print(f'{filename} is not an image file')
            continue
        print(f'Running inference for {filename}')
        try:
            image, inverted = process_image(Image.open(image_path))
            prediction = predict_image(model, image, inverted)
            json_blob[filename] = prediction
            print(filename, prediction)
        except Exception as exc:
            # Store the message as a string; exception objects are not JSON-serializable
            json_blob[filename] = {'error': str(exc)}
            print(f'Unable to process {filename}: {exc}', file=sys.stderr)

    print('Saving predictions')
    suffix = ''
    try:
        suffix = f'-{model_path.split("model-")[1].split(".h5")[0]}'
    except IndexError:
        print(f'Unable to get suffix from {model_path}')
    json_path = valohai.outputs().path(f'predictions{suffix}.json')
    with open(json_path, 'w') as json_file:
        json.dump(json_blob, json_file, sort_keys=True)
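# Illustrative only: the shape of the per-image prediction dict that
# predict_image is assumed to return, inferred from how compare_predictions.py
# later reads it (prediction['best_guess'] and prediction['predictions'][best_guess]).
# The digit labels and probabilities below are made-up sample values:
example_prediction = {
    'best_guess': '5',
    'predictions': {'0': 0.01, '4': 0.06, '5': 0.93},
}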
def test_compress(output_files, format, remove_originals):
    filename = f"hello.{format}"
    package_path = valohai.outputs("morjes").compress(
        output_files, filename, remove_originals=remove_originals)
    for path in output_files:
        assert os.path.isfile(path) != remove_originals
    # Quick format smoke checks.
    if format == "zip":
        with zipfile.ZipFile(package_path) as zf:
            assert zf.namelist()
    elif "tar" in format:
        with tarfile.open(package_path, "r:*") as tf:
            assert len(list(tf))
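# `output_files` above is another fixture not shown in this section;
# presumably it creates a few files and returns their paths so compress()
# has something to package. A hypothetical sketch (file names are made up):
@pytest.fixture
def output_files(outputs_path):
    paths = []
    for name in ("a.txt", "b.txt"):
        path = os.path.join(outputs_path, name)
        with open(path, "w") as fp:
            fp.write("hello")
        paths.append(path)
    return paths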
def main():
    # valohai.prepare enables us to update the valohai.yaml configuration file with
    # the Valohai command-line client by running `valohai yaml step preprocess_dataset.py`
    valohai.prepare(
        step='preprocess-dataset',
        image='python:3.9',
        default_inputs={
            'dataset': 'https://valohaidemo.blob.core.windows.net/mnist/mnist.npz',
        },
    )

    # Read input files from the Valohai inputs directory
    # This enables Valohai to version your training data
    # and cache the data for quick experimentation
    print('Loading data')
    with np.load(valohai.inputs('dataset').path(), allow_pickle=True) as file:
        x_train, y_train = file['x_train'], file['y_train']
        x_test, y_test = file['x_test'], file['y_test']

    print('Preprocessing data')
    x_train, x_test = x_train / 255.0, x_test / 255.0

    # Write output files to the Valohai outputs directory
    # This enables Valohai to version your data
    # and upload it to the default data store
    print('Saving preprocessed data')
    path = valohai.outputs().path('preprocessed_mnist.npz')
    np.savez_compressed(path, x_train=x_train, y_train=y_train,
                        x_test=x_test, y_test=y_test)
import numpy as np
import valohai as vh

vh.prepare(step='Preprocess data')

# Read input files from the Valohai inputs directory
# This enables Valohai to version your training data
# and cache the data for quick experimentation
with np.load(vh.inputs('mnist').path(), allow_pickle=True) as file:
    x_train, y_train = file['x_train'], file['y_train']
    x_test, y_test = file['x_test'], file['y_test']

x_train, x_test = x_train / 255.0, x_test / 255.0

# Write output files to the Valohai outputs directory
# This enables Valohai to version your data
# and upload it to the default data store
path = vh.outputs('dataset').path('preprocessed_mnist.npz')
np.savez(path, x_train=x_train, y_train=y_train, x_test=x_test, y_test=y_test)
# This enables Valohai to version your metadata
# and for you to use it to compare experiments
def log(epoch, logs):
    with vh.logger() as logger:
        logger.log('epoch', epoch)
        logger.log('accuracy', logs['accuracy'])
        logger.log('loss', logs['loss'])


cb = tf.keras.callbacks.LambdaCallback(on_epoch_end=log)
model.fit(x_train, y_train, epochs=vh.parameters('epochs').value, callbacks=[cb])

# Evaluate the model and print out the test metrics as JSON
test_loss, test_acc = model.evaluate(x_test, y_test, verbose=2)
with vh.logger() as logger:
    logger.log('test_accuracy', test_acc)
    logger.log('test_loss', test_loss)

# Write output files to the Valohai outputs directory
# This enables Valohai to version your data
# and upload it to the default data store
model.save(vh.outputs('model').path('model.h5'))
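# For context: each `with vh.logger() as logger:` block batches the logged
# key-value pairs and prints them to stdout as a single JSON object when the
# block exits, which Valohai parses as one metadata row. Logging inside the
# epoch callback therefore yields one row per epoch rather than one per value.
# A tiny standalone illustration (the values are made up):
import valohai as vh

with vh.logger() as logger:
    logger.log('epoch', 0)
    logger.log('accuracy', 0.91)
    logger.log('loss', 0.31)
# stdout: roughly {"epoch": 0, "accuracy": 0.91, "loss": 0.31}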
def log_metadata(epoch, logs):
    # Logs per-epoch training metrics; referenced by the LambdaCallback in
    # main() below (mirrors the logging helper shown in the earlier snippet)
    with valohai.logger() as logger:
        logger.log('epoch', epoch)
        logger.log('accuracy', logs['accuracy'])
        logger.log('loss', logs['loss'])


def main():
    # valohai.prepare enables us to update the valohai.yaml configuration file with
    # the Valohai command-line client by running `valohai yaml step train_model.py`
    valohai.prepare(
        step='train-model',
        image='tensorflow/tensorflow:2.6.0',
        default_inputs={
            'dataset': 'https://valohaidemo.blob.core.windows.net/mnist/preprocessed_mnist.npz',
        },
        default_parameters={
            'learning_rate': 0.001,
            'epochs': 5,
        },
    )

    # Read input files from the Valohai inputs directory
    # This enables Valohai to version your training data
    # and cache the data for quick experimentation
    input_path = valohai.inputs('dataset').path()
    with np.load(input_path, allow_pickle=True) as f:
        x_train, y_train = f['x_train'], f['y_train']
        x_test, y_test = f['x_test'], f['y_test']

    model = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(10),
    ])

    optimizer = tf.keras.optimizers.Adam(
        learning_rate=valohai.parameters('learning_rate').value)
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])

    # Print metrics out as JSON
    # This enables Valohai to version your metadata
    # and for you to use it to compare experiments
    callback = tf.keras.callbacks.LambdaCallback(on_epoch_end=log_metadata)
    model.fit(x_train, y_train,
              epochs=valohai.parameters('epochs').value,
              callbacks=[callback])

    # Evaluate the model and print out the test metrics as JSON
    test_loss, test_accuracy = model.evaluate(x_test, y_test, verbose=2)
    with valohai.logger() as logger:
        logger.log('test_accuracy', test_accuracy)
        logger.log('test_loss', test_loss)

    # Write output files to the Valohai outputs directory
    # This enables Valohai to version your data
    # and upload it to the default data store
    suffix = uuid.uuid4()
    output_path = valohai.outputs().path(f'model-{suffix}.h5')
    model.save(output_path)
def test_get_output_path(outputs_path, name, fragment):
    path = valohai.outputs(name).path(fragment)
    assert path == os.path.join(outputs_path, name, fragment)
    assert os.path.isdir(os.path.dirname(path))
def main():
    # valohai.prepare enables us to update the valohai.yaml configuration file with
    # the Valohai command-line client by running `valohai yaml step compare_predictions.py`
    valohai.prepare(
        step='compare-predictions',
        image='python:3.9',
        default_inputs={
            'predictions': {
                'default': None,
                'optional': False,
            },
            'models': [],
        },
    )

    # Here we have some simple example logic to compare predictions to figure out
    # which predictions are the best, so this varies from use case to use case
    BestModel = namedtuple('BestModel', 'prediction, average_best_guess, model')
    best_of_best = BestModel(prediction=None, average_best_guess=None, model=None)
    average_best_guesses = dict()
    for prediction_path in valohai.inputs('predictions').paths():
        filename = os.path.basename(prediction_path)
        extension = os.path.splitext(prediction_path)[1].lower()
        if extension != '.json':
            print(f'{filename} is not a JSON file')
            continue

        with open(prediction_path, 'r') as file:
            blob = json.load(file)

        best_guess_probabilities = []
        for sample_filename, prediction in blob.items():
            best_guess = str(prediction['best_guess'])
            probability = prediction['predictions'][best_guess]
            best_guess_probabilities.append(float(probability))

        average_best_guess = sum(best_guess_probabilities) / len(best_guess_probabilities)
        average_best_guesses[filename] = average_best_guess
        print(f'{filename} => {average_best_guess} (average best guess probability)')

        suffix = filename.split('predictions-')[1].split('.json')[0]
        model_filename = f'model-{suffix}.h5'
        if not best_of_best.average_best_guess or average_best_guess > best_of_best.average_best_guess:
            best_of_best = BestModel(
                prediction=filename,
                average_best_guess=average_best_guess,
                model=model_filename,
            )

    print(
        f'The best model is the one that generated {best_of_best.prediction} '
        f'({best_of_best.average_best_guess})'
    )

    # Copy the winning model (not merely the last one processed) to outputs
    model_path = ''
    if best_of_best.model:
        model_path = next(
            (model for model in valohai.inputs('models').paths()
             if best_of_best.model in model),
            '',
        )
    if model_path:
        shutil.copy(model_path, valohai.outputs().path(best_of_best.model))
import matplotlib.pyplot as plt
import numpy as np
import valohai

np.random.seed(19680801)
data = np.random.randn(2, 100)

fig, axs = plt.subplots(2, 2, figsize=(5, 5))
axs[0, 0].hist(data[0])
axs[1, 0].scatter(data[0], data[1])
axs[0, 1].plot(data[0], data[1])
axs[1, 1].hist2d(data[0], data[1])

save_path = valohai.outputs().path('myplot.png')
plt.savefig(save_path)
plt.show()