def test_null():
    """A run with nothing recorded publishes all-empty metadata."""
    buf = io.StringIO()
    dotscience.start()
    dotscience.publish(stream=buf)
    meta = _parse(buf.getvalue())
    # Both file lists are empty and every mapping section is empty.
    assert meta["input"] == []
    assert meta["output"] == []
    assert meta["labels"] == {}
    assert meta["parameters"] == {}
    assert meta["summary"] == {}
    assert meta["workload-file"] == TEST_WORKLOAD_FILE
def test_parameter_n(a, b):
    """Parameters added positionally and by keyword both appear in metadata."""
    sink = io.StringIO()
    dotscience.start()
    dotscience.add_parameters("a", a)
    dotscience.add_parameters(b=b)
    dotscience.publish(stream=sink)
    meta = _parse(sink.getvalue())
    # The two parameters are recorded; everything else stays empty.
    assert meta["parameters"] == {"a": a, "b": b}
    assert meta["output"] == []
    assert meta["input"] == []
    assert meta["labels"] == {}
    assert meta["summary"] == {}
    assert meta["workload-file"] == TEST_WORKLOAD_FILE
def test_output_1b(d):
    """output() echoes the absolute path back and publishes it cwd-relative."""
    sink = io.StringIO()
    dotscience.start()
    abs_path = tidy_path(os.getcwd() + "/" + d)
    # output() is a pass-through: it returns exactly what it was given.
    assert dotscience.output(abs_path) == abs_path
    dotscience.publish(stream=sink)
    meta = _parse(sink.getvalue())
    assert meta["output"] == [os.path.relpath(abs_path, start=os.getcwd())]
    assert meta["input"] == []
    assert meta["labels"] == {}
    assert meta["parameters"] == {}
    assert meta["summary"] == {}
    assert meta["workload-file"] == TEST_WORKLOAD_FILE
def test_multi_publish_1():
    """Two publishes in one session carry their own descriptions and
    distinct run identity and timing fields."""
    first = io.StringIO()
    dotscience.start()
    dotscience.publish("Hello", stream=first)
    second = io.StringIO()
    dotscience.publish("World", stream=second)
    run_a = _parse(first.getvalue())
    run_b = _parse(second.getvalue())
    assert run_a["description"] == "Hello"
    assert run_b["description"] == "World"
    # Each publish is a separate run: ID and timestamps must all differ.
    for field in ("__ID", "start", "end"):
        assert run_a[field] != run_b[field]
def test_output_n(d):
    """add_outputs() records every distinct path, published cwd-relative.

    Fixes: ``set([... for ...])`` built a throwaway list inside the set
    constructor — a set comprehension does the same work directly
    (flake8-comprehensions C403).  The separate ``len(...) == len(...)``
    check was redundant: equality of the sorted lists already implies
    equal length.
    """
    # De-duplicate via tidy_path so we compare against what was registered.
    d = {tidy_path(os.getcwd() + "/" + x) for x in d}
    s = io.StringIO()
    dotscience.start()
    dotscience.add_outputs(*d)
    dotscience.publish(stream=s)
    m = _parse(s.getvalue())
    # Order-insensitive comparison of the published output list.
    assert sorted(m["output"]) == sorted(
        os.path.relpath(x, start=os.getcwd()) for x in d
    )
    assert m["input"] == []
    assert m["labels"] == {}
    assert m["parameters"] == {}
    assert m["summary"] == {}
    assert m["workload-file"] == TEST_WORKLOAD_FILE
def test_start_end():
    """Published start/end fields match the run's internal datetimes,
    formatted as compact ISO-like timestamps."""
    stream = io.StringIO()
    dotscience.start()
    started = dotscience._defaultDS.currentRun._start
    dotscience.end()
    ended = dotscience._defaultDS.currentRun._end
    dotscience.publish(stream=stream)
    meta = _parse(stream.getvalue())
    stamp = "%Y%m%dT%H%M%S.%f"
    assert meta["start"] == started.strftime(stamp)
    assert meta["end"] == ended.strftime(stamp)
    # Nothing else was recorded on this run.
    assert meta["input"] == []
    assert meta["output"] == []
    for section in ("labels", "parameters", "summary"):
        assert meta[section] == {}
    assert meta["workload-file"] == TEST_WORKLOAD_FILE
# Minimal Dotscience "hello, world" example: start a run and publish it.
import dotscience as ds

# NOTE(review): ds.script() presumably declares this workload as a plain
# script (as opposed to a notebook) -- confirm against the dotscience docs.
ds.script()
ds.start()
ds.publish("hello, world")
# Record the held-out accuracy as this run's summary statistic.
# (model, X_test_preprocessed and y_test are defined earlier in the script.)
ds.summary("accuracy", model.evaluate(X_test_preprocessed, y_test)[1])

# Fetch the Keras session and save the model
# The signature definition is defined by the input and output tensors,
# and stored with the default serving key
MODEL_DIR = "../model"
version = 1
export_path = os.path.join(MODEL_DIR, str(version))
print('export_path = {}\n'.format(export_path))

if os.path.isdir(export_path):
    print('\nAlready saved a model, cleaning up\n')
    # Fixed: was os.system("rm -r " + export_path) -- a shell command built
    # by string concatenation (breaks on spaces/metacharacters and is
    # injection-prone).  shutil.rmtree removes the tree without a shell.
    import shutil
    shutil.rmtree(export_path)

tf.saved_model.simple_save(
    tf.keras.backend.get_session(),
    export_path,
    inputs={'input_image': model.input},
    outputs={t.name: t for t in model.outputs},
)

# Label the run with where the model lives and what framework built it.
ds.label("model.directory", ds.output("../model"))
ds.label("model.framework", "tensorflow")
ds.label("model.framework.version", tf.__version__)

# Register every file under the exported model directory as a run output.
for root, _dirs, files in os.walk("../model"):
    for name in files:
        ds.output(os.path.join(root, name))

ds.publish()
from shutil import copyfile

# Run 1: carve the raw S3 road-sign pickle into small and large
# train/test/validate sample files under data/.
ds.start()
# Fixed: pickle.load(open(...)) left the file handle unclosed; a context
# manager closes it deterministically.
with open(ds.input("s3/roadsigns.p"), "rb") as src:
    roadsigns = pickle.load(src)

if not os.path.exists("data"):
    os.mkdir("data")

# Sample ranges of the data
samples = [{"small-train": 10000, "small-test": 1000, "small-validate": 500},
           {"large-train": 50000, "large-test": 1000, "large-validate": 839}]
for sampleset in samples:
    # Running offset: each named sample takes the next consecutive,
    # non-overlapping slice of the raw arrays.
    i = 0
    for k in sorted(sampleset.keys()):
        count = sampleset[k]
        range_start = i
        range_end = i + count
        i += count
        print("sample", k, "start", range_start, "end", range_end)
        result = {x: roadsigns[x][range_start:range_end] for x in roadsigns.keys()}
        # Fixed: pickle.dump(result, open(..., "wb")) leaked the write
        # handle, so flushing relied on interpreter refcounting.
        with open(ds.output("data/%s.p" % (k,)), "wb") as dst:
            pickle.dump(result, dst)
ds.publish("created small and large sample sets from raw data in S3")

# just make a copy of the labels so we keep them together with the data
ds.start()
copyfile(ds.input("s3/signnames.csv"), ds.output("data/signnames.csv"))
ds.publish("copied signnames.csv from S3")
# Final softmax classification layer, then compile, train and evaluate.
# (model, Dense, num_classes, batch_size, epochs and the x/y arrays are
# defined earlier in the script.)
model.add(Dense(num_classes, activation='softmax'))
model.compile(loss=tf.keras.losses.categorical_crossentropy,
              optimizer=tf.keras.optimizers.Adam(),
              metrics=['accuracy'])
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test, y_test))

# Record loss/accuracy as Dotscience metrics; ds.metric returns the value,
# so it can be printed inline.
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', ds.metric("loss", score[0]))
print('Test accuracy:', ds.metric("accuracy", score[1]))

# Re-create the model export directory from scratch.
if os.path.isdir("model"):
    shutil.rmtree("model", ignore_errors=True)
MODEL_DIR = "./model"
# Fixed: export_path = os.path.join(MODEL_DIR) was a no-op single-argument
# join; the export path is simply MODEL_DIR.
export_path = MODEL_DIR
model.save(export_path)

# copy file into the model dir for the upload
shutil.copyfile("classes.json", "model/classes.json")
ds.model(tf, "mnist", "model", classes="model/classes.json")
ds.publish("trained mnist model", deploy=True)
import dotscience as ds
import os
import shutil
import sklearn
from sklearn import svm
from sklearn import datasets
from pickle import dump

# Re-create the model directory from scratch and work inside it, so all
# artifact paths below are relative to it.
if os.path.isdir("model"):
    shutil.rmtree("model", ignore_errors=True)
os.mkdir("model")
os.chdir("model")

# Connect to Dotscience with credentials taken from the environment.
ds.connect(os.getenv("DOTSCIENCE_USERNAME"),
           os.getenv("DOTSCIENCE_APIKEY"),
           os.getenv("DOTSCIENCE_PROJECT_NAME"),
           os.getenv("DOTSCIENCE_URL"))

# Train a probabilistic SVM classifier on the iris dataset.
clf = svm.SVC(gamma='scale', probability=True)
iris = datasets.load_iris()
X, y = iris.data, iris.target
clf.fit(X, y)

# Fixed: dump(clf, open("model.joblib", "wb")) leaked the write handle;
# the context manager guarantees the file is flushed and closed.
with open("model.joblib", "wb") as f:
    dump(clf, f)

# copy file into the model dir for the upload
shutil.copyfile("../classes_iris.json", ds.output("classes.json"))
ds.model(sklearn, "iris", ds.output("model.joblib"), classes="classes.json")
ds.publish("trained iris model", deploy=True)