Example #1
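# Note: the following test examples are assumed to come from the dotscience
# library's own test suite; helpers such as _parse(), tidy_path() and
# TEST_WORKLOAD_FILE, plus the io/os/dotscience imports, are defined in the
# surrounding test module and are not shown here.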
def test_null():
    s = io.StringIO()
    dotscience.start()
    dotscience.publish(stream=s)
    m = _parse(s.getvalue())
    assert m["input"] == []
    assert m["output"] == []
    assert m["labels"] == {}
    assert m["parameters"] == {}
    assert m["summary"] == {}
    assert m["workload-file"] == TEST_WORKLOAD_FILE
Example #2
def test_parameter_n(a, b):
    s = io.StringIO()
    dotscience.start()
    dotscience.add_parameters("a", a)
    dotscience.add_parameters(b=b)
    dotscience.publish(stream=s)
    m = _parse(s.getvalue())
    assert m["output"] == []
    assert m["input"] == []
    assert m["parameters"] == {"a": a, "b": b}
    assert m["labels"] == {}
    assert m["summary"] == {}
    assert m["workload-file"] == TEST_WORKLOAD_FILE
Example #3
def test_output_1b(d):
    s = io.StringIO()
    dotscience.start()
    dp = tidy_path(os.getcwd() + "/" + d)
    assert dotscience.output(dp) == dp
    dotscience.publish(stream=s)
    m = _parse(s.getvalue())
    assert m["output"] == [os.path.relpath(dp, start=os.getcwd())]
    assert m["input"] == []
    assert m["labels"] == {}
    assert m["parameters"] == {}
    assert m["summary"] == {}
    assert m["workload-file"] == TEST_WORKLOAD_FILE
Example #4
def test_multi_publish_1():
    s1 = io.StringIO()
    dotscience.start()
    dotscience.publish("Hello", stream=s1)
    s2 = io.StringIO()
    dotscience.publish("World", stream=s2)
    m1 = _parse(s1.getvalue())
    m2 = _parse(s2.getvalue())

    assert m1["description"] == "Hello"
    assert m2["description"] == "World"
    assert m1["__ID"] != m2["__ID"]
    assert m1["start"] != m2["start"]
    assert m1["end"] != m2["end"]
Example #5
def test_output_n(d):
    d = set([tidy_path(os.getcwd() + "/" + x) for x in d])
    s = io.StringIO()
    dotscience.start()
    dotscience.add_outputs(*d)
    dotscience.publish(stream=s)
    m = _parse(s.getvalue())
    assert len(m["output"]) == len(d) and sorted(m["output"]) == sorted(
        [os.path.relpath(x, start=os.getcwd()) for x in d])
    assert m["input"] == []
    assert m["labels"] == {}
    assert m["parameters"] == {}
    assert m["summary"] == {}
    assert m["workload-file"] == TEST_WORKLOAD_FILE
Example #6
def test_start_end():
    s = io.StringIO()
    dotscience.start()
    t1 = dotscience._defaultDS.currentRun._start
    dotscience.end()
    t2 = dotscience._defaultDS.currentRun._end
    dotscience.publish(stream=s)
    m = _parse(s.getvalue())
    assert m["start"] == t1.strftime("%Y%m%dT%H%M%S.%f")
    assert m["end"] == t2.strftime("%Y%m%dT%H%M%S.%f")
    assert m["input"] == []
    assert m["output"] == []
    assert m["labels"] == {}
    assert m["parameters"] == {}
    assert m["summary"] == {}
    assert m["workload-file"] == TEST_WORKLOAD_FILE
Example #7
import dotscience as ds

# Record this file as the run's workload, start a run,
# then publish it with a description.
ds.script()
ds.start()

ds.publish("hello, world")
Example #8
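# Excerpt from a larger notebook: earlier cells are assumed to import os,
# tensorflow as tf and dotscience as ds, start the run, and train the Keras
# `model`, with X_test_preprocessed and y_test produced by preprocessing steps
# not shown here.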
ds.summary("accuracy", model.evaluate(X_test_preprocessed, y_test)[1])

# Fetch the Keras session and save the model
# The signature definition is defined by the input and output tensors,
# and stored with the default serving key

MODEL_DIR = "../model"
version = 1
export_path = os.path.join(MODEL_DIR, str(version))
print('export_path = {}\n'.format(export_path))
if os.path.isdir(export_path):
    print('\nAlready saved a model, cleaning up\n')
    os.system("rm -r " + export_path)

tf.saved_model.simple_save(tf.keras.backend.get_session(),
                           export_path,
                           inputs={'input_image': model.input},
                           outputs={t.name: t
                                    for t in model.outputs})

ds.label("model.directory", ds.output("../model"))
ds.label("model.framework", "tensorflow")
ds.label("model.framework.version", tf.__version__)

for root, dirs, files in os.walk("../model"):
    for name in files:
        ds.output(os.path.join(root, name))

ds.publish()
Example #9
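# Excerpt: earlier cells are assumed to import pickle, os and dotscience as ds;
# the roadsigns pickle is read from an attached s3/ data directory.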
from shutil import copyfile

ds.start()

roadsigns = pickle.load(open(ds.input("s3/roadsigns.p"), "rb"))

if not os.path.exists("data"):
    os.mkdir("data")

# Sample ranges of the data

samples = [{"small-train": 10000, "small-test": 1000, "small-validate": 500},
           {"large-train": 50000, "large-test": 1000, "large-validate": 839}]

for sampleset in samples:
    i = 0
    for k in sorted(sampleset.keys()):
        count = sampleset[k]
        range_start = i
        range_end = i+count
        i += count
        print ("sample", k, "start", range_start, "end", range_end)
        result = {x: roadsigns[x][range_start:range_end] for x in roadsigns.keys()}
        pickle.dump(result, open(ds.output("data/%s.p" % (k,)), "wb"))

ds.publish("created small and large sample sets from raw data in S3")

# just make a copy of the labels so we keep them together with the data
ds.start()
copyfile(ds.input("s3/signnames.csv"), ds.output("data/signnames.csv"))
ds.publish("copied signnames.csv from S3")
Example #10
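# Excerpt: earlier cells are assumed to import os, shutil, tensorflow as tf and
# dotscience as ds, start the run, define batch_size and epochs, load the
# (x_train, y_train) / (x_test, y_test) splits, and build the Keras `model`
# whose final layer is added below.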
model.add(Dense(num_classes, activation='softmax'))

model.compile(loss=tf.keras.losses.categorical_crossentropy,
              optimizer=tf.keras.optimizers.Adam(),
              metrics=['accuracy'])

model.fit(x_train,
          y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test, y_test))

score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', ds.metric("loss", score[0]))
print('Test accuracy:', ds.metric("accuracy", score[1]))

if os.path.isdir("model"):
    shutil.rmtree("model", ignore_errors=True)

MODEL_DIR = "./model"
export_path = os.path.join(MODEL_DIR)

model.save(export_path)

# copy file into the model dir for the upload
shutil.copyfile("classes.json", "model/classes.json")

ds.model(tf, "mnist", "model", classes="model/classes.json")
ds.publish("trained mnist model", deploy=True)
Example #11
import dotscience as ds
import os
import shutil
import sklearn
from sklearn import svm
from sklearn import datasets
from pickle import dump

if os.path.isdir("model"):
    shutil.rmtree("model", ignore_errors=True)
os.mkdir("model")
os.chdir("model")

ds.connect(os.getenv("DOTSCIENCE_USERNAME"), os.getenv("DOTSCIENCE_APIKEY"),
           os.getenv("DOTSCIENCE_PROJECT_NAME"), os.getenv("DOTSCIENCE_URL"))

clf = svm.SVC(gamma='scale', probability=True)
iris = datasets.load_iris()
X, y = iris.data, iris.target
clf.fit(X, y)

dump(clf, open("model.joblib", "wb"))

# copy file into the model dir for the upload
shutil.copyfile("../classes_iris.json", ds.output("classes.json"))

ds.model(sklearn, "iris", ds.output("model.joblib"), classes="classes.json")

ds.publish("trained iris model", deploy=True)