Example #1
 def test_input_single(self):
     script = """
     x2 = x1 + 1
     x3 = x1 + 2
     """
     script = dml(script).input("x1", 5).output("x2", "x3")
     self.assertEqual(ml.execute(script).get("x2", "x3"), [6, 7])
Example #2
 def test_matrix_toNumPy(self):
     script = """
     m2 = m1 * 2
     """
     rdd1 = sc.parallelize(["1.0,2.0", "3.0,4.0"])
     script = dml(script).input(m1=rdd1).output("m2")
     m2 = ml.execute(script).get("m2")
     self.assertTrue((m2.toNumPy() == np.array([[2.0, 4.0], [6.0, 8.0]])).all())
Example #3
 def test_matrix_toDF(self):
     sums = """
     s1 = sum(m1)
     m2 = m1 * 2
     """
     rdd1 = sc.parallelize(["1.0,2.0", "3.0,4.0"])
     script = dml(sums).input(m1=rdd1).output("m2")
     m2 = ml.execute(script).get("m2")
     self.assertEqual(repr(m2.toDF()), "DataFrame[__INDEX: double, C1: double, C2: double]")
Example #4
 def test_output_list(self):
     script = """
     x1 = 0.2
     x2 = x1 + 1
     x3 = x1 + 2
     """
     script = dml(script).output("x1", "x2", "x3")
     self.assertEqual(ml.execute(script).get("x1", "x2"), [0.2, 1.2])
     self.assertEqual(ml.execute(script).get("x1", "x3"), [0.2, 2.2])
Example #5
 def test_output_matrix(self):
     sums = """
     s1 = sum(m1)
     m2 = m1 * 2
     """
     rdd1 = sc.parallelize(["1.0,2.0", "3.0,4.0"])
     script = dml(sums).input(m1=rdd1).output("s1", "m2")
     s1, m2 = ml.execute(script).get("s1", "m2")
     self.assertEqual((s1, repr(m2)), (10.0, "Matrix"))
Example #6
 def test_rdd(self):
     sums = """
     s1 = sum(m1)
     s2 = sum(m2)
     s3 = 'whatever'
     """
     rdd1 = sc.parallelize(["1.0,2.0", "3.0,4.0"])
     rdd2 = sc.parallelize(["5.0,6.0", "7.0,8.0"])
     script = dml(sums).input(m1=rdd1).input(m2=rdd2).output("s1", "s2", "s3")
     self.assertEqual(ml.execute(script).get("s1", "s2", "s3"), [10.0, 26.0, "whatever"])
Example #7
def dft_systemml(signal, name):
    prog = dml(dml_script).input('signal', signal).output('DFT')

    return (

        #execute the script inside the SystemML engine running on top of Apache Spark
        ml.execute(prog)

        #read result from SystemML execution back as SystemML Matrix
        .get('DFT')

        #convert SystemML Matrix to Apache Spark DataFrame
        .toDF()

        #rename default column names
        .selectExpr('C1 as %sa' % (name), 'C2 as %sb' % (name))

        #add unique ID per row for later joining
        .withColumn("id", monotonically_increasing_id()))
Example #10
 def test_input(self):
     script = """
     x3 = x1 + x2
     """
     script = dml(script).input(x1=5, x2=3).output("x3")
     self.assertEqual(ml.execute(script).get("x3"), 8)
Example #11
 def test_output_string(self):
     script = dml("x1 = 'Hello World'").output("x1")
     self.assertEqual(ml.execute(script).get("x1"), "Hello World")
Example #12

# To get consistent results we switch from a random matrix initialization to something deterministic

# In[24]:


u = np.arange(100000).reshape((100, 1000))
s = np.arange(100000).reshape((1000, 100))
w = np.arange(10000).reshape((100, 100))


# In[25]:


prog = dml(script).input('U', u).input('S', s).input('W', w).output('res')
res = ml.execute(prog).get('res')
print(res)


# If everything runs fine you should get *6244089899151.321* as the result. Feel free to submit your DML script to the grader now!
# 
# ### Submission

# In[26]:


get_ipython().system(u'rm -f rklib.py')
get_ipython().system(u'wget https://raw.githubusercontent.com/romeokienzler/developerWorks/master/coursera/ai/rklib.py')

# In order to show you the advantage of SystemML over numpy we've blown up the sizes of the matrices. Unfortunately, on a 1-2 worker Spark cluster it takes quite some time to complete. Therefore we've stripped the example down to smaller matrices below, but we've kept the code in case you are curious to check it out. You might want to use some more workers, which you can easily configure in the environment settings of the project within Watson Studio. Just be aware that you're currently limited to 50 free capacity unit hours per month, which are consumed by the additional workers. (A rough NumPy sketch of the same expression follows the script below, for comparison.)

# In[7]:

script = """
U = rand(rows=1000,cols=10000, seed=5)
S = rand(rows=10000,cols=1000, seed=23)
W = rand(rows=1000,cols=1000, seed=42)
res = sum(t(U) %*% (W * (U %*% S)))
"""

# To get consistent results we switch from a random matrix initialization to something deterministic

# In[8]:

prog = dml(script).output('res')
res = ml.execute(prog).get('res')
print(res)

# If everything runs fine you should get *6252492444241.075* as the result (or something in that ballpark). Feel free to submit your DML script to the grader now!
#
# ### Submission

# In[9]:

get_ipython().system(u'rm -f rklib.py')
get_ipython().system(
    u'wget https://raw.githubusercontent.com/romeokienzler/developerWorks/master/coursera/ai/rklib.py'
)

# In[11]:
import os
import numpy as np
from pyspark.sql.functions import col, max
import systemml  # pip3 install systemml
from systemml import MLContext, dml, dmlFromFile
from pyspark.context import SparkContext
from pyspark.sql import SQLContext

sc = SparkContext()
sqlContext = SQLContext(sc)
ml = MLContext(sc)
# train_df = sqlContext.read.load('data/train_256.parquet')
val_df = sqlContext.read.load('data/val_256.parquet')

X_val = val_df.select("__INDEX", "sample")
ml.setStatistics(True).setStatisticsMaxHeavyHitters(30).setExplain(True)
script = dml("resnet_prediction_parfor_rowwisecropping.dml").input(
    X=X_val).output("Y")
Y = ml.execute(script).get("Y").toDF()
Y.show()
Example #15
    start_w = ceil((Win - Wout) / 2)
    end_w = start_w + Wout - 1
    mask = matrix(0, rows=Hin, cols=Win)
    temp_mask = matrix(1, rows=Hout, cols=Wout)
    mask[start_h:end_h, start_w:end_w] = temp_mask
    mask = matrix(mask, rows=1, cols=Hin*Win)
    mask = cbind(cbind(mask, mask), mask)
    out = removeEmpty(target=(input+1), margin="cols", select=mask) - 1
  }
  X = crop_rgb(X, 256, 256, 224, 224)
  # Scale images to [-1,1]
  X = X / 255
  X = X * 2 - 1
  # One-hot encode the labels
  num_tumor_classes = 3
  n = nrow(y)
  Y = table(seq(1, n), y, n, num_tumor_classes)
  """
    outputs = ("X", "Y")
    script = dml(script).input(X=X_df, y=y_df).output(*outputs)
    X, Y = ml.execute(script).get(*outputs)
    return X, Y
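
# A minimal NumPy sketch of the scaling and one-hot-encoding steps that the DML above
# performs (an added illustration, not part of the original script; the DML version also
# center-crops the images via crop_rgb and runs on distributed SystemML matrices).
# 'x_np' and 'y_np' are made-up stand-ins for the flattened image rows and 1-based labels.
import numpy as np

x_np = np.random.randint(0, 256, size=(4, 224 * 224 * 3)).astype(np.float64)  # flattened RGB rows
y_np = np.array([1, 3, 2, 1])                                                 # 1-based class labels

x_np = x_np / 255       # scale pixel values to [0, 1]
x_np = x_np * 2 - 1     # then shift to [-1, 1]

num_tumor_classes = 3
n = y_np.shape[0]
Y_np = np.zeros((n, num_tumor_classes))
Y_np[np.arange(n), y_np - 1] = 1    # equivalent of table(seq(1, n), y, n, num_tumor_classes)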


X_val, Y_val = preprocess(val_df)

ml.setStatistics(True).setStatisticsMaxHeavyHitters(30).setExplain(True)
script = dml("resnet_prediction_parfor.dml").input(X=X_val).output("Y")
Y = ml.execute(script).get("Y").toDF()
Y.show()