#
# Please fill in the sections labelled with "###YOUR_CODE_GOES_HERE###"
#
# The purpose of this assignment is to learn how feature engineering boosts model performance. You will apply the Discrete Fourier Transform to the accelerometer sensor time series, thereby transforming the dataset from the time domain to the frequency domain.
#
# After that, you’ll use a classification algorithm of your choice to create a model and submit the new predictions to the grader. Done.
#
# Please make sure you run this notebook from an Apache Spark 2.3 notebook.
#
# So the first thing we need to ensure is that we are on the latest version of SystemML, which is 1.3.0 (as of March 20th, '19). Please use the code block below to check if you are already on 1.3.0 or higher. 1.3 contains a necessary fix, which is why we are running against the SNAPSHOT.
#

# In[1]:

from systemml import MLContext
ml = MLContext(spark)
ml.version()

#
#
# If you are below version 1.3.0, or you got the error message "No module named 'systemml'", please execute the next two code blocks and then
#
# # PLEASE RESTART THE KERNEL !!!
#
# Otherwise your changes won't take effect. Just double-check every time you run this notebook that you are on SystemML 1.3.
#

# In[1]:

get_ipython().system(
    'pip install https://github.com/IBM/coursera/blob/master/systemml-1.3.0-SNAPSHOT-python.tar.gz?raw=true'
)

# In[2]:

# Read the accelerometer recordings from Cloudant and register them as a
# temporary view so we can query them with SQL
df = spark.read.load('shake_classification', "org.apache.bahir.cloudant")
df.createOrReplaceTempView("df")

# We need to make sure SystemML is installed.
#

# In[4]:

get_ipython().system(u'pip install systemml')

# We’ll use Apache SystemML to implement the Discrete Fourier Transform. This way, all computation continues to happen on the Apache Spark cluster, which gives us scalability and performance.

# In[5]:

from systemml import MLContext, dml
ml = MLContext(spark)

# As you’ve learned in the lecture, implementing the Discrete Fourier Transform in a linear algebra programming language is simple. Apache SystemML DML is such a language, and as you can see, the implementation is straightforward and doesn’t differ much from the mathematical definition (just note that the sum operator has been replaced by a vector dot product, using the %*% syntax borrowed from R):
#
# <img style="float: left;" src="https://wikimedia.org/api/rest_v1/media/math/render/svg/1af0a78dc50bbf118ab6bd4c4dcc3c4ff8502223">
#
#

# In[6]:

dml_script = '''
PI = 3.141592654
N = nrow(signal)

n = seq(0, N-1, 1)
k = seq(0, N-1, 1)

# the outer product n %*% t(k) gives the N x N matrix of index products n*k;
# scaling by 2*PI/N yields the angle matrix of the DFT
M = (n %*% t(k)) * (2*PI/N)

# real and imaginary parts of the transform as matrix-vector products
Xa = cos(M) %*% signal
Xb = sin(M) %*% signal

# column-bind real and imaginary parts into the result
DFT = cbind(Xa, Xb)
'''
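# To execute the script we bind a column vector to the `signal` input and request the `DFT` output. Below is a minimal sketch using a synthetic sine wave as a stand-in for one of the accelerometer columns (the name `demo_signal` is ours, purely illustrative):

# In[7]:

import numpy as np

# 64-sample sine wave at 5 Hz as a demo input signal
demo_signal = np.sin(2 * np.pi * 5 * np.linspace(0, 1, 64)).reshape(-1, 1)

prog = dml(dml_script).input(signal=demo_signal).output('DFT')
dft = ml.execute(prog).get('DFT').toNumPy()
print(dft.shape)  # (64, 2): real and imaginary parts side by side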
# coding: utf-8

# # Assignment 4
# ## Understanding scaling of linear algebra operations on Apache Spark using Apache SystemML
# 
# In this assignment we want you to understand how to scale linear algebra operations from a single machine to multiple machines, memory, and CPU cores using Apache SystemML. Therefore we want you to understand how to migrate from a numpy program to a SystemML DML program. Don't worry, we will give you a lot of hints. Finally, you won't need this knowledge anyway if you are sticking to Keras only, but once you go beyond that point you'll be happy to see what's going on behind the scenes. Please make sure you run this notebook from an Apache Spark 2.3 notebook.
# 
# So the first thing we need to ensure is that we are on the latest version of SystemML, which is 1.2.0 (as of Feb. '19)
# Please use the code block below to check if you are already on 1.2.0 or higher.
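# As a tiny preview of such a migration, here is a hedged sketch computing a column sum once in numpy and once in DML (the input name `M` is our choice, not mandated by SystemML):

# In[ ]:

import numpy as np
from systemml import MLContext, dml

m = np.arange(6, dtype=float).reshape(3, 2)
print(m.sum(axis=0))  # numpy: [6. 9.]

# the same computation expressed in DML and run via SystemML
ml = MLContext(spark)
prog = dml("s = colSums(M)").input(M=m).output("s")
print(ml.execute(prog).get("s").toNumPy())  # DML equivalent: [[6. 9.]]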

# In[1]:


from systemml import MLContext
ml = MLContext(spark)
ml.version()


# If you are below version 1.2.0 please execute the next two code blocks

# In[7]:


get_ipython().system(u'pip install systemml')


# Now we need to create two symlinks so that the newest version is picked up - this is a workaround and will be removed soon

# In[2]:
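# A sketch of what those symlinks could look like - the jar locations below are an assumption based on a typical Watson Studio layout, not taken from this notebook, so adjust them to your environment:

get_ipython().system(u'ln -s -f ~/user-libs/python3/systemml/systemml-java/systemml-1.2.0.jar ~/user-libs/spark2/systemml-1.2.0.jar')  # assumed paths
get_ipython().system(u'ln -s -f ~/user-libs/python3/systemml/systemml-java/systemml-1.2.0-extra.jar ~/user-libs/spark2/systemml-1.2.0-extra.jar')  # assumed paths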
# In[1]:

get_ipython().system(u'pip install --upgrade systemml')

# In[2]:

from systemml import MLContext, dml
import numpy as np
import time

# Then we create an MLContext to interface with Apache SystemML. Note that we pass a SparkSession object as a parameter, so SystemML knows how to talk to the Apache Spark cluster

# In[3]:

ml = MLContext(spark)

# Now we create some large random matrices to have numpy and SystemML crunch on them

# In[4]:

u = np.random.rand(1000, 10000)
s = np.random.rand(10000, 1000)
w = np.random.rand(1000, 1000)

# Now we implement a short one-liner to define a very simple linear algebra operation
#
# In case you are not familiar with matrix-matrix multiplication: https://en.wikipedia.org/wiki/Matrix_multiplication
#
# sum(U' * (W . (U * S)))
#
# First the numpy version: in the formula above, ' is the transpose, * the matrix product, and . the elementwise product; in numpy these become .T, .dot, and *

# In[8]:

start = time.time()
res = np.sum(u.T.dot(w * u.dot(s)))
print(time.time() - start)
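# For comparison, here is a sketch of how the same expression could look in SystemML DML, executed through the MLContext created above (in DML, %*% is the matrix product and * the elementwise product):

# In[9]:

script = dml("""
res = sum(t(U) %*% (W * (U %*% S)))
""").input(U=u, S=s, W=w).output('res')

start = time.time()
res = ml.execute(script).get('res')
print(time.time() - start)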
from pyspark import SparkContext, SparkConf
from pyspark.sql import SparkSession

sc = SparkContext.getOrCreate(SparkConf().setMaster("local[*]"))
spark = SparkSession.builder.getOrCreate()


# SystemML

get_ipython().system('pip install https://github.com/IBM/coursera/blob/master/systemml-1.3.0-SNAPSHOT-python.tar.gz?raw=true')

get_ipython().system('mkdir -p /home/dsxuser/work/systemml')

from systemml import MLContext, dml
ml = MLContext(spark)
ml.setConfigProperty("sysml.localtmpdir", "/home/dsxuser/work/systemml")
print(ml.version())

if not ml.version() == '1.3.0-SNAPSHOT':
    raise ValueError('please upgrade to SystemML 1.3.0, or restart your Kernel (Kernel->Restart & Clear Output)')

# In[5]:

get_ipython().system('mkdir -p /home/dsxuser/work/systemml')

# In[6]:

from systemml import MLContext, dml
import numpy as np
import time

ml = MLContext(spark)
ml.setConfigProperty("sysml.localtmpdir", "/home/dsxuser/work/systemml")
print(ml.version())

if not ml.version() == '1.2.0':
    raise ValueError(
        'please upgrade to SystemML 1.2.0, or restart your Kernel (Kernel->Restart & Clear Output)'
    )

# Congratulations! If you see version 1.2.0, please continue with the notebook...

# We use an MLContext to interface with Apache SystemML. Note that we passed a SparkSession object as a parameter, so SystemML knows how to talk to the Apache Spark cluster

# Now we create some large random matrices to have numpy and SystemML crunch on them

# In[7]:
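# To make this part of the notebook self-contained, we repeat the three random matrices from the earlier cell (same shapes, same idea):

u = np.random.rand(1000, 10000)
s = np.random.rand(10000, 1000)
w = np.random.rand(1000, 1000)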