Example #1
from pyspark import SparkContext, SparkConf
from pyspark.sql import SQLContext, SparkSession
from pyspark.sql.types import StructType, StructField, DoubleType, IntegerType, StringType

# create (or reuse) a local SparkContext and the SparkSession on top of it
sc = SparkContext.getOrCreate(SparkConf().setMaster("local[*]"))
spark = SparkSession \
    .builder \
    .getOrCreate()
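
# Quick sanity check (not part of the original code): the assignment below
# expects an Apache Spark 2.3 notebook, so print the version we actually got.
print("Spark version:", spark.version)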





# SystemML installation and setup

!pip install https://github.com/IBM/coursera/blob/master/systemml-1.3.0-SNAPSHOT-python.tar.gz?raw=true


!mkdir -p /home/dsxuser/work/systemml

from systemml import MLContext, dml
ml = MLContext(spark)
# point SystemML's local temp directory at the directory created above
ml.setConfigProperty("sysml.localtmpdir", "/home/dsxuser/work/systemml")
print(ml.version())

if not ml.version() == '1.3.0-SNAPSHOT':
    raise ValueError('please upgrade to SystemML 1.3.0, or restart your Kernel (Kernel->Restart & Clear Output)')

# Please fill in the sections labelled with "###YOUR_CODE_GOES_HERE###"
#
# The purpose of this assignment is to learn how feature engineering boosts model performance. You will apply a Discrete Fourier Transform to the accelerometer sensor time series, thereby transforming the dataset from the time domain to the frequency domain (a small numpy sketch of this idea follows after this cell).
#
# After that, you’ll use a classification algorithm of your choice to create a model and submit the new predictions to the grader. Done.
#
# Please make sure you run this notebook from an Apache Spark 2.3 notebook.
#
# So the first thing we need to ensure is that we are on the latest version of SystemML, which is 1.3.0 (as of 20th March 2019). Please use the code block below to check whether you are already on 1.3.0 or higher. 1.3 contains a necessary fix, which is why we are running against the SNAPSHOT build.
#
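
# As a minimal, self-contained sketch of the time-to-frequency idea (an
# illustration only, not the SystemML/DML pipeline used for grading below):
# numpy's FFT turns a toy one-axis "accelerometer" signal into frequency amplitudes.
import numpy as np
t = np.linspace(0, 1, 400, endpoint=False)       # 1 second, sampled 400 times
x = np.sin(2 * np.pi * 5 * t) + 0.1 * np.random.randn(400)
amplitudes = np.abs(np.fft.fft(x))               # frequency-domain features
print(amplitudes[:200].argmax())                 # dominant bin is 5 (the 5 Hz component)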

# In[1]:

from systemml import MLContext
ml = MLContext(spark)
ml.version()

#
#
# If you are below version 1.3.0, or you get the error message "No module named 'systemml'", please execute the next two code blocks and then
#
# # PLEASE RESTART THE KERNEL !!!
#
# Otherwise your changes won't take effect. Just double-check, every time you run this notebook, that you are on SystemML 1.3
#

# In[1]:

get_ipython().system(
    'pip install https://github.com/IBM/coursera/blob/master/systemml-1.3.0-SNAPSHOT-python.tar.gz?raw=true'
)
spark = SparkSession.builder.getOrCreate()

# In[5]:

get_ipython().system('mkdir -p /home/dsxuser/work/systemml')

# In[6]:

from systemml import MLContext, dml
import numpy as np
import time

ml = MLContext(spark)
# point SystemML's local temp directory at the directory created above
ml.setConfigProperty("sysml.localtmpdir", "/home/dsxuser/work/systemml")
print(ml.version())

if not ml.version() == '1.3.0-SNAPSHOT':
    raise ValueError(
        'please upgrade to SystemML 1.3.0, or restart your Kernel (Kernel->Restart & Clear Output)'
    )

# Congratulations, if you see version 1.3.0-SNAPSHOT, please continue with the notebook...

# We use an MLContext to interface with Apache SystemML. Note that we passed a SparkSession object as a parameter, so SystemML now knows how to talk to the Apache Spark cluster.
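
# A minimal sketch (not part of the original notebook) of what that interface
# looks like: hand the MLContext a tiny DML script and read the result back.
sketch = dml("s = sum(seq(1, 100))").output("s")
print(ml.execute(sketch).get("s"))  # 1 + 2 + ... + 100 = 5050.0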

# Now we create some large random matrices for numpy and SystemML to crunch on.

# In[7]:

u = np.random.rand(1000, 10000)
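
# A rough sketch of the kind of "crunching" meant above (the exact benchmark
# may differ later in the notebook): time a simple numpy reduction over u.
start = time.time()
print("sum of squares:", (u * u).sum())
print("numpy took %.3f seconds" % (time.time() - start))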