Code example #1
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType

if __name__ == '__main__':
    spark = SparkSession.builder.appName("emp_dept").getOrCreate()
    sc = spark.sparkContext
    dept_rdd = sc.textFile("/Users/shuvamoymondal/Downloads/Dept.txt")
    # Create an RDD from the file and split each line into a list of fields
    emp_rdd = sc.textFile("/Users/shuvamoymondal/Downloads/Emp.txt").map(lambda s: s.split(","))
    print(emp_rdd.take(3))
    # Each element is converted to a tuple of trimmed fields
    emp = emp_rdd.map(lambda s: (s[0], s[1].strip(), s[2].strip(), s[3].strip(), s[4], s[5]))
    print(emp.take(2))


    # The schema is encoded in a string.
    schemaString = "Id Name Desgn Age Sal Dept_ID"

    # Read every field defined above as StringType, create a StructField for
    # each, then bind all the fields into a StructType schema
    fields = [StructField(field_name, StringType(), True) for field_name in schemaString.split()]
    schema = StructType(fields)

    # Apply the schema to the RDD.
    df = spark.createDataFrame(emp, schema)
    df.show()
    df.createOrReplaceTempView("emp")
    # The original query string was truncated ("select sum("); summing the
    # Sal column is the most plausible reconstruction
    spark.sql("select sum(Sal) from emp").show()

    # Load a CSV file with header and schema inference (the DataFrameReader
    # replaces the deprecated SQLContext.load / spark-csv package call)
    data = spark.read.csv('/Users/shuvamoymondal/Downloads/SalesJan2009', header=True, inferSchema=True)
    data.show(2)
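Note that dept_rdd is loaded above but never used. Below is a minimal sketch, not in the original, of how it could be joined with the emp view; the "Dept_ID,Dept_Name" layout of Dept.txt is an assumption.

    # Hypothetical continuation: assumes each Dept.txt line is "Dept_ID,Dept_Name"
    dept = dept_rdd.map(lambda s: s.split(",")).map(lambda s: (s[0].strip(), s[1].strip()))
    dept_schema = StructType([StructField("Dept_ID", StringType(), True),
                              StructField("Dept_Name", StringType(), True)])
    spark.createDataFrame(dept, dept_schema).createOrReplaceTempView("dept")
    # Employees with their department names
    spark.sql("select e.Name, d.Dept_Name from emp e join dept d on e.Dept_ID = d.Dept_ID").show()
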
Code example #2
from pyspark import SparkContext
from pyspark.sql import SQLContext

# The original listing starts mid-block; the "try:" line and the imports
# above are reconstructed from the "except:" branch and from code example #3
ascontext = None
try:
    import spss.pyspark.runtime
    ascontext = spss.pyspark.runtime.getContext()
    sc = ascontext.getSparkContext()
    df = ascontext.getSparkInputData()
    model_type = '%%model_type%%'
    target = '%%target_field%%'
    lambda_param = float('%%lambda%%')
    predictors = [x.strip() for x in "%%predictor_fields%%".split(",")]

    modelpath = ascontext.createTemporaryFolder()
except:
    import os
    sc = SparkContext('local')
    sqlCtx = SQLContext(sc)
    # Get an input DataFrame with sample data by looking in the working directory for DRUG1N.json
    wd = os.getcwd()
    # SQLContext.load is long deprecated; use the DataFrameReader json() method
    df = sqlCtx.read.json("file://" + wd + "/DRUG1N.json").repartition(4)
    # specify predictors and target
    predictors = ["Drug","BP", "Sex", "Age"]
    target = "Cholesterol"
    lambda_param = 1.0

    modelpath_base = "/tmp/model1234"
    import shutil
    # Remove any model left over from a previous run
    shutil.rmtree(modelpath_base, ignore_errors=True)
    modelpath = "file://"+modelpath_base+"/model"
    metadatapath = modelpath_base+"/metadata"

# ModelBuildReporter is a helper defined elsewhere in the original source
mbr = ModelBuildReporter(sc)
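The listing prepares df, predictors, target, lambda_param, and modelpath but never shows the model build itself. Below is a rough, self-contained sketch of the kind of regularized regression these inputs suggest, using pyspark.ml; the toy data and every name in it are illustrative assumptions, not the original SPSS code.

from pyspark.sql import SparkSession
from pyspark.ml.feature import VectorAssembler
from pyspark.ml.regression import LinearRegression

spark = SparkSession.builder.appName("ridge_sketch").getOrCreate()
# Toy numeric frame standing in for the real input DataFrame
toy = spark.createDataFrame([(1.0, 2.0, 5.1), (2.0, 0.5, 3.9), (3.0, 1.5, 7.2)],
                            ["x1", "x2", "y"])
features = VectorAssembler(inputCols=["x1", "x2"], outputCol="features").transform(toy)
# regParam plays the role of lambda_param; elasticNetParam=0.0 makes it an L2 (ridge) penalty
lr = LinearRegression(featuresCol="features", labelCol="y", regParam=1.0, elasticNetParam=0.0)
model = lr.fit(features)
print(model.coefficients, model.intercept)
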
Code example #3
from pyspark import SparkContext
from pyspark.sql import SQLContext
from pyspark.mllib.regression import LabeledPoint
from pyspark.mllib.linalg import DenseVector
from sklearn import preprocessing, svm
import time
import sys
import os
import json
ascontext = None
if len(sys.argv) > 1 and sys.argv[1] == "-test":
    sc = SparkContext('local')
    sqlCtx = SQLContext(sc)
    # Get an input DataFrame with sample data by looking in the working directory for DRUG1N.json
    wd = os.getcwd()
    # SQLContext.load is long deprecated; use the DataFrameReader json() method
    df = sqlCtx.read.json("file://" + wd + "/DRUG1N.json").repartition(4)
    # specify predictors and target
    fields = ["Age", "K", "Na"]
    target = "BP"
    modelpath = "/tmp/svm.model"
    modelmetadata_path = "/tmp/svm.metadata"
    kernel = "linear"
    tol = 0.001
    coef0 = 0.0
    gamma = 'auto'
    degree = 3
    shrinking = True
else:
    import spss.pyspark.runtime
    ascontext = spss.pyspark.runtime.getContext()
    sc = ascontext.getSparkContext()
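
The fragment ends here, but the hyperparameters set in the test branch (kernel, tol, coef0, gamma, degree, shrinking) map directly onto sklearn.svm.SVC. Below is a minimal, self-contained sketch of the fit they imply; the toy data is illustrative, not from the source.

from sklearn import svm
from sklearn.preprocessing import LabelEncoder

# Toy stand-ins for the collected predictor matrix and target labels
X = [[23.0, 0.07, 0.5], [47.0, 0.06, 0.7], [35.0, 0.08, 0.6]]
y = LabelEncoder().fit_transform(["HIGH", "LOW", "NORMAL"])

clf = svm.SVC(kernel="linear", tol=0.001, coef0=0.0, gamma="auto", degree=3, shrinking=True)
clf.fit(X, y)
print(clf.predict([[30.0, 0.07, 0.55]]))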