def prediction(hour, extern_temp):
    # Pick the model trained for this 6-hour window of the day.
    if 0 <= hour < 6:
        model_path = "/home/dhruv/Desktop/IoT/Assignment-3/Final/model1"
    elif 6 <= hour < 12:
        model_path = "/home/dhruv/Desktop/IoT/Assignment-3/Final/model2"
    elif 12 <= hour < 18:
        model_path = "/home/dhruv/Desktop/IoT/Assignment-3/Final/model3"
    else:  # 18 <= hour < 24
        model_path = "/home/dhruv/Desktop/IoT/Assignment-3/Final/model4"
    # Predict Temperature from the external temperature reading.
    temp_model = LinearRegressionModel.load(sc, model_path)
    temp_data = temp_model.predict(np.array([extern_temp]))
    print(temp_data)
    f.write('%.2f\n' % temp_data)
def load_parameters(self):
    self.amount_prediction_method = self.load_data_from_file(data_type=self.SAVE_TYPE_MODEL, file_name='amount_method')
    self.trend_prediction_method = self.load_data_from_file(data_type=self.SAVE_TYPE_MODEL, file_name='trend_method')
    self.data_features = self.load_data_from_file(data_type=self.SAVE_TYPE_MODEL, file_name='features')
    self.stock_symbol = self.load_data_from_file(data_type=self.SAVE_TYPE_MODEL, file_name='symbol')
    self.data_parser = self.load_data_from_file(data_type=self.SAVE_TYPE_MODEL, file_name='data_parser')
    amount_model_path = os.path.join(os.path.abspath(self.model_path), 'amount_model')
    trend_model_path = os.path.join(os.path.abspath(self.model_path), 'trend_model')

    # Methods with native MLlib persistence are loaded through their model
    # classes; anything else falls back to load_data_from_file.
    if self.amount_prediction_method == self.RANDOM_FOREST:
        amount_model = RandomForestModel.load(sc=self.sc, path=amount_model_path)
    elif self.amount_prediction_method == self.LINEAR_REGRESSION:
        amount_model = LinearRegressionModel.load(sc=self.sc, path=amount_model_path)
    else:
        amount_model = self.load_data_from_file(data_type=self.SAVE_TYPE_MODEL, file_name='amount_model')

    if self.trend_prediction_method == self.RANDOM_FOREST:
        trend_model = RandomForestModel.load(sc=self.sc, path=trend_model_path)
    elif self.trend_prediction_method == self.LOGISTIC_REGRESSION:
        trend_model = LogisticRegressionModel.load(sc=self.sc, path=trend_model_path)
    elif self.trend_prediction_method == self.NAIVE_BAYES:
        trend_model = NaiveBayesModel.load(sc=self.sc, path=trend_model_path)
    elif self.trend_prediction_method == self.SVM:
        trend_model = SVMModel.load(sc=self.sc, path=trend_model_path)
    else:
        trend_model = self.load_data_from_file(data_type=self.SAVE_TYPE_MODEL, file_name='trend_model')

    return trend_model, amount_model
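# A minimal usage sketch for load_parameters above; `predictor` (an instance
# of the surrounding class) and the feature vector are hypothetical stand-ins.
# trend_model, amount_model = predictor.load_parameters()
# trend = trend_model.predict(features)    # direction of movement
# amount = amount_model.predict(features)  # size of movement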
def main(sc):
    features_cr = sc.pickleFile('/tmp/features_saved')
    linear_model = LinearRegressionModel.load(sc, "/tmp/linear_model")

    # Getting the features ready for predicting
    numberFeatures = len(features_cr.first()) - 1
    mappings = [get_mapping(features_cr, i) for i in range(0, numberFeatures)]

    # Month Dictionary
    dictio_month = {}
    for i in range(12):
        dictio_month[i + 1] = i
    mappings[1] = dictio_month
    cat_len = sum(map(len, mappings))

    if len(sys.argv) == 2:
        dateZoneGroup = str(sys.argv[1]).split(',')
        zones = get_group_zone(dateZoneGroup[5], 'zone', features_cr)
        groups = get_group_zone(dateZoneGroup[6], 'group', features_cr)
        year_feat = int(dateZoneGroup[0])
        month_feat = int(dateZoneGroup[1])
        day_feat = int(dateZoneGroup[3])
        startDate = date(year_feat, month_feat, day_feat)
        endDate = date(year_feat, month_feat, day_feat)
        print(startDate)
    else:
        startDate = datetime.strptime(str(sys.argv[1]), event_date).date()
        endDate = datetime.strptime(str(sys.argv[2]), event_date).date()
        zones = get_group_zone(str(sys.argv[3]), 'zone', features_cr)
        groups = get_group_zone(str(sys.argv[4]), 'group', features_cr)

    dateRangeWF = list(date_range(startDate, endDate))

    # Making predictions:
    feat_vs_pred = list()
    featPredShow = list()
    for g in groups:
        for z in zones:
            for dR in dateRangeWF:
                featureLine = format_date(dR, z, g, mappings, cat_len)
                #print(featureLine)
                featureShow = format_show(dR, z, g)
                predLine = linear_model.predict(featureLine)
                feat_vs_pred.append(list(featureLine) + [predLine])
                featPredShow.append(list(featureShow) + [predLine])

    scFeaturesPred = sc.parallelize(featPredShow)
    for j in scFeaturesPred.collect():
        print(j)
def prediction():
    year = yearprediction
    stations = sc.textFile(output + "/stationtextformat")
    stations = stations.map(getdata).map(lambda x: (x[0], int(year), float(x[1]), float(x[2])))

    lat = stations.map(lambda x: x[2]).cache()
    min_lat = lat.min()
    max_lat = lat.max()
    longitude = stations.map(lambda x: x[3]).cache()
    min_long = longitude.min()
    max_long = longitude.max()

    # Scale (year, lat, long) into a fixed range before feeding the model.
    max_ = [float(2050), max_lat, max_long]
    min_ = [float(1990), min_lat, min_long]
    stations = stations.map(lambda x: scalePoint(x, max_, min_)).cache()
    stationsDF = sqlContext.createDataFrame(stations)

    # load the model
    sameModel = LinearRegressionModel.load(sc, output + "/modelpath")

    # run the model; the result is a predicted value for each station
    # (stationId) in the given year
    stationidAndPreds = stations.map(lambda p: (p[0], float(sameModel.predict(p[1:]))))
    resultRdd = stationidAndPreds.map(rescale)
    rddschema = resultRdd.map(lambda ab: Row(station=ab[0], avg_prcp=ab[1])).cache()
    stationidAndPredsDF = sqlContext.createDataFrame(rddschema)
    stationidAndPredsDF.registerTempTable("stationPrediction")

    getCountries()
    countriesFile = sc.textFile(output + "/countries")
    countriesRdd = countriesFile.map(getdata)
    countries = countriesRdd.map(lambda ab: Row(station=ab[0], country=ab[1])).cache()
    countriesDF = sqlContext.createDataFrame(countries)
    countriesDF.registerTempTable("StationTable")
    countriesDF.cache()

    shortenstations = sqlContext.sql("SELECT SUBSTR(station, 1, 2) AS station, avg_prcp FROM stationPrediction")
    shortenstations.show()
    joinedresult = countriesDF.join(shortenstations) \
        .where(countriesDF.station == shortenstations.station) \
        .select(shortenstations.avg_prcp, countriesDF.country)
    joinedresult.registerTempTable("joinedresult")

    results = sqlContext.sql("SELECT country, AVG(avg_prcp) AS avg_prcp FROM joinedresult GROUP BY country")
    results.registerTempTable("results")
    outrdd = results.repartition(40).rdd.map(lambda l: str(l.country) + "," + str(l.avg_prcp)).coalesce(1)
    path = yearprediction
    outrdd.saveAsTextFile(output + '/prediction/' + path)
def CustomPredict(date_start, date_end, company):
    # create spark context
    sc = SparkContext(appName="Model")
    # create an api object for the requested date range and company
    api = NewsAPI.NewsAPI(date_start.month, date_start.day, date_start.year,
                          date_end.month, date_end.day, date_end.year,
                          company, '56283d7d6075b9d30773e1ceb440e1b2d029f438')
    # load the prediction model for the company
    model = LinearRegressionModel.load(sc, company)
    # getting data for the duration of time specified
    api.startGetData()
    # get the sentiment average of the days
    l = api.getSentimentScore()
    mean_sent = np.mean(l)
    print("mean sentiment: " + str(mean_sent))
    # make the prediction using the loaded model
    pred = model.predict([mean_sent])
    print("prediction: " + str(pred))
    # close the spark context and return the prediction
    sc.stop()
    return pred
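# A hypothetical invocation of CustomPredict above; the date range and the
# "AAPL" company name are illustrative only (note the company string doubles
# as the path of the saved model).
from datetime import date

predicted = CustomPredict(date(2017, 1, 1), date(2017, 1, 31), "AAPL")
print(predicted)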
def main():
    spark = SparkSession.builder.appName("TRAFFIC") \
        .config("spark.executor.cores", "4") \
        .config("spark.executor.memory", "4g") \
        .getOrCreate()
    sc = spark.sparkContext

    mapping = sc.textFile("s3a://insighttraffic/ML_model/mappings").collect()[0]
    mapping = ast.literal_eval(str(mapping))

    # One linear model per hour of the day.
    models = []
    for hour in range(0, 24):
        model = LinearRegressionModel.load(sc, "s3a://insighttraffic/ML_model/linear_model_log_" + str(hour))
        models.append(model)
    category_len = 154

    sqlContext = sql.SQLContext(sc)
    hadoop_conf = sc._jsc.hadoopConfiguration()
    hadoop_conf.set("fs.s3n.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem")
    hadoop_conf.set("fs.s3n.awsAccessKeyId", 'awsAccessKeyId')
    hadoop_conf.set("fs.s3n.awsSecretAccessKey", 'awsSecretAccessKey')

    # set microbatch interval as 10 seconds, this can be customized according to the project
    ssc = StreamingContext(sc, 10)
    # directly receive the data under a certain topic
    kafkaStream = KafkaUtils.createDirectStream(ssc, ['data'], {"metadata.broker.list": 'Kafka-DNS:9092'})

    connection = psycopg2.connect(host='postgres-ip-address', database='postgres',
                                  user='******', password='******')
    cursor = connection.cursor()
    cursor.execute('CREATE TABLE IF NOT EXISTS realtimetraffic (sid text, location text, '
                   'latitude double precision, longitude double precision, '
                   'direction text, lanes integer, roadtype text, highway text, '
                   'current integer, historical double precision, level text, '
                   'PRIMARY KEY (sid));')
    cursor.execute('SELECT AddGeometryColumn (%s, %s, %s, 4326, %s, 2);',
                   ('public', 'realtimetraffic', 'geom', 'POINT'))

    # The inbound stream is a DStream of (key, value) Kafka messages.
    dstream = kafkaStream.map(lambda kv: json.loads(kv[1]))
    dstream.foreachRDD(lambda rdd: update(rdd, models, mapping))
        df = sparkSession.createDataFrame(
            [(time.strftime("%Y-%m-%d %H:%M:%S"), store_id, result)],
            ["timePredicted", "store_id", "value"])
        df.write.format("com.mongodb.spark.sql.DefaultSource").mode("append").save()
        return predicted
    else:
        print("No data received")


if __name__ == "__main__":
    sc = SparkContext(appName="NinoxStreaming")
    my_spark = SparkSession \
        .builder \
        .appName("Ninox") \
        .config("spark.mongodb.input.uri", "mongodb://172.254.0.4:27017/predictions.data") \
        .config("spark.mongodb.output.uri", "mongodb://172.254.0.4:27017/predictions.data") \
        .getOrCreate()
    ssc = StreamingContext(sc, 10)

    # Load model from HDFS
    model = LinearRegressionModel.load(sc, "hdfs://172.254.0.2:9000/user/root/models/first.model")

    # Create stream to get kafka messages
    directKafkaStream = KafkaUtils.createDirectStream(ssc, ["incomingData"],
                                                      {"metadata.broker.list": "172.254.0.7:9092"})

    # Predict and save to mongo
    directKafkaStream.foreachRDD(lambda time, rdd: predict(rdd, model, my_spark, time))

    ssc.start()
    ssc.awaitTermination()
    sc.stop()
conf = SparkConf().setAppName('Linear least squares, Lasso, and ridge regression').setMaster('local[2]')
sc = SparkContext(conf=conf)


# load and parse the data
def parsePoint(line):
    values = [float(x) for x in line.replace(',', ' ').split(' ')]
    return LabeledPoint(values[0], values[1:])


data = sc.textFile('../data/lpsa.data')
parseData = data.map(parsePoint)

# build the model
model = LinearRegressionWithSGD.train(parseData, iterations=100, step=0.0000001)

# evaluate the model on training data
valuesAndPreds = parseData.map(lambda p: (p.label, model.predict(p.features)))
MSE = valuesAndPreds.map(lambda vp: (vp[0] - vp[1]) ** 2).reduce(lambda a, b: a + b) / valuesAndPreds.count()
print('mean squared error: ' + str(MSE))

# save and load model
model.save(sc, '../model/pythonLinearRegressionWithSGDModel')
sameModel = LinearRegressionModel.load(sc, '../model/pythonLinearRegressionWithSGDModel')
sc.stop()
#!usr/local/spark/python
import os
import numpy as np
from pyspark import SparkContext
from pyspark.mllib.regression import LabeledPoint, LinearRegressionWithSGD, LinearRegressionModel
from pyspark.mllib.classification import SVMWithSGD, SVMModel
from pyspark.mllib.classification import LogisticRegressionWithLBFGS, LogisticRegressionModel

sc = SparkContext("local")

# Read the feature vector from disk.
features = open("/root/Desktop/features.LOL", "r")
feature = features.read().strip().split(' ')
i = [float(x) for x in feature]

# The model path is stored on the third line of the parameters file.
param = open("/root/Desktop/parameters.LOL", "r")
temp = param.readlines()
supermodel = temp[2].strip()

Modelname = LinearRegressionModel.load(sc, supermodel)
r = str(Modelname.predict(i))
print(r)
ss = "echo %s >/root/Desktop/predicted.txt" % r
os.system(ss)
from pyspark.mllib.regression import LinearRegressionWithSGD, LinearRegressionModel, LabeledPoint
from pyspark import SparkContext


def parsePoint(line):
    values = [float(x) for x in line.split(',')]
    return LabeledPoint(values[2], [values[0], values[1]])


sc = SparkContext()
model = LinearRegressionModel.load(sc, "/home/khaled/project/tmp/lin_reg_model")

data_test = sc.textFile("/home/khaled/project/data_gen/test.csv")
data_test_parsed = data_test.map(parsePoint)
data_test2 = sc.textFile("/home/khaled/project/data_gen/test2.csv")
data_test_parsed2 = data_test2.map(lambda x: x.split(','))

predics = data_test_parsed.map(lambda x: model.predict(x.features))
predics2 = data_test_parsed2.map(lambda x: model.predict(x))
data_itr = predics.collect()
data_itr2 = predics2.collect()

f = open("predictions.txt", "w+")
f.write("good morning\n")
for i in data_itr:
    f.write("the output consumption is: " + str(i) + "\n")
for i in data_itr2:
    f.write("the output consumption is: " + str(i) + "\n")
f.close()
def parsePoint(line):
    values = [np.float(x) for x in line.replace(',', ' ').split(' ')]
    return LabeledPoint(values[6], values[0:6])


data = sc.textFile("/user/cloudera/hw1/train_nohead.csv")
wholedata = sc.textFile("/user/cloudera/hw1/wholedata.csv")
parsedData = data.map(parsePoint)
parsedWholeData = wholedata.map(parsePoint)

#Build the model
model = LinearRegressionWithSGD.train(parsedData, iterations=100, step=0.1)

#Evaluate the model
valuesAndPreds = parsedWholeData.map(lambda p: (p.label, model.predict(p.features)))
RMSE = np.sqrt(
    valuesAndPreds
    .map(lambda vp: (vp[0] - vp[1]) ** 2)
    .reduce(lambda x, y: x + y) / valuesAndPreds.count()
)
print("linear regression output :\n")
print("RMSE = {0}\n".format(RMSE))

#save and load model
model.save(sc, "/user/cloudera/hw1/results/2015310884_linear")
sameModel = LinearRegressionModel.load(sc, "/user/cloudera/hw1/results/2015310884_linear")
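# Equivalently, MLlib can compute these metrics directly; a short sketch
# reusing valuesAndPreds from the snippet above. RegressionMetrics expects
# (prediction, observation) pairs, hence the swap.
from pyspark.mllib.evaluation import RegressionMetrics

metrics = RegressionMetrics(valuesAndPreds.map(lambda vp: (float(vp[1]), float(vp[0]))))
print("RMSE via RegressionMetrics = {0}".format(metrics.rootMeanSquaredError))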
    LabeledPoint(2.0, [1.0, 1.4]),
    LabeledPoint(4.0, [2.0, 1.9]),
    LabeledPoint(6.0, [3.0, 4.0])
]  # training set

lrm = LinearRegressionWithSGD.train(sc.parallelize(data), iterations=100,
                                    initialWeights=np.array([1.0, 1.0]))
print(lrm.predict(np.array([2.0, 1.0])))  # predict with the trained regression model

import os, tempfile
from pyspark.mllib.regression import LinearRegressionModel
from pyspark.mllib.linalg import SparseVector

path = tempfile.mkdtemp()
lrm.save(sc, path)  # save the model to external storage
sameModel = LinearRegressionModel.load(sc, path)  # load the model back

# Predicting a single value from a SparseVector input.
print(sameModel.predict(SparseVector(2, {0: 100.0, 1: 150})))

test_set = []
for i in range(100):
    for j in range(100):
        test_set.append(SparseVector(2, {0: i, 1: j}))
print(sameModel.predict(sc.parallelize(test_set)).collect())  # bulk predict; returns an RDD
print(sameModel.weights)  # the fitted weights

# ----------------- Ridge regression ------------------
from pyspark.mllib.regression import RidgeRegressionWithSGD
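# A minimal sketch of how the ridge-regression section presumably continues,
# mirroring the SGD call above; the step and regParam values are illustrative
# assumptions, not from the original.
rrm = RidgeRegressionWithSGD.train(sc.parallelize(data), iterations=100,
                                   step=1.0, regParam=0.01,
                                   initialWeights=np.array([1.0, 1.0]))
print(rrm.predict(np.array([2.0, 1.0])))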
    type=p[7],
    velocity=int(p[8]),
    error=p[9],
    integration=int(p[10]),
    station=p[11]
))

trafficDF = sqlContext.createDataFrame(trafficData)
trafficDF.registerTempTable("traffic")

query = sqlContext.sql("SELECT year, month, day, station, SUM(intensity) intensity "
                       "FROM traffic "
                       "WHERE error='N' AND station = '28079004' "
                       "GROUP BY year, month, day, station "
                       "LIMIT 1")

labelPoints = query.map(lambda line: [CommonFunctions.toWeekday(2000 + line[0], line[1], line[2]),
                                      CommonFunctions.clasification_intensity(line[4])])

model = LinearRegressionModel.load(sc, dirTrainingModel)
valueAir = model.predict(labelPoints.first())

data = query.map(lambda p: Row(
    valueAir=valueAir,
    year=int(p[0]),
    month=int(p[1]),
    day=int(p[2]),
    station=p[3],
    intensity=p[4]
))
print(data.collect())
# compute mean ndvi for comparison
meanNDVI = opsRDD.map(lambda x: x.label).mean()

##### use the MEAN NDVI as the prediction (for comparison)
meanRES = test.map(lambda x: (x.label, meanNDVI))
rmseVsMean = rmse(meanRES, numTest)
outString = "Simple Mean NDVI RMSE = " + str(rmseVsMean) + "\n\n"
fOut.write(outString)
######### MEAN NDVI

##### LINEAR REGRESSION WITH STOCHASTIC GRADIENT DESCENT
# if a model has already been trained, use it
# otherwise train a new one and save it
# (expanduser is needed so the "~" in the path is actually resolved)
modelDir = os.path.expanduser('~/CloudRepair/MODELS/lrmCR')
if os.path.exists(modelDir):
    lrm = LinearRegressionModel.load(sc, modelDir)
else:
    lrm = LinearRegressionWithSGD.train(training, iterations=10000,
                                        step=0.0000001, miniBatchFraction=0.10)
    lrm.save(sc, modelDir)

lrmPred = lrm.predict(test.map(lambda x: x.features))
lrmRES = test.map(lambda x: x.label).zip(lrmPred)
rmseLRM = rmse(lrmRES, numTest)
outString = "Linear Regression NDVI RMSE = " + str(rmseLRM) + "\n\n"
fOut.write(outString)
######### LINEAR REGRESSION WITH STOCHASTIC GRADIENT DESCENT

##### RANDOM FOREST optimization
for i in range(500):
    o.write(str(k[i][0]))
    o.write("        ")
    o.write(str(k[i][1]))
    o.write("<br>")
    o.write("\n")
o.write("</body>")
o.write("</html>")

os.system("rm -rf /root/Desktop/mpv2/mapredtest1/templates/valpred.html")
os.system("cp /root/Desktop/valpred.html /root/Desktop/mpv2/mapredtest1/templates/valpred.html")

'''
# Fraction of training predictions that are off by more than 20000:
trainerr = valuesAndPreds.filter(lambda vp: abs(vp[0] - vp[1]) > 20000).count() / float(parsedData.count())
print(valuesAndPreds.count())
print(parsedtrainData.take(5))
print(parsedtestData.take(1)[0].features)
print("answer =", model.predict(parsedtestData.take(1)[0].features))
print(parsedtestData.take(1)[0].label)
print(trainerr)
'''

# Save and load model
model.save(sc, supermodel)
sameModel = LinearRegressionModel.load(sc, supermodel)
from pyspark.mllib.regression import LinearRegressionWithSGD as lrSGD

ourModelWithLinearRegression = lrSGD.train(data=regressionLabelPointTrainData,
                                           iterations=200, step=0.02, intercept=True)
ourModelWithLinearRegression.intercept
ourModelWithLinearRegression.weights

#Step 9-6-5. Saving the created model.
ourModelWithLinearRegression.save(sc, '/home/pysparkbook/ourModelWithLinearRegression')

from pyspark.mllib.regression import LinearRegressionModel as linearRegressModel
ourModelWithLinearRegressionReloaded = linearRegressModel.load(sc, '/home/pysparkbook/ourModelWithLinearRegression')
ourModelWithLinearRegressionReloaded.intercept
ourModelWithLinearRegressionReloaded.weights

#Step 9-6-6. Predicting the data using model.
actualDataandLinearRegressionPredictedData = regressionLabelPointTestData.map(
    lambda data: (float(data.label), float(ourModelWithLinearRegression.predict(data.features))))
actualDataandLinearRegressionPredictedData.take(5)

#Step 9-6-7. Evaluating the model we have created.
from pyspark.mllib.evaluation import RegressionMetrics as rmtrcs

ourLinearRegressionModelMetrics = rmtrcs(actualDataandLinearRegressionPredictedData)
ourLinearRegressionModelMetrics.rootMeanSquaredError
ourLinearRegressionModelMetrics.r2
MSE = valuesAndPreds.map(lambda vp: (vp[0] - vp[1]) ** 2).reduce(lambda x, y: x + y) / valuesAndPreds.count()
print("Mean Squared Error = " + str(MSE))


# Save and load model (these mirror the MLlib wrapper's py4j bridge)
def save(self, sc, path):
    java_model = sc._jvm.org.apache.spark.mllib.regression.LinearRegressionModel(
        _py2java(sc, self._coeff), self.intercept)
    java_model.save(sc._jsc.sc(), path)


@classmethod
def load(cls, sc, path):
    java_model = sc._jvm.org.apache.spark.mllib.regression.LinearRegressionModel.load(sc._jsc.sc(), path)
    weights = _java2py(sc, java_model.weights())
    intercept = java_model.intercept()
    model = LinearRegressionModel(weights, intercept)
    return model


# Save parameters, i.e. min_ and max_, on disk
f = open(params, "w")
f.write(str(min_) + "," + str(max_))
f.close()

model.save(sc, myModelPath)
sameModel = LinearRegressionModel.load(sc, myModelPath)
sample = testData.map(lambda p: p.features)
predictValues = sameModel.predict(sample)
def run_saved_model(pr_values, sc):
    values = list(pr_values)
    predict_model = LinearRegressionModel.load(
        sc, '/Users/xiaoru_zhu/PycharmProjects/HousingPriceDA/PricePrediction/model/LR.model')

    # Configure
    train_path = '/Users/xiaoru_zhu/PycharmProjects/HousingPriceDA/Dataset/train.csv'

    # Initialize RDD
    rdd_lines = sc.textFile(train_path)
    head = rdd_lines.first()
    rdd_lines = rdd_lines.filter(lambda ln: ln != head) \
        .mapPartitions(lambda x: csv.reader(x)) \
        .persist(StorageLevel(True, True, False, False, 1))  # MEMORY_AND_DISK

    # Prepare for normalization
    sub = []
    minimum = []
    for index in range(5, 8):
        max_ = float(rdd_lines.map(lambda attr: attr[index]).max(key=float))
        min_ = float(rdd_lines.map(lambda attr: attr[index]).min(key=float))
        subtract = max_ - min_
        minimum.append(min_)
        sub.append(subtract)

    # Normalization: (val - min) / (max - min) scales the numeric feature
    # values into [0, 1] and narrows down the error.
    def normalization(line):
        line[5] = (float(line[5]) - minimum[0]) / sub[0]
        line[6] = (float(line[6]) - minimum[1]) / sub[1]
        line[7] = (float(line[7]) - minimum[2]) / sub[2]
        return line

    values = normalization(values)

    # Extract the distinct values of each categorical column into a mapping
    # dict, e.g. {'BATH BEACH': 0, 'BAY RIDGE': 1, 'BEDFORD STUYVESANT': 2, ...}
    def be_mapped(rdd_arg, column):
        return rdd_arg.map(lambda attr: attr[column]) \
            .distinct() \
            .zipWithIndex() \
            .collectAsMap()

    mappings = [be_mapped(rdd_lines, i) for i in [0, 1, 2, 8]]  # one dict per categorical column
    print('category feature mapping dict:', mappings)
    cat_len = sum(map(len, mappings))      # number of categorical feature slots
    num_len = len(rdd_lines.first()[5:8])  # number of numeric features (indexes 5, 6, 7)
    total_len = num_len + cat_len          # total feature vector length

    rdd_lines = rdd_lines.map(lambda attr: normalization(attr))

    # Create the feature vector for linear regression: one-hot encode the
    # categorical columns, then append the normalized numeric columns.
    def extract_features(line):
        cat_vec = np.zeros(cat_len)  # new array for category features, init 0 for all elements
        step = 0
        for i, raw_feature in enumerate([line[0], line[1], line[2], line[8]]):
            dict_cate = mappings[i]           # mapping dict for this categorical column
            idx = dict_cate[raw_feature]      # position of this category within its block
            cat_vec[idx + step] = 1           # set the one-hot slot
            step = step + len(dict_cate)      # jump to the next column's block
        num_vec = np.array([float(raw_feature) for raw_feature in line[5:8]])
        return np.concatenate((cat_vec, num_vec))  # splice category and number vectors

    values_vec = extract_features(values)
    rst = round(predict_model.predict(values_vec), 2)
    r_m_s_l_e = round(1.4002, 2)
    m_a_e = round(2516004.8850, 2)
    rst_lst = [rst, r_m_s_l_e, m_a_e]
    print(rst_lst)
    return rst_lst
def load_model(self, sc, model_file):
    model = LinearRegressionModel.load(sc, model_file)
    return model
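# A minimal usage sketch for load_model above, assuming `helper` is an
# instance of the class defining it and the path holds a previously saved
# MLlib linear regression model; both names are hypothetical.
# model = helper.load_model(sc, "hdfs:///models/linear_model")
# print(model.predict([0.5, 1.2]))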
    return (county, LabeledPoint(values[-1], values[1:-1]))


if __name__ == '__main__':
    sc = SparkContext()
    data = sc.textFile(app.root_path + "/CSVs/test_cancer_final.csv")
    header = data.first()
    data = data.filter(lambda x: x != header)
    parsedData = data.map(parsePoint).map(lambda x: x[1])

    # Build the model
    model = LinearRegressionWithSGD.train(parsedData, iterations=100, step=10)

    # Evaluate the model on training data
    valuesAndPreds = parsedData.map(lambda p: (p.label, model.predict(p.features)))
    print(valuesAndPreds.collect())
    MSE = valuesAndPreds \
        .map(lambda vp: (vp[0] - vp[1]) ** 2) \
        .reduce(lambda x, y: x + y) / valuesAndPreds.count()
    print("Mean Squared Error = " + str(MSE))

    # Save and load model
    model.save(sc, app.root_path + "/Models/pythonLinearRegressionWithSGDModel_cancer")
    sameModel = LinearRegressionModel.load(
        sc, app.root_path + "/Models/pythonLinearRegressionWithSGDModel_cancer")
if __name__ == "__main__": sc = SparkContext(appName="PythonLinearRegressionWithSGDExample") # $example on$ # Load and parse the data def parsePoint(line): values = [float(x) for x in line.replace(',', ' ').split(' ')] return LabeledPoint(values[0], values[1:]) data = sc.textFile("data/mllib/ridge-data/lpsa.data") parsedData = data.map(parsePoint) # Build the model model = LinearRegressionWithSGD.train(parsedData, iterations=100, step=0.00000001) # Evaluate the model on training data valuesAndPreds = parsedData.map(lambda p: (p.label, model.predict(p.features))) MSE = valuesAndPreds \ .map(lambda vp: (vp[0] - vp[1])**2) \ .reduce(lambda x, y: x + y) / valuesAndPreds.count() print("Mean Squared Error = " + str(MSE)) # Save and load model model.save(sc, "target/tmp/pythonLinearRegressionWithSGDModel") sameModel = LinearRegressionModel.load( sc, "target/tmp/pythonLinearRegressionWithSGDModel") # $example off$
import numpy as np
from pyspark import SparkContext
from pyspark.mllib.regression import LabeledPoint, LinearRegressionWithSGD, LinearRegressionModel

sc = SparkContext("local")


# Load and parse the data: column 2 (price) is the label, the rest are features.
# NB: non-numeric columns such as the raw date would need preprocessing before
# the float conversion below can succeed.
def parsePoint(line):
    values = [float(x) for x in line.split(',')]
    return LabeledPoint(values[2], values[0:2] + values[3:])


data = sc.textFile("/root/Desktop/dataset/kc_house_data.csv")
parsedData = data.map(parsePoint)

# Build the model
model = LinearRegressionWithSGD.train(parsedData, iterations=100, step=0.00000001)

# Evaluate the model on training data
valuesAndPreds = parsedData.map(lambda p: (p.label, model.predict(p.features)))
MSE = valuesAndPreds.map(lambda vp: (vp[0] - vp[1]) ** 2).reduce(lambda x, y: x + y) / valuesAndPreds.count()
print("Mean Squared Error = " + str(MSE))

# Save and load model
model.save(sc, "myModelPath")
sameModel = LinearRegressionModel.load(sc, "myModelPath")
if __name__ == "__main__": sc = SparkContext(appName="TicTacLinearRegressionExample") # Parse the data and create LabeledPoints def parsePoint(line): values = [x for x in line.split(' ')] # Last row contains the target data and rest of # the rows define the attributes for linear regression return LabeledPoint(values[9], values[0:8]) # Load the data data = sc.textFile("data/mllib/sample_traindata_tic_tac.txt") parsedData = data.map(parsePoint) # Build the model using LinearRegression model = LinearRegressionWithSGD.train(parsedData, iterations=100, step=0.00000001) # Evaluate the model on training data predict = parsedData.map(lambda pd: (pd.label, model.predict(pd.features))) MSE = predict \ .map(lambda (v, p): (v - p)**2) \ .reduce(lambda x, y: x + y) / predict.count() # Print Mean Squared Error print("Mean Squared Error for Tic Tac Linear Regression = " + str(MSE)) # Save and load model model.save(sc, "target/tmp/pythonTicTacLinearRegression") sameModel = LinearRegressionModel.load(sc, "target/tmp/pythonTicTacLinearRegression")
sc = SparkContext(appName="TicTacLinearRegressionExample") # Parse the data and create LabeledPoints def parsePoint(line): values = [x for x in line.split(' ')] # Last row contains the target data and rest of # the rows define the attributes for linear regression return LabeledPoint(values[9], values[0:8]) # Load the data data = sc.textFile("data/mllib/sample_traindata_tic_tac.txt") parsedData = data.map(parsePoint) # Build the model using LinearRegression model = LinearRegressionWithSGD.train(parsedData, iterations=100, step=0.00000001) # Evaluate the model on training data predict = parsedData.map(lambda pd: (pd.label, model.predict(pd.features))) MSE = predict \ .map(lambda (v, p): (v - p)**2) \ .reduce(lambda x, y: x + y) / predict.count() # Print Mean Squared Error print("Mean Squared Error for Tic Tac Linear Regression = " + str(MSE)) # Save and load model model.save(sc, "target/tmp/pythonTicTacLinearRegression") sameModel = LinearRegressionModel.load( sc, "target/tmp/pythonTicTacLinearRegression")
sc = SparkContext()
train_data = sc.textFile("train.csv")
test_data = sc.textFile("test.csv")
parsedTrainData = train_data.map(parsePoint).filter(lambda x: x is not None)
parsedTestData = test_data.map(parsePoint).filter(lambda x: x is not None)

# Project the training data onto its first two principal components.
mat = RowMatrix(train_data.map(parseVector).filter(lambda x: x is not None))
pc = mat.computePrincipalComponents(2)
projected = mat.multiply(pc)
x = [vector[0] for vector in projected.rows.collect()]
y = [vector[1] for vector in projected.rows.collect()]

# Load the previously saved models.
LinearModel = LinearRegressionModel.load(sc, "Linear")
RidgeModel = RidgeRegressionModel.load(sc, "Ridge")
LassoModel = LassoModel.load(sc, "Lasso")

valuesAndPredsLinearTrain = parsedTrainData.map(
    lambda p: (p.label, LinearModel.predict(p.features)))
valuesAndPredsLinearTest = parsedTestData.map(
    lambda p: (p.label, LinearModel.predict(p.features)))
valuesAndPredsRidgeTrain = parsedTrainData.map(
    lambda p: (p.label, RidgeModel.predict(p.features)))
valuesAndPredsRidgeTest = parsedTestData.map(
    lambda p: (p.label, RidgeModel.predict(p.features)))
valuesAndPredsLassoTrain = parsedTrainData.map(
    lambda p: (p.label, LassoModel.predict(p.features)))
#Section 7.5.2
from pyspark.mllib.evaluation import RegressionMetrics
validMetrics = RegressionMetrics(validPredicts)
validMetrics.rootMeanSquaredError
validMetrics.meanSquaredError

#Section 7.5.3
import operator
print(",".join([str(s) for s in sorted(enumerate([abs(x) for x in model.weights.toArray()]),
                                       key=operator.itemgetter(0))]))

#Section 7.5.4
model.save(sc, "ch07output/model")
from pyspark.mllib.regression import LinearRegressionModel
model = LinearRegressionModel.load(sc, "ch07output/model")

#Section 7.6.1
def iterateLRwSGD(iterNums, stepSizes, train, valid):
    from pyspark.mllib.regression import LinearRegressionWithSGD
    import math
    for numIter in iterNums:
        for step in stepSizes:
            alg = LinearRegressionWithSGD()
            model = alg.train(train, iterations=numIter, step=step, intercept=True)
            rescaledPredicts = train.map(lambda x: (float(model.predict(x.features)), x.label))
            validPredicts = valid.map(lambda x: (float(model.predict(x.features)), x.label))
            meanSquared = math.sqrt(rescaledPredicts.map(lambda p: pow(p[0] - p[1], 2)).mean())
            meanSquaredValid = math.sqrt(validPredicts.map(lambda p: pow(p[0] - p[1], 2)).mean())
            print("%d, %5.3f -> %.4f, %.4f" % (numIter, step, meanSquared, meanSquaredValid))
from pyspark.mllib.regression import LabeledPoint, LinearRegressionWithSGD, LinearRegressionModel
# $example off$

if __name__ == "__main__":
    sc = SparkContext(appName="PythonLinearRegressionWithSGDExample")

    # $example on$
    # Load and parse the data
    def parsePoint(line):
        values = [float(x) for x in line.replace(',', ' ').split(' ')]
        return LabeledPoint(values[0], values[1:])

    data = sc.textFile("data/mllib/ridge-data/lpsa.data")
    parsedData = data.map(parsePoint)

    # Build the model
    model = LinearRegressionWithSGD.train(parsedData, iterations=100, step=0.00000001)

    # Evaluate the model on training data
    valuesAndPreds = parsedData.map(lambda p: (p.label, model.predict(p.features)))
    MSE = valuesAndPreds \
        .map(lambda vp: (vp[0] - vp[1])**2) \
        .reduce(lambda x, y: x + y) / valuesAndPreds.count()
    print("Mean Squared Error = " + str(MSE))

    # Save and load model
    model.save(sc, "target/tmp/pythonLinearRegressionWithSGDModel")
    sameModel = LinearRegressionModel.load(sc, "target/tmp/pythonLinearRegressionWithSGDModel")
    # $example off$
    return LabeledPoint(values[7], values[0:11])


#data_file = sc.textFile("/home/faiz89/Desktop/Eastman/2008.csv")
data_file = sc.textFile("../2008_small.csv")
header = data_file.first()
raw_data = data_file.filter(lambda x: x != header)
#examples = MLUtils.loadLibSVMFile(sc, "2008.csv").collect()
parsedData = raw_data.map(parsePoint)
(trainingData, testData) = parsedData.randomSplit([0.7, 0.3])

startTime = datetime.now()
# Build the model
trainingData.cache()
model = LinearRegressionWithSGD.train(trainingData, iterations=1)
print('Training time consumed =', datetime.now() - startTime)

startTestTime = datetime.now()
testData.cache()
# Evaluate the model on the held-out test data
valuesAndPreds = testData.map(lambda p: (p.label, model.predict(p.features)))
MSE = valuesAndPreds \
    .map(lambda vp: (vp[0] - vp[1]) ** 2) \
    .reduce(lambda x, y: x + y) / valuesAndPreds.count()
print('Testing time consumed =', datetime.now() - startTestTime)
print('Total time:', datetime.now() - startTime)
print("Mean Squared Error = " + str(MSE))

# Save and load model
model.save(sc, "LinearRegressionNarrow2008_cache_both_train_and_test")
sameModel = LinearRegressionModel.load(sc, "LinearRegressionNarrow2008_cache_both_train_and_test")
from pyspark.sql.functions import *
from datetime import datetime, timedelta
from pyspark.mllib.regression import LabeledPoint, LinearRegressionWithSGD, LinearRegressionModel
from pyspark.mllib.feature import HashingTF

inputPath = '/user/ssambasi/SFPD_parquet'

conf = SparkConf().setAppName('Predict Alarming District')
sc = SparkContext(conf=conf)
sqlContext = SQLContext(sc)

crimeDF = sqlContext.read.parquet(inputPath).cache()
htf = HashingTF(5000)

#Load the model
lrm = LinearRegressionModel.load(sc, '/user/ssambasi/sfo/CrimeCountPredictionModel')

#Load test data for demo
districtRDD = crimeDF.select('PdDistrict').distinct().rdd \
    .filter(lambda r: r[0] != '').map(lambda r: r[0]).cache()
startDate = datetime.now()
dateList = []
for dateIndex in range(0, 30):
    dateList.append(startDate + timedelta(days=dateIndex))
dateRDD = sc.parallelize(dateList).cache()
testDataRDD = districtRDD.cartesian(dateRDD).map(lambda (district, date): \
    ((district, date), LabeledPoint(1.0, htf.transform((district, date))))).cache()

#Predict Alarming District using the model
def GetMaXCount((district1, count1), (district2, count2)):
    if(count1 > count2):