    def load_parameters(self):
        self.amount_prediction_method = self.load_data_from_file(data_type=self.SAVE_TYPE_MODEL,
                                                                 file_name='amount_method')
        self.trend_prediction_method = self.load_data_from_file(data_type=self.SAVE_TYPE_MODEL,
                                                                file_name='trend_method')
        self.data_features = self.load_data_from_file(data_type=self.SAVE_TYPE_MODEL, file_name='features')
        self.stock_symbol = self.load_data_from_file(data_type=self.SAVE_TYPE_MODEL, file_name='symbol')
        self.data_parser = self.load_data_from_file(data_type=self.SAVE_TYPE_MODEL, file_name='data_parser')
        amount_model_path = os.path.join(os.path.abspath(self.model_path), 'amount_model')
        trend_model_path = os.path.join(os.path.abspath(self.model_path), 'trend_model')

        if self.amount_prediction_method == self.RANDOM_FOREST:
            amount_model = RandomForestModel.load(sc=self.sc, path=amount_model_path)
        elif self.amount_prediction_method == self.LINEAR_REGRESSION:
            amount_model = LinearRegressionModel.load(sc=self.sc, path=amount_model_path)
        else:
            amount_model = self.load_data_from_file(data_type=self.SAVE_TYPE_MODEL, file_name='amount_model')

        if self.trend_prediction_method == self.RANDOM_FOREST:
            trend_model = RandomForestModel.load(sc=self.sc, path=trend_model_path)
        elif self.trend_prediction_method == self.LOGISTIC_REGRESSION:
            trend_model = LogisticRegressionModel.load(sc=self.sc, path=trend_model_path)
        elif self.trend_prediction_method == self.NAIVE_BAYES:
            trend_model = NaiveBayesModel.load(sc=self.sc, path=trend_model_path)
        elif self.trend_prediction_method == self.SVM:
            trend_model = SVMModel.load(sc=self.sc, path=trend_model_path)
        else:
            trend_model = self.load_data_from_file(data_type=self.SAVE_TYPE_MODEL, file_name='trend_model')

        return trend_model, amount_model
def prediction(hour, extern_temp):
    if hour >= 0 and hour < 6:

        #Predict Temperature
        tempModel1 = LinearRegressionModel.load(
            sc, "/home/dhruv/Desktop/IoT/Assignment-3/Final/model1")
        x = np.array([extern_temp])
        temp_data = tempModel1.predict(x)
        print temp_data

    elif hour >= 6 and hour < 12:

        #Predict Temperature
        tempModel2 = LinearRegressionModel.load(
            sc, "/home/dhruv/Desktop/IoT/Assignment-3/Final/model2")
        x = np.array([extern_temp])
        temp_data = tempModel2.predict(x)
        print temp_data

    elif hour >= 12 and hour < 18:

        #Predict Temperature
        tempModel3 = LinearRegressionModel.load(
            sc, "/home/dhruv/Desktop/IoT/Assignment-3/Final/model3")
        x = np.array([extern_temp])
        temp_data = tempModel3.predict(x)
        print temp_data

    elif hour >= 18 and hour < 24:

        #Predict Temperature
        tempModel4 = LinearRegressionModel.load(
            sc, "/home/dhruv/Desktop/IoT/Assignment-3/Final/model4")
        x = np.array([extern_temp])
        temp_data = tempModel4.predict(x)
        print temp_data
    f.write('%.2f' % temp_data)
    f.write('\n')
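# Consolidation sketch (not the original author's code): the four branches above differ
# only in which saved model they load, so the path can be derived from the 6-hour bucket.
# The globals sc, np and f, and the model1..model4 directories, are assumed to be the
# same ones used above.
def prediction_compact(hour, extern_temp):
    bucket = hour // 6 + 1  # hours 0-5 -> model1, 6-11 -> model2, 12-17 -> model3, 18-23 -> model4
    model = LinearRegressionModel.load(
        sc, "/home/dhruv/Desktop/IoT/Assignment-3/Final/model%d" % bucket)
    temp_data = model.predict(np.array([extern_temp]))
    print temp_data
    f.write('%.2f\n' % temp_data)
    return temp_data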
def main(sc):
    features_cr = sc.pickleFile('/tmp/features_saved')
    linear_model = LinearRegressionModel.load(sc, "/tmp/linear_model")
    # Getting the features ready for predicting
    numberFeatures = len(features_cr.first()) - 1
    mappings = [get_mapping(features_cr, i) for i in range(0, numberFeatures)]

    # Month Dictionary
    dictio_month = {}
    for i in range(12):
        dictio_month[i + 1] = i
    mappings[1] = dictio_month

    cat_len = sum(map(len, mappings))

    if len(sys.argv) == 2:
        dateZoneGroup = str(sys.argv[1]).split(',')
        zones = get_group_zone(dateZoneGroup[5], 'zone', features_cr)
        groups = get_group_zone(dateZoneGroup[6], 'group', features_cr)
        year_feat = int(dateZoneGroup[0])
        month_feat = int(dateZoneGroup[1])
        day_feat = int(dateZoneGroup[3])
        startDate = date(year_feat, month_feat, day_feat)
        endDate = date(year_feat, month_feat, day_feat)
        print(startDate)
    else:
        startDate = datetime.strptime(str(sys.argv[1]), event_date).date()
        endDate = datetime.strptime(str(sys.argv[2]), event_date).date()
        zones = get_group_zone(str(sys.argv[3]), 'zone', features_cr)
        groups = get_group_zone(str(sys.argv[4]), 'group', features_cr)

    dateRangeWF = list(date_range(startDate, endDate))

    # Making predictions:

    feat_vs_pred = list()
    featPredShow = list()
    for g in groups:
        for z in zones:
            for dR in dateRangeWF:
                featureLine = format_date(dR, z, g, mappings, cat_len)
                #print(featureLine)
                featureShow = format_show(dR, z, g)
                predLine = linear_model.predict(featureLine)
                feat_vs_pred.append(list(featureLine) + [predLine])
                featPredShow.append(list(featureShow) + [predLine])

    scFeaturesPred = sc.parallelize(featPredShow)
    [print(j) for j in scFeaturesPred.collect()]
def prediction():
    year=yearprediction
    stations = sc.textFile(output+"/stationtextformat")
    stations = stations.map(getdata).map(lambda x: (x[0], int(year), float(x[1]), float(x[2])))
    lat = stations.map(lambda x: (x[2])).cache()
    min_lat = lat.min()
    max_lat = lat.max()

    longtitude =  stations.map(lambda x: (x[3])).cache()
    min_long = longtitude.min()
    max_long = longtitude.max()

    max_ = [float('2050'), max_lat, max_long]
    min_ = [float('1990'), min_lat, min_long]

    stations = stations.map(lambda x: scalePoint(x, max_, min_)).cache()
    stationsDF = sqlContext.createDataFrame(stations)
    # load the model
    sameModel = LinearRegressionModel.load(sc, output+"/modelpath")
    # run the model
    stationidAndPreds = stations.map(lambda p : (p[0],  float(sameModel.predict(p[1:]))))
    # the result returns a predicted value for each station (stationId) in the given year
    resultRdd = stationidAndPreds.map(rescale)
    rddschema = resultRdd.map(lambda (a,b): Row(station= a, avg_prcp=b)).cache()
    stationidAndPredsDF = sqlContext.createDataFrame(rddschema)
    stationidAndPredsDF.registerTempTable("stationPrediction")
    getCountries()
    countriesText = sc.textFile(output+"/countries")
    countriesRdd = countriesText.map(getdata)
    countries = countriesRdd.map(lambda (a,b): Row(station= a, country=b)).cache()
    countriesDF = sqlContext.createDataFrame(countries)
    countriesDF.registerTempTable("StationTable")
    countriesDF.cache()
    shortenstations = sqlContext.sql("SELECT SUBSTR(station, 1, 2) As station,avg_prcp FROM stationPrediction")
    shortenstations.show()
    joinedresult = countriesDF.join(shortenstations).where(countriesDF.station == shortenstations.station).select(shortenstations.avg_prcp, countriesDF.country)
    joinedresult.registerTempTable("joinedresult")
    results = sqlContext.sql("SELECT country, Avg(avg_prcp) as avg_prcp FROM joinedresult GROUP BY country")
    results.registerTempTable("results")
    outrdd=results.repartition(40).rdd.map(lambda l: str(l.country)+","+str(l.avg_prcp)).coalesce(1)
    path = yearprediction
    outrdd.saveAsTextFile(output+'/prediction/'+path)
def CustomPredict(date_start, date_end, company):

# create spark context
    sc = SparkContext(appName="Model")
# create an api object
    api = NewsAPI.NewsAPI(date_start.month,date_start.day,date_start.year,date_end.month,date_end.day,date_end.year, company,'56283d7d6075b9d30773e1ceb440e1b2d029f438')
# load the prediction model for the company
    model = LinearRegressionModel.load(sc, company)
# getting data for the duration of time specified
    api.startGetData()
# get the sentiment average of the days
    l = api.getSentimentScore()
    mean_sent = np.mean(l)
    print ("\n\n\n\n\n" + str(mean_sent) + "\n\n\n\n\n")
# make the prediction using the loaded model
    pred = model.predict([mean_sent])
    print ("\n\n\n\n\n" + str(pred) + "\n\n\n\n\n")
# close the spark context
    sc.stop()
# return
    return pred
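# Hedged usage sketch (the dates and the 'AAPL' ticker are illustrative, not from the source):
# the company argument doubles as the path of the saved per-company model that
# LinearRegressionModel.load() reads above.
if __name__ == "__main__":
    from datetime import date
    print(CustomPredict(date(2016, 1, 4), date(2016, 1, 8), 'AAPL'))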
# Example 7
import ast
import json
import psycopg2
from pyspark import sql
from pyspark.sql import SparkSession
from pyspark.streaming import StreamingContext
from pyspark.streaming.kafka import KafkaUtils
from pyspark.mllib.regression import LinearRegressionModel


def main():

    spark = SparkSession.builder.appName("TRAFFIC").config("spark.executor.cores", "4").config("spark.executor.memory", "4g").getOrCreate()
    sc = spark.sparkContext
    mapping = sc.textFile("s3a://insighttraffic/ML_model/mappings").collect()[0]
    mapping = ast.literal_eval(str(mapping))

    models=[]
    for hour in range(0, 24):
        model = LinearRegressionModel.load(sc, "s3a://insighttraffic/ML_model/linear_model_log_"+str(hour))
        models.append(model)

    category_len = 154

    sqlContext = sql.SQLContext(sc)


    hadoop_conf=sc._jsc.hadoopConfiguration()
    hadoop_conf.set("fs.s3n.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem")
    hadoop_conf.set("fs.s3n.awsAccessKeyId", 'awsAccessKeyId')
    hadoop_conf.set("fs.s3n.awsSecretAccessKey", 'awsSecretAccessKey')

    # set microbatch interval as 10 seconds, this can be customized according to the project
    ssc = StreamingContext(sc,10)
    # directly receive the data under a certain topic
    kafkaStream = KafkaUtils.createDirectStream(ssc, ['data'], {"metadata.broker.list": 'Kafka-DNS:9092'})


    connection = psycopg2.connect(host = 'postgres-ip-address', database = 'postgres', user = '******', password = '******')
    cursor = connection.cursor()
    cursor.execute('CREATE TABLE IF NOT EXISTS realtimetraffic (sid text, location text, latitude double precision, longitude double precision,\
        direction text, lanes integer, roadtype text, highway text, current integer, historical double precision, level text, PRIMARY KEY (sid));')
    cursor.execute('SELECT AddGeometryColumn (%s,%s,%s,4326,%s,2);', ('public', 'realtimetraffic', 'geom', 'POINT'))


    #The inbound stream is a DStream
    dstream = kafkaStream.map(lambda (key, value): json.loads(value))
    dstream.foreachRDD(lambda rdd: update(rdd, models, mapping))
#!/usr/local/spark/python
import os

import numpy as np
from pyspark import SparkContext
from pyspark.mllib.regression import LabeledPoint, LinearRegressionWithSGD, LinearRegressionModel
from pyspark.mllib.classification import SVMWithSGD, SVMModel
from pyspark.mllib.classification import LogisticRegressionWithLBFGS, LogisticRegressionModel

sc = SparkContext("local")
features = open("/root/Desktop/features.LOL", "r")
feature = features.read().strip().split(' ')
i = map(lambda x: float(x), feature)

param = open("/root/Desktop/parameters.LOL", "r")
temp = param.readlines()
print "LOLOLOLOLOLOLOLOLOLOLOLOLOLOLOLOLOLOLOLOLOLOLOLOLOLOLOLOLOLOLOLOLOLOLOLOL"
supermodel = temp[2].strip()
Modelname = LinearRegressionModel.load(sc, supermodel)

print Modelname.predict(i), "ijfksfksnfknfn jncjnnfknsklfnsk"
r = str(Modelname.predict(i))
ss = "echo %s >/root/Desktop/predicted.txt" % r
os.system(ss)
# Example 9
from pyspark.mllib.regression import LinearRegressionWithSGD, LinearRegressionModel, LabeledPoint
from pyspark import SparkContext

def parsePoint(line):

	values=[float(x) for x in line.split(',')]
	
	return LabeledPoint(values[2],[values[0], values[1]])


sc=SparkContext()

model = LinearRegressionModel.load(sc, "/home/khaled/project/tmp/lin_reg_model")

data_test=sc.textFile("/home/khaled/project/data_gen/test.csv")
data_test_parsed=data_test.map(parsePoint)
data_test2=sc.textFile("/home/khaled/project/data_gen/test2.csv")
data_test_parsed2=data_test2.map(lambda x: x.split(','))
predics=data_test_parsed.map(lambda x :model.predict(x.features))
predics2=data_test_parsed2.map(lambda x :model.predict(x))
data_itr=predics.collect()
data_itr2=predics2.collect()
f=open("predictions.txt","w+")
f.write("sbah el khir \n")
for i in data_itr:
	f.write("the output consumption is: " + str(i) + "\n")
for i in data_itr2:
	f.write("the output consumption is: " + str(i) + "\n")
f.close()
# Example 10

for i in range(500):

    o.write(str(k[i][0]))
    o.write("&nbsp&nbsp&nbsp&nbsp&nbsp&nbsp&nbsp&nbsp")
    o.write(str(k[i][1]))
    o.write("<br>")
    o.write("\n")
o.write("</body>")
o.write("</html>")

os.system("rm -rf /root/Desktop/mpv2/mapredtest1/templates/valpred.html")
os.system(
    "cp /root/Desktop/valpred.html /root/Desktop/mpv2/mapredtest1/templates/valpred.html"
)
'''
trainerr=valuesAndPreds.filter(lambda(v,p):abs(v-p)>20000).count()/float(parsedData.count())
print valuesAndPreds.count(),"iuieuwieuiueieuiwuieuriuwieuieuiruwiurieuriue"
#print("Mean Squared Error = " + str(MSE)+"bkbkbbbbbbbbbbbbbbbbbbbbbbbbkkbkbkkkkkkkkkkkkkkkkkkkkkkkkkkkkkbkbkbkb")
print parsedtrainData.take(5),"hjjhjhjhjjjjjjjjjjjjjjjjjj"
print parsedtestData.take(1)[0].features,"lnlnlnnnlnnnnnnnnnnnnnnlnlnanslnlnslnlansdlnads"
print "answer bjbsbdjk=" , model.predict(parsedtestData.take(1)[0].features)
print parsedtestData.take(1)[0].label ,"lkmnhbgvyctexrwzea"
print parsedtrainData.take(5)
print trainerr,"rrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr"
'''
# Save and load model
model.save(sc, supermodel)
sameModel = LinearRegressionModel.load(sc, supermodel)
# Example 11
def parsePoint(line):
    values = [np.float(x) for x in line.replace(',', ' ').split(' ')]
    return LabeledPoint(values[6], values[0:6])


data = sc.textFile("/user/cloudera/hw1/train_nohead.csv")
wholedata = sc.textFile("/user/cloudera/hw1/wholedata.csv")

parsedData = data.map(parsePoint)
parsedWholeData = wholedata.map(parsePoint)

#Build the model
model = LinearRegressionWithSGD.train(parsedData, iterations=100, step=0.1)

#Evaluate the model
valuesAndPreds = parsedWholeData.map(lambda p:
                                     (p.label, model.predict(p.features)))

RMSE = np.sqrt(
    valuesAndPreds \
 .map(lambda (v, p): (v - p)**2) \
 .reduce(lambda x, y: x + y) / valuesAndPreds.count()
)
print("linear regression output : \n")
print("RMSE = {0}\n".format(RMSE))

#save and load model
model.save(sc, "/user/cloudera/hw1/results/2015310884_linear")
sameModel = LinearRegressionModel.load(
    sc, "/user/cloudera/hw1/results/2015310884_linear")
# Example 12
data = [
    LabeledPoint(2.0, [1.0, 1.4]),
    LabeledPoint(4.0, [2.0, 1.9]),
    LabeledPoint(6.0, [3.0, 4.0])
]  # training set
lrm = LinearRegressionWithSGD.train(sc.parallelize(data),
                                    iterations=100,
                                    initialWeights=np.array([1.0, 1.0]))
print(lrm.predict(np.array([2.0, 1.0])))  # predict with the trained regression model

import os, tempfile
from pyspark.mllib.regression import LinearRegressionModel
from pyspark.mllib.linalg import SparseVector

path = tempfile.mkdtemp()
lrm.save(sc, path)  # save the model to disk
sameModel = LinearRegressionModel.load(sc, path)  # load the model back
print(sameModel.predict(SparseVector(2, {
    0: 100.0,
    1: 150
})))  # predict from a SparseVector; returns a single predicted value
test_set = []
for i in range(100):
    for j in range(100):
        test_set.append(SparseVector(2, {0: i, 1: j}))
print(sameModel.predict(sc.parallelize(test_set)).collect())  # predict many values; returns an RDD
print(sameModel.weights)  # print the model weights

# ----------------- Ridge regression ------------------

from pyspark.mllib.regression import RidgeRegressionWithSGD
    return (county, LabeledPoint(values[-1], values[1:-1]))


if __name__ == '__main__':
    sc = SparkContext()

    data = sc.textFile(app.root_path + "/CSVs/test_cancer_final.csv")
    header = data.first()
    data = data.filter(lambda x: x != header)

    parsedData = data.map(parsePoint).map(lambda x: x[1])

    # Build the model
    model = LinearRegressionWithSGD.train(parsedData, iterations=100, step=10)

    # Evaluate the model on training data
    valuesAndPreds = parsedData.map(lambda p:
                                    (p.label, model.predict(p.features)))
    print(valuesAndPreds.collect())
    MSE = valuesAndPreds \
              .map(lambda vp: (vp[0] - vp[1]) ** 2) \
              .reduce(lambda x, y: x + y) / valuesAndPreds.count()
    print("Mean Squared Error = " + str(MSE))

    # Save and load model
    model.save(
        sc,
        app.root_path + "/Models/pythonLinearRegressionWithSGDModel_cancer")
    sameModel = LinearRegressionModel.load(
        sc,
        app.root_path + "/Models/pythonLinearRegressionWithSGDModel_cancer")
# Example 15
from pyspark.mllib.regression import LinearRegressionWithSGD as lrSGD
ourModelWithLinearRegression = lrSGD.train(data=regressionLabelPointTrainData,
                                           iterations=200,
                                           step=0.02,
                                           intercept=True)

ourModelWithLinearRegression.intercept
ourModelWithLinearRegression.weights

#Step 9-6-5. Saving the created model.

ourModelWithLinearRegression.save(sc, '/home/pysparkbook/ourModelWithLinearRegression')
from pyspark.mllib.regression import LinearRegressionModel as linearRegressModel

ourModelWithLinearRegressionReloaded = linearRegressModel.load(sc, '/home/pysparkbook/ourModelWithLinearRegression')
ourModelWithLinearRegressionReloaded.intercept
ourModelWithLinearRegressionReloaded.weights

#Step 9-6-6. Predicting the data using model.

actualDataandLinearRegressionPredictedData = regressionLabelPointTestData.map(lambda data : (float(data.label) , float(ourModelWithLinearRegression.predict(data.features))))
actualDataandLinearRegressionPredictedData.take(5)

#Step 9-6-7. Evaluating the model we have created.

from pyspark.mllib.evaluation import RegressionMetrics as rmtrcs
ourLinearRegressionModelMetrics = rmtrcs(actualDataandLinearRegressionPredictedData)
ourLinearRegressionModelMetrics.rootMeanSquaredError
ourLinearRegressionModelMetrics.r2
MSE = valuesAndPreds.map(lambda (v, p): (v - p) ** 2).reduce(lambda x, y: x + y) / valuesAndPreds.count()
print("Mean Squared Error = " + str(MSE))

# Save and load model
def save(self, sc, path):
    java_model = sc._jvm.org.apache.spark.mllib.regression.LinearRegressionModel(
        _py2java(sc, self._coeff), self.intercept
    )
    java_model.save(sc._jsc.sc(), path)


@classmethod
def load(cls, sc, path):
    java_model = sc._jvm.org.apache.spark.mllib.regression.LinearRegressionModel.load(sc._jsc.sc(), path)
    weights = _java2py(sc, java_model.weights())
    intercept = java_model.intercept()
    model = LinearRegressionModel(weights, intercept)
    return model


# Save parameters i.e. min_ and max_ on disk
f = open(params, "w")
f.write(str(str(min_) + "," + str(max_)))
f.close()


model.save(sc, myModelPath)
sameModel = LinearRegressionModel.load(sc, myModelPath)
sample = testData.map(lambda p: p.features)
predictValues = sameModel.predict(sample)
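# Companion sketch (assumes the write format used above: str(min_) + "," + str(max_),
# where min_ and max_ are Python lists, and the same `params` file name). ast.literal_eval
# parses that string back into a (min_, max_) pair so new feature vectors can be scaled
# the same way before calling sameModel.predict().
import ast

with open(params) as fh:
    min_, max_ = ast.literal_eval(fh.read())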
def run_saved_model(pr_values, sc):
    values = list(pr_values)
    predict_model = LinearRegressionModel.load(sc, '/Users/xiaoru_zhu/PycharmProjects/HousingPriceDA/PricePrediction/model/LR.model')

    # Configure
    train_path = '/Users/xiaoru_zhu/PycharmProjects/HousingPriceDA/Dataset/train.csv'
    # Initialize RDD
    rdd_lines = sc.textFile(train_path)
    head = rdd_lines.first()
    rdd_lines = rdd_lines.filter(lambda ln: ln != head) \
                         .mapPartitions(lambda x: csv.reader(x)) \
                         .persist(StorageLevel(True, True, False, False, 1))  # MEMORY_AND_DISK
    # Prepare for normalization
    sub = []
    minimum = []
    for index in range(5, 8):
        max_ = float(rdd_lines.map(lambda attr: attr[index]).max(key=float))
        min_ = float(rdd_lines.map(lambda attr: attr[index]).min(key=float))
        subtract = max_ - min_
        minimum.append(min_)
        sub.append(subtract)

    # Normalization: (val - min) / (max - min), maps the numeric feature values into [0, 1] to reduce error
    def normalization(line):
        line[5] = (float(line[5]) - minimum[0]) / sub[0]
        line[6] = (float(line[6]) - minimum[1]) / sub[1]
        line[7] = (float(line[7]) - minimum[2]) / sub[2]
        return line

    values = normalization(values)


    # extract features from every category column and generate dict
    def be_mapped(rdd_arg, column):
        return rdd_arg.map(lambda attr: attr[column]) \
                      .distinct() \
                      .zipWithIndex() \
                      .collectAsMap()  # result : {'BATH BEACH': 0, 'BAY RIDGE': 1, 'BEDFORD STUYVESANT': 2, ...}

    mappings = [be_mapped(rdd_lines, i) for i in [0, 1, 2, 8]]  # collect dicts into a list
    print('category feature mapping dict:', mappings)
    cat_len = sum(map(len, [i for i in mappings]))  # total count of categorical feature values (sum + map)
    num_len = len(rdd_lines.first()[5:8])  # count of numeric features, indexes 5, 6, 7
    total_len = num_len + cat_len  # total feature count

    rdd_lines = rdd_lines.map(lambda attr: normalization(attr))


    # extract features from every category column and generate dict
    def be_mapped(rdd_arg, column):
        return rdd_arg.map(lambda attr: attr[column]) \
            .distinct() \
            .zipWithIndex() \
            .collectAsMap()  # result : {'BATH BEACH': 0, 'BAY RIDGE': 1, 'BEDFORD STUYVESANT': 2, ...}

    mappings = [be_mapped(rdd_lines, i) for i in [0, 1, 2, 8]]  # collect dicts into a list
    print('category feature mapping dict:', mappings)
    cat_len = sum(map(len, [i for i in mappings]))  # category feature numbers using sum + map function
    # num_len = len(rdd_lines.first()[5:8])  # number feature numbers,index = 5,6,7

    # Create eigenvectors(feature vectors) for linear regression
    def extract_features(line):
        cat_vec = np.zeros(cat_len)  # new array for category features, init 0 for all elements
        step = 0
        for i, raw_feature in enumerate([line[0], line[1], line[2], line[8]]):  # [(0,line[0]), (1,line[1]), ...) ]
            dict_cate = mappings[i]  # category feature mapping dict {'BATH BEACH': 0, 'BAY RIDGE': 1, 'xxx': 2, ...}
            idx = dict_cate[raw_feature]  # get value from dict
            cat_vec[idx + step] = 1  # set 1 for index in array
            step = step + len(dict_cate)  # jump to the next attribute area
        num_vec = np.array([float(raw_feature) for raw_feature in line[5:8]])
        return np.concatenate((cat_vec, num_vec))  # splice category and number vectors

    values_vec = extract_features(values)

    rst = predict_model.predict(values_vec)

    rst = round(rst, 2)
    r_m_s_l_e = round(1.4002, 2)
    m_a_e = round(2516004.8850, 2)
    rst_lst = [rst, r_m_s_l_e, m_a_e]
    print(rst_lst)
    return rst_lst
# Example 18
    def load_model(self, sc, model_file):
        model = LinearRegressionModel.load(sc, model_file)
        return model
# Example 19
# MAGIC %md The tuned model does better! (Note: Performance can vary because of randomness, but it should be better.)

# COMMAND ----------

print 'Tuned model with best alpha = %g' % bestAlpha
print '  Model intercept: %g' % tunedClf.intercept_
print '  Model coefficients:'
for i in range(len(featureNames)):
  print '    %g\t%s' % (tunedClf.coef_[i], featureNames[i])

# COMMAND ----------

# MAGIC %md ## 3. Converting between scikit-learn and MLlib models
# MAGIC 
# MAGIC It is often possible to convert between scikit-learn and MLlib models.  There is not built-in functionality yet, but we show how to do the conversion for linear models.  This can be useful to take advantage of each library's different sets of functionality.

# COMMAND ----------

# Convert the scikit-learn model into an equivalent MLlib model
from pyspark.mllib.regression import LinearRegressionModel
mllibModel = LinearRegressionModel(tunedClf.coef_, tunedClf.intercept_)
mllibModel

# COMMAND ----------

# Demonstrate that the models compute the same predictions
sklearnPredictions = tunedClf.predict(testFeatures)
mllibPredictions = numpy.array(map(lambda x: mllibModel.predict(x), testFeatures))
differences = sklearnPredictions - mllibPredictions
sumSquaredDifferences = sum(differences * differences)
print 'Total difference between scikit-learn and MLlib model predictions: %g' % sumSquaredDifferences
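# COMMAND ----------

# MAGIC %md A sketch of the reverse direction, which this excerpt does not show: wrap the MLlib model's weights and intercept in a scikit-learn `LinearRegression` by assigning the fitted attributes directly. This is a manual workaround rather than an official conversion API, and it assumes a plain linear model with no feature scaling.

# COMMAND ----------

# Build a scikit-learn LinearRegression from the MLlib model's parameters
from sklearn.linear_model import LinearRegression
sklearnFromMllib = LinearRegression()
sklearnFromMllib.coef_ = numpy.array(mllibModel.weights.toArray())
sklearnFromMllib.intercept_ = float(mllibModel.intercept)
# Its predictions should match the scikit-learn predictions computed above
sklearnRoundTrip = sklearnFromMllib.predict(testFeatures)
print 'Max abs difference vs original scikit-learn predictions: %g' % max(abs(sklearnRoundTrip - sklearnPredictions))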
# Example 20
# compute mean ndvi for comparison
meanNDVI = opsRDD.map(lambda x: x.label).mean()

##### use the MEAN NDVI as the prediction (for comparison)
meanRES = test.map(lambda x: (x.label,meanNDVI))
rmseVsMean = rmse(meanRES,numTest)
outString = "Simple Mean NDVI RMSE = " + str(rmseVsMean) + "\n\n"
fOut.write(outString)
######### MEAN NDVI

##### LINEAR REGRESSION WITH STOCHASTIC GRADIENT DESCENT
# if a model has already been trained, use it
# otherwise train a new one and save it
modelPath = os.path.expanduser('~/CloudRepair/MODELS/lrmCR')  # os.path.exists does not expand '~'
if os.path.exists(modelPath):
    lrm = LinearRegressionModel.load(sc, modelPath)
else:
    lrm = LinearRegressionWithSGD.train(training,
                                        iterations=10000,
                                        step=0.0000001,
                                        miniBatchFraction=0.10)
    lrm.save(sc, modelPath)

lrmPred = lrm.predict(test.map(lambda x: x.features))
lrmRES = test.map(lambda x: x.label).zip(lrmPred)
rmseLRM = rmse(lrmRES,numTest)
outString = "Linear Regression NDVI RMSE = " + str(rmseLRM) + "\n\n"
fOut.write(outString)
######### LINEAR REGRESSION WITH STOCHASTIC GRADIENT DESCENT

##### RANDOM FOREST optimization
# Example 21
from pyspark import SparkConf, SparkContext
from pyspark.mllib.regression import LabeledPoint, LinearRegressionWithSGD, LinearRegressionModel

conf = SparkConf().setAppName(
    'Linear least squares, Lasso, and ridge regression').setMaster('local[2]')
sc = SparkContext(conf=conf)


# load and parse the data
def parsePoint(line):
    values = [float(x) for x in line.replace(',', ' ').split(' ')]
    return LabeledPoint(values[0], values[1:])


data = sc.textFile('../data/lpsa.data')
parseData = data.map(parsePoint)

# build the model
model = LinearRegressionWithSGD.train(parseData,
                                      iterations=100,
                                      step=0.0000001)

# evaluate the model on training data
valuesAndPreds = parseData.map(lambda p: (p.label, model.predict(p.features)))
MSE = valuesAndPreds.map(lambda (v, p): (v - p)**2).reduce(
    lambda a, b: a + b) / valuesAndPreds.count()
print('mean squared error :' + str(MSE))

# save and load model
model.save(sc, '../model/pythonLinearRegressionWithSGDModel')
sameModel = LinearRegressionModel.load(
    sc, '../model/pythonLinearRegressionWithSGDModel')
sc.stop()
# Example 22
        df = sparkSession.createDataFrame([(time.strftime("%Y-%m-%d %H:%M:%S"), store_id, result)], ["timePredicted", "store_id", "value"])
        df.write.format("com.mongodb.spark.sql.DefaultSource").mode("append").save()

        return predicted
    else:
        print("No data received")

if __name__ == "__main__":
    sc = SparkContext(appName="NinoxStreaming")

    my_spark = SparkSession \
        .builder \
        .appName("Ninox") \
        .config("spark.mongodb.input.uri", "mongodb://172.254.0.4:27017/predictions.data") \
        .config("spark.mongodb.output.uri", "mongodb://172.254.0.4:27017/predictions.data") \
        .getOrCreate()

    ssc = StreamingContext(sc, 10)

    # Load model from HDFS
    model = LinearRegressionModel.load(sc, "hdfs://172.254.0.2:9000/user/root/models/first.model")

    # Create stream to get kafka messages
    directKafkaStream = KafkaUtils.createDirectStream(ssc, ["incomingData"], {"metadata.broker.list": "172.254.0.7:9092"})
    
    # Predict and save to mongo
    directKafkaStream.foreachRDD(lambda time, rdd: predict(rdd, model, my_spark, time))

    ssc.start()
    ssc.awaitTermination()
    sc.stop()
# Example 23
                type=p[7],
                velocity=int(p[8]),
                error=p[9],
                integration=int(p[10]),
                station=p[11]
            ))

        trafficDF = sqlContext.createDataFrame(trafficData)
        trafficDF.registerTempTable("traffic")

        query = sqlContext.sql("SELECT year, month, day, station, SUM(intensity) intensity  "
                                "FROM traffic "
                                "WHERE error='N' AND station = '28079004' "
                                "GROUP BY year, month, day, station "
                                "LIMIT 1")

        labelPoints = query.map(lambda line:[CommonFunctions.toWeekday(2000 + line[0], line[1], line[2]), CommonFunctions.clasification_intensity(line[4])])
        model = LinearRegressionModel.load(sc, dirTrainingModel)
        valueAir = model.predict(labelPoints.first())

        data = query.map(lambda p: Row(
            valueAir = valueAir,
            year=int(p[0]),
            month=int(p[1]),
            day=int(p[2]),
            station=p[3],
            intensity=p[4]
        ))

        print data.collect()
if __name__ == "__main__":

    sc = SparkContext(appName="PythonLinearRegressionWithSGDExample")

    # $example on$
    # Load and parse the data
    def parsePoint(line):
        values = [float(x) for x in line.replace(',', ' ').split(' ')]
        return LabeledPoint(values[0], values[1:])

    data = sc.textFile("data/mllib/ridge-data/lpsa.data")
    parsedData = data.map(parsePoint)

    # Build the model
    model = LinearRegressionWithSGD.train(parsedData,
                                          iterations=100,
                                          step=0.00000001)

    # Evaluate the model on training data
    valuesAndPreds = parsedData.map(lambda p:
                                    (p.label, model.predict(p.features)))
    MSE = valuesAndPreds \
        .map(lambda vp: (vp[0] - vp[1])**2) \
        .reduce(lambda x, y: x + y) / valuesAndPreds.count()
    print("Mean Squared Error = " + str(MSE))

    # Save and load model
    model.save(sc, "target/tmp/pythonLinearRegressionWithSGDModel")
    sameModel = LinearRegressionModel.load(
        sc, "target/tmp/pythonLinearRegressionWithSGDModel")
    # $example off$
import numpy as np
from pyspark import SparkContext
from pyspark.mllib.regression import LabeledPoint, LinearRegressionWithSGD, LinearRegressionModel

sc = SparkContext("local")


# Load and parse the data
def parsePoint(line):
    #values = [float(x) for x in line.replace(',', ' ').split(' ')]
    values = line.split(',')
    return LabeledPoint(values[2:3], values[0:2] + values[3:])


data = sc.textFile("/root/Desktop/dataset/kc_house_data.csv")
parsedData = data.map(parsePoint)

# Build the model
model = LinearRegressionWithSGD.train(parsedData,
                                      iterations=100,
                                      step=0.00000001)

# Evaluate the model on training data
valuesAndPreds = parsedData.map(lambda p: (p.label, model.predict(p.features)))
MSE = valuesAndPreds.map(lambda (v, p): (v - p)**2).reduce(
    lambda x, y: x + y) / valuesAndPreds.count()
print("Mean Squared Error = " + str(MSE) +
      "bkbkbbbbbbbbbbbbbbbbbbbbbbbbkkbkbkkkkkkkkkkkkkkkkkkkkkkkkkkkkkbkbkbkb")

# Save and load model
model.save(sc, "myModelPath")
sameModel = LinearRegressionModel.load(sc, "myModelPath")
if __name__ == "__main__":

    sc = SparkContext(appName="TicTacLinearRegressionExample")

    # Parse the data and create LabeledPoints
    def parsePoint(line):
        values = [x for x in line.split(' ')]
        # Last row contains the target data and rest of
        # the rows define the attributes for linear regression
        return LabeledPoint(values[9], values[0:8])

    # Load the data
    data = sc.textFile("data/mllib/sample_traindata_tic_tac.txt")
    parsedData = data.map(parsePoint)

    # Build the model using LinearRegression
    model = LinearRegressionWithSGD.train(parsedData, iterations=100, step=0.00000001)

    # Evaluate the model on training data
    predict = parsedData.map(lambda pd: (pd.label, model.predict(pd.features)))
    MSE = predict \
        .map(lambda (v, p): (v - p)**2) \
        .reduce(lambda x, y: x + y) / predict.count()

    # Print Mean Squared Error
    print("Mean Squared Error for Tic Tac Linear Regression = " + str(MSE))

    # Save and load model
    model.save(sc, "target/tmp/pythonTicTacLinearRegression")
    sameModel = LinearRegressionModel.load(sc, "target/tmp/pythonTicTacLinearRegression")
    sc = SparkContext(appName="TicTacLinearRegressionExample")

    # Parse the data and create LabeledPoints
    def parsePoint(line):
        values = [x for x in line.split(' ')]
        # Last row contains the target data and rest of
        # the rows define the attributes for linear regression
        return LabeledPoint(values[9], values[0:8])

    # Load the data
    data = sc.textFile("data/mllib/sample_traindata_tic_tac.txt")
    parsedData = data.map(parsePoint)

    # Build the model using LinearRegression
    model = LinearRegressionWithSGD.train(parsedData,
                                          iterations=100,
                                          step=0.00000001)

    # Evaluate the model on training data
    predict = parsedData.map(lambda pd: (pd.label, model.predict(pd.features)))
    MSE = predict \
        .map(lambda (v, p): (v - p)**2) \
        .reduce(lambda x, y: x + y) / predict.count()

    # Print Mean Squared Error
    print("Mean Squared Error for Tic Tac Linear Regression = " + str(MSE))

    # Save and load model
    model.save(sc, "target/tmp/pythonTicTacLinearRegression")
    sameModel = LinearRegressionModel.load(
        sc, "target/tmp/pythonTicTacLinearRegression")
#Section 7.5.2
from pyspark.mllib.evaluation import RegressionMetrics
validMetrics = RegressionMetrics(validPredicts)
validMetrics.rootMeanSquaredError
validMetrics.meanSquaredError

#Section 7.5.3
import operator
print(",".join([str(s) for s in sorted(enumerate([abs(x) for x in model.weights.toArray()]), key=operator.itemgetter(0))]))

#Section 7.5.4
model.save(sc, "ch07output/model")

from pyspark.mllib.regression import LinearRegressionModel
model = LinearRegressionModel.load(sc, "ch07output/model")


#Section 7.6.1
def iterateLRwSGD(iterNums, stepSizes, train, valid):
  from pyspark.mllib.regression import LinearRegressionWithSGD
  import math
  for numIter in iterNums:
    for step in stepSizes:
      alg = LinearRegressionWithSGD()
      model = alg.train(train, iterations=numIter, step=step, intercept=True)
      rescaledPredicts = train.map(lambda x: (float(model.predict(x.features)), x.label))
      validPredicts = valid.map(lambda x: (float(model.predict(x.features)), x.label))
      meanSquared = math.sqrt(rescaledPredicts.map(lambda p: pow(p[0]-p[1],2)).mean())
      meanSquaredValid = math.sqrt(validPredicts.map(lambda p: pow(p[0]-p[1],2)).mean())
      print("%d, %5.3f -> %.4f, %.4f" % (numIter, step, meanSquared, meanSquaredValid))
sc = SparkContext()
train_data = sc.textFile("train.csv")
test_data = sc.textFile("test.csv")

parsedTrainData = train_data.map(parsePoint).filter(lambda x: x is not None)
parsedTestData = test_data.map(parsePoint).filter(lambda x: x is not None)

mat = RowMatrix(train_data.map(parseVector).filter(lambda x: x is not None))
pc = mat.computePrincipalComponents(2)
projected = mat.multiply(pc)

x = [vector[0] for vector in projected.rows.collect()]
y = [vector[1] for vector in projected.rows.collect()]

LinearModel = LinearRegressionModel.load(sc, "Linear")
RidgeModel = RidgeRegressionModel.load(sc, "Ridge")
LassoModel = LassoModel.load(sc, "Lasso")

valuesAndPredsLinearTrain = parsedTrainData.map(
    lambda p: (p.label, LinearModel.predict(p.features)))
valuesAndPredsLinearTest = parsedTestData.map(
    lambda p: (p.label, LinearModel.predict(p.features)))

valuesAndPredsRidgeTrain = parsedTrainData.map(
    lambda p: (p.label, RidgeModel.predict(p.features)))
valuesAndPredsRidgeTest = parsedTestData.map(
    lambda p: (p.label, RidgeModel.predict(p.features)))

valuesAndPredsLassoTrain = parsedTrainData.map(
    lambda p: (p.label, LassoModel.predict(p.features)))
	return LabeledPoint(values[7], values[0:11]) 

#data_file = sc.textFile("/home/faiz89/Desktop/Eastman/2008.csv")
data_file = sc.textFile("../2008_small.csv")
header = data_file.first ()
raw_data = data_file.filter (lambda x:x != header)

#examples = MLUtils.loadLibSVMFile(sc, "2008.csv").collect()
parsedData = raw_data.map(parsePoint)
(trainingData, testData) = parsedData.randomSplit([0.7, 0.3])
startTime = datetime.now()

# Build the model
trainingData.cache ()
model = LinearRegressionWithSGD.train(trainingData, iterations=1)
print ('Training Time consumed = '), (datetime.now() - startTime)
startTestTime = datetime.now()
testData.cache()
# Evaluating the model on training data
valuesAndPreds = testData.map(lambda p: (p.label, model.predict(p.features)))
MSE = valuesAndPreds \
    .map(lambda (v, p): (v - p)**2) \
    .reduce(lambda x, y: x + y) / valuesAndPreds.count()
print ('Testing Time consumed = '), (datetime.now() - startTestTime)
print ('Total Time: '), (datetime.now() - startTime)

print("Mean Squared Error = " + str(MSE))
# Save and load model
model.save(sc, "LinearRegressionNarrow2008_cache_both_train_and_test")
sameModel = LinearRegressionModel.load(sc, "LinearRegressionNarrow2008_cache_both_train_and_test")
# Example 33
from pyspark import SparkConf, SparkContext
from pyspark.sql import SQLContext
from pyspark.sql.functions import *
from datetime import datetime, timedelta
from pyspark.mllib.regression import LabeledPoint, LinearRegressionWithSGD, LinearRegressionModel
from pyspark.mllib.feature import HashingTF

inputPath = '/user/ssambasi/SFPD_parquet'
conf = SparkConf().setAppName('Predict Alarming District')
sc = SparkContext(conf=conf)
sqlContext = SQLContext(sc)

crimeDF =sqlContext.read.parquet(inputPath).cache()

htf = HashingTF(5000)

#Load the model
lrm = LinearRegressionModel.load(sc, '/user/ssambasi/sfo/CrimeCountPredictionModel')

#Load test data for demo
districtRDD = crimeDF.select('PdDistrict').distinct().rdd.filter(lambda r:r[0]!='').map(lambda r:r[0]).cache()
startDate = datetime.now()
dateList = []
for dateIndex in range(0,30):
    dateList.append(startDate + timedelta(days=dateIndex))
dateRDD = sc.parallelize(dateList).cache()
testDataRDD = districtRDD.cartesian(dateRDD).map(lambda (district,date): \
                ((district,date),LabeledPoint(1.0,htf.transform((district,date))))).cache()

#Predict Alarming District using the model

def GetMaXCount((district1,count1),(district2,count2)):
    if(count1>count2):