# Build the training set: the first column of every `timeseries` row is the
# regression label, and it is paired element-by-element with `features_t`.
# NOTE(review): RDD.zip presumably needs both RDDs to have matching
# partitioning and element counts -- confirm against how features_t is built.
label = timeseries.map(lambda record: record[0])
labeled_data = label.zip(features_t)
final_data = labeled_data.map(lambda pair: LabeledPoint(pair[0], pair[1]))

# Fit a plain least-squares model with SGD (1000 iterations, step 1e-7,
# intercept enabled). The regularised variants below are kept for reference.
model = LinearRegressionWithSGD.train(
    final_data,
    iterations=1000,
    step=1e-7,
    intercept=True,
)
#model = RidgeRegressionWithSGD.train(final_data, 1000, .00000001, intercept=True)
#model = LassoWithSGD.train(final_data, 1000, .00000001, intercept=True)
modelList.append(model)

#print ""
#print "Model1 weights " + str(model.weights)
#print ""

# Score the model on the same data it was trained on, producing
# (prediction, observation) pairs; the metrics printed below are therefore
# in-sample figures.
prediObserRDD = final_data.map(
    lambda point: (float(model.predict(point.features)), point.label)
)
metrics = RegressionMetrics(prediObserRDD)

print("1 R2 = " + str(metrics.r2))
print("1 Root mean squared error = " + str(metrics.rootMeanSquaredError))

# Disabled debugging aid, held in a no-op string literal: it would print the
# first ten predictions next to the first ten labels.
'''print "Predicting model "
preds = final_data.map(lambda p: p.features)
values = final_data.map(lambda p: p.label)
print "Printing preds "
preds = model.predict(preds)
print preds.take(10)
print ""
print "Printing label "
print values.take(10)
print ""'''