def test_int_to_float(self):
    """Fit LogisticRegression with the integer ``0`` given as ``elasticNetParam``.

    The integer is supplied first through the constructor keyword and then
    through ``setElasticNetParam``; the estimator is fitted after each. The
    test passes when neither path raises.
    """
    from pyspark.mllib.linalg import Vectors

    rows = [Row(label=1.0, weight=2.0, features=Vectors.dense(1.0))]
    training = self.sc.parallelize(rows).toDF()

    # Integer passed as a constructor keyword.
    estimator = LogisticRegression(elasticNetParam=0)
    estimator.fit(training)

    # Integer passed through the setter on the same estimator.
    estimator.setElasticNetParam(0)
    estimator.fit(training)
def test_int_to_float(self):
    """Exercise LogisticRegression with an ``int`` value for ``elasticNetParam``.

    A one-row DataFrame is built, then the estimator is fitted twice: once
    after receiving ``0`` in the constructor and once after receiving ``0``
    via ``setElasticNetParam``. Any exception fails the test.
    """
    from pyspark.mllib.linalg import Vectors

    sample = Row(label=1.0, weight=2.0, features=Vectors.dense(1.0))
    data = self.sc.parallelize([sample]).toDF()

    model_builder = LogisticRegression(elasticNetParam=0)
    model_builder.fit(data)

    model_builder.setElasticNetParam(0)
    model_builder.fit(data)
def test_invalid_to_float(self):
    """Check that a non-numeric ``elasticNetParam`` is rejected.

    The string values are supplied through both entry points that accept
    the parameter: the constructor keyword and ``setElasticNetParam``.
    Each must raise.
    """
    # Constructor path: a string value must raise.
    with self.assertRaises(Exception):
        LogisticRegression(elasticNetParam="happy")

    # Setter path: start from a valid estimator, then feed it a string.
    lr = LogisticRegression(elasticNetParam=0)
    with self.assertRaises(Exception):
        lr.setElasticNetParam("panda")
# COMMAND ----------

# Fit the preprocessing stages on `train`, transform `train` with them, and
# fit a default LogisticRegression on the result for display.
partialPipeline = Pipeline().setStages([tokenizer, remover, counts, binarizer])
preppedData = partialPipeline.fit(train).transform(train)
lrModel = LogisticRegression().fit(preppedData)
display(lrModel, preppedData, "ROC")

# COMMAND ----------

print(lr.explainParams())

# COMMAND ----------

# Adjust parameters on `lr` and `counts`, refit `p` on `train`, and evaluate
# the transformed `test` set.
lr.setRegParam(0.01)
lr.setElasticNetParam(0.1)
counts.setVocabSize(1000)
model = p.fit(train)
result = model.transform(test)
print(
    "AUC %(result)s"
    % {"result": BinaryClassificationEvaluator().evaluate(result)}
)

# COMMAND ----------

# DBTITLE 1,Serialize model
model.write().overwrite().save("/mnt/mikem/models/amazon-model")

# COMMAND ----------

# MAGIC %fs ls /mnt/mikem/models/amazon-model/
def test_invalid_to_float(self):
    """Verify that string values for ``elasticNetParam`` raise an exception.

    Two paths are exercised: passing the string as a constructor keyword,
    and passing it to ``setElasticNetParam`` on an estimator that was
    created with a valid value.
    """
    # A string given at construction time must raise.
    with self.assertRaises(Exception):
        LogisticRegression(elasticNetParam="happy")

    # A string given to the setter of a valid estimator must raise too.
    lr = LogisticRegression(elasticNetParam=0)
    with self.assertRaises(Exception):
        lr.setElasticNetParam("panda")
exit(-1) sc = SparkContext(appName="PythonLogisticRegressionExample") sqlContext = SQLContext(sc) # Load and parse the data file into a dataframe. df = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt").toDF() # Map labels into an indexed column of labels in [0, numLabels) stringIndexer = StringIndexer(inputCol="label", outputCol="indexedLabel") si_model = stringIndexer.fit(df) td = si_model.transform(df) [training, test] = td.randomSplit([0.7, 0.3]) lr = LogisticRegression(maxIter=100, regParam=0.3).setLabelCol("indexedLabel") lr.setElasticNetParam(0.8) # Fit the model lrModel = lr.fit(training) predictionAndLabels = lrModel.transform(test).select("prediction", "indexedLabel") \ .map(lambda x: (x.prediction, x.indexedLabel)) metrics = MulticlassMetrics(predictionAndLabels) print("weighted f-measure %.3f" % metrics.weightedFMeasure()) print("precision %s" % metrics.precision()) print("recall %s" % metrics.recall()) sc.stop()
print("Usage: logistic_regression", file=sys.stderr) exit(-1) sc = SparkContext(appName="PythonLogisticRegressionExample") sqlContext = SQLContext(sc) # Load the data stored in LIBSVM format as a DataFrame. df = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt") # Map labels into an indexed column of labels in [0, numLabels) stringIndexer = StringIndexer(inputCol="label", outputCol="indexedLabel") si_model = stringIndexer.fit(df) td = si_model.transform(df) [training, test] = td.randomSplit([0.7, 0.3]) lr = LogisticRegression(maxIter=100, regParam=0.3).setLabelCol("indexedLabel") lr.setElasticNetParam(0.8) # Fit the model lrModel = lr.fit(training) predictionAndLabels = lrModel.transform(test).select("prediction", "indexedLabel") \ .map(lambda x: (x.prediction, x.indexedLabel)) metrics = MulticlassMetrics(predictionAndLabels) print("weighted f-measure %.3f" % metrics.weightedFMeasure()) print("precision %s" % metrics.precision()) print("recall %s" % metrics.recall()) sc.stop()