Example #1
 def test_int_to_float(self):
     from pyspark.mllib.linalg import Vectors
     df = self.sc.parallelize([
         Row(label=1.0, weight=2.0, features=Vectors.dense(1.0))]).toDF()
     # elasticNetParam is a float param; the int 0 must be coerced to 0.0
     # both by the constructor...
     lr = LogisticRegression(elasticNetParam=0)
     lr.fit(df)
     # ...and by the generated setter.
     lr.setElasticNetParam(0)
     lr.fit(df)
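This test comes from PySpark's ML test suite and relies on the test harness for self.sc. The same int-to-float coercion can be observed standalone; a minimal sketch, assuming a local PySpark installation:

from pyspark.sql import SparkSession
from pyspark.ml.classification import LogisticRegression

spark = SparkSession.builder.master("local[1]").getOrCreate()

lr = LogisticRegression(elasticNetParam=0)         # int accepted for a float param
assert isinstance(lr.getElasticNetParam(), float)  # stored as 0.0 after coercion

spark.stop()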
Example #2
 def test_invalid_to_float(self):
     from pyspark.mllib.linalg import Vectors
     # A non-numeric string cannot be converted to float, so both the
     # constructor and the setter must raise.
     self.assertRaises(Exception,
                       lambda: LogisticRegression(elasticNetParam="happy"))
     lr = LogisticRegression(elasticNetParam=0)
     self.assertRaises(Exception, lambda: lr.setElasticNetParam("panda"))
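Concretely, the rejection comes from the param's type converter, which raises a TypeError. A sketch of the failure mode, assuming an active local SparkSession as in the sketch under Example #1:

lr = LogisticRegression()
try:
    lr.setElasticNetParam("panda")  # not convertible to float
except TypeError as e:
    print(e)  # message along the lines of "Could not convert ... to float"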
Example #3
# COMMAND ----------

partialPipeline = Pipeline().setStages([tokenizer, remover, counts, binarizer])
preppedData = partialPipeline.fit(train).transform(train)
lrModel = LogisticRegression().fit(preppedData)
display(lrModel, preppedData, "ROC")

# COMMAND ----------

# NOTE: `lr` here and the pipeline `p` below are defined in earlier
# notebook cells that are not part of this excerpt.
print(lr.explainParams())

# COMMAND ----------

lr.setRegParam(0.01)
lr.setElasticNetParam(0.1)
counts.setVocabSize(1000)
model = p.fit(train)
result = model.transform(test)
print("AUC %(result)s" %
      {"result": BinaryClassificationEvaluator().evaluate(result)})

# COMMAND ----------

# DBTITLE 1,Serialize model
model.write().overwrite().save("/mnt/mikem/models/amazon-model")

# COMMAND ----------

# MAGIC %fs ls /mnt/mikem/models/amazon-model/
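Not shown in the notebook: a model persisted this way can be read back for scoring. A minimal sketch, assuming `model` above is a fitted PipelineModel and using the same mount path:

from pyspark.ml import PipelineModel

# Reload the persisted pipeline and reuse it to score new data.
loaded = PipelineModel.load("/mnt/mikem/models/amazon-model")
result = loaded.transform(test)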
Example #4
        print("Usage: logistic_regression", file=sys.stderr)
        exit(-1)

    sc = SparkContext(appName="PythonLogisticRegressionExample")
    sqlContext = SQLContext(sc)

    # Load the data stored in LIBSVM format as a DataFrame.
    df = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")

    # Map labels into an indexed column of labels in [0, numLabels)
    stringIndexer = StringIndexer(inputCol="label", outputCol="indexedLabel")
    si_model = stringIndexer.fit(df)
    td = si_model.transform(df)
    [training, test] = td.randomSplit([0.7, 0.3])

    lr = LogisticRegression(maxIter=100, regParam=0.3).setLabelCol("indexedLabel")
    lr.setElasticNetParam(0.8)

    # Fit the model
    lrModel = lr.fit(training)

    # NOTE: DataFrame.map only exists on Spark 1.x; on Spark 2.x+ use .rdd.map.
    predictionAndLabels = lrModel.transform(test).select("prediction", "indexedLabel") \
        .map(lambda x: (x.prediction, x.indexedLabel))

    metrics = MulticlassMetrics(predictionAndLabels)
    print("weighted f-measure %.3f" % metrics.weightedFMeasure())
    print("precision %s" % metrics.precision())
    print("recall %s" % metrics.recall())

    sc.stop()
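On recent Spark versions the RDD-based MulticlassMetrics path above is usually replaced by evaluating the prediction DataFrame directly. A minimal sketch, assuming the `lrModel` and `test` variables from the example:

from pyspark.ml.evaluation import MulticlassClassificationEvaluator

predictions = lrModel.transform(test)
evaluator = MulticlassClassificationEvaluator(
    labelCol="indexedLabel", predictionCol="prediction", metricName="f1")
print("f1 %.3f" % evaluator.evaluate(predictions))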