Exemple #1
0
 def test_string(self):
     """Check which value types ``featuresCol`` accepts.

     A plain string, a ``u''`` string, and a ``numpy.str_`` are each set
     through ``setFeaturesCol`` and must read back as ``'features'``.
     Constructing the estimator with a float must raise ``TypeError``.
     """
     estimator = LogisticRegression()
     accepted_names = ('features', u'features', np.str_('features'))
     for name in accepted_names:
         estimator.setFeaturesCol(name)
         self.assertEqual(estimator.getFeaturesCol(), 'features')

     # A float is not an acceptable column name.
     with self.assertRaises(TypeError):
         LogisticRegression(featuresCol=2.3)
Exemple #2
0
# TEST - Run this cell to test your solution
from pyspark.ml import Pipeline
from pyspark.ml.classification import LogisticRegression
from pyspark.ml.feature import StringIndexer, VectorAssembler

# Grading checks for the pipeline stages built earlier in the notebook.
# NOTE(review): dbTest is defined outside this cell; its arguments look like
# (test id, expected value, actual value) -- confirm against its definition.
# Type checks use isinstance() so no throwaway estimator has to be
# constructed just to compare types.

# StringIndexer stage: type, input column, output column.
dbTest("ML1-P-07-02-01", True, isinstance(indexer, StringIndexer))
dbTest("ML1-P-07-02-02", True, indexer.getInputCol() == 'species')
dbTest("ML1-P-07-02-03", True, indexer.getOutputCol() == 'speciesClass')

# VectorAssembler stage: inputs must be every irisDF column except the last.
dbTest("ML1-P-07-02-04", True, isinstance(assembler, VectorAssembler))
dbTest("ML1-P-07-02-05", True, assembler.getInputCols() == irisDF.columns[:-1])
dbTest("ML1-P-07-02-06", True, assembler.getOutputCol() == 'features')

# LogisticRegression stage: type, label column, features column.
dbTest("ML1-P-07-02-07", True, isinstance(multinomialRegression, LogisticRegression))
dbTest("ML1-P-07-02-08", True, multinomialRegression.getLabelCol() == "speciesClass")
dbTest("ML1-P-07-02-09", True, multinomialRegression.getFeaturesCol() == 'features')

# The assembled pipeline object itself.
dbTest("ML1-P-07-02-10", True, isinstance(pipeline, Pipeline))

print("Tests passed!")

# COMMAND ----------

# MAGIC %md
# MAGIC ### Step 3: Train the Model and Transform the Dataset
# MAGIC 
# MAGIC Train the model and save the result to `multinomialModel`, then save the transformed dataset to `irisTransformedDF`.

# COMMAND ----------

# TODO
Exemple #3
0
 def test_string(self):
     """Exercise string handling of the ``featuresCol`` parameter.

     Each string flavour passed to ``setFeaturesCol`` must be returned by
     ``getFeaturesCol`` as ``'features'``; a float passed to the
     constructor must raise ``TypeError``.
     """
     lr_estimator = LogisticRegression()
     for column_name in ('features', u'features', np.str_('features')):
         lr_estimator.setFeaturesCol(column_name)
         self.assertEqual(lr_estimator.getFeaturesCol(), 'features')

     # Non-string input must be rejected at construction time.
     with self.assertRaises(TypeError):
         LogisticRegression(featuresCol=2.3)
# schema = StructType(fields)

# # Apply the schema to the RDD.
# schemaPeople = spark.createDataFrame(people, schema)

# labelIndexer = StringIndexer(inputCol="label", outputCol="indexedLabel").fit(data)
# schemaPeople.show()

# lr = LogisticRegression(maxIter=10, regParam=0.3, elasticNetParam=0.8)

# Train a logistic regression model on inputDF, print its parameters, then
# score testDF and one hand-written feature vector.
lr = LogisticRegression(maxIter=5, regParam=0.01)

# Fit the model
lrModel = lr.fit(inputDF)
lrcol = lr.getFeaturesCol()

# Print the coefficients and intercept for logistic regression
print("Coefficients: " + str(lrModel.coefficients))
print("Intercept: " + str(lrModel.intercept))

# Score the test set and keep only the first row of the output.
result = lrModel.transform(testDF).head()

# Build a one-row DataFrame holding a single manually specified feature vector.
test1 = sc.parallelize(
    [Row(features=MLLibVectors.dense([2, 173, 30, 8.281666667]))]).toDF()

result1 = lrModel.transform(test1)
# Call form of print (not the Python 2 statement) so the script parses on
# Python 3 and matches the other print calls in this script.
print(result1.collect())
# NOTE(review): result1.prediction is presumably a Column reference, so this
# prints the column expression rather than predicted values -- confirm intent.
print(result1.prediction)