def test_string(self):
    """Check that ``featuresCol`` accepts several string flavours.

    A plain literal, a ``u''`` literal and a ``numpy.str_`` value are each
    set as the features column and must read back as ``'features'``.
    Constructing the estimator with a float ``featuresCol`` must raise
    ``TypeError``.
    """
    estimator = LogisticRegression()
    equivalent_names = ('features', u'features', np.str_('features'))
    for name in equivalent_names:
        estimator.setFeaturesCol(name)
        self.assertEqual(estimator.getFeaturesCol(), 'features')

    # A non-string column name must be rejected at construction time.
    with self.assertRaises(TypeError):
        LogisticRegression(featuresCol=2.3)
# TEST - Run this cell to test your solution
from pyspark.ml import Pipeline
from pyspark.ml.classification import LogisticRegression
from pyspark.ml.feature import StringIndexer, VectorAssembler

# Validate the objects built in the previous step (indexer, assembler,
# multinomialRegression, pipeline) and their column wiring.
# dbTest is a helper defined outside this cell; presumably it takes
# (test id, expected value, actual value) -- confirm against its definition.
#
# isinstance() is used rather than comparing against type(Cls()) so that no
# throwaway estimator is constructed just to obtain its type. Note that
# isinstance() also accepts subclasses of the named class.
dbTest("ML1-P-07-02-01", True, isinstance(indexer, StringIndexer))
dbTest("ML1-P-07-02-02", True, indexer.getInputCol() == 'species')
dbTest("ML1-P-07-02-03", True, indexer.getOutputCol() == 'speciesClass')
dbTest("ML1-P-07-02-04", True, isinstance(assembler, VectorAssembler))
# Every column of irisDF except the last one is expected as a feature input.
dbTest("ML1-P-07-02-05", True, assembler.getInputCols() == irisDF.columns[:-1])
dbTest("ML1-P-07-02-06", True, assembler.getOutputCol() == 'features')
dbTest("ML1-P-07-02-07", True, isinstance(multinomialRegression, LogisticRegression))
dbTest("ML1-P-07-02-08", True, multinomialRegression.getLabelCol() == "speciesClass")
dbTest("ML1-P-07-02-09", True, multinomialRegression.getFeaturesCol() == 'features')
dbTest("ML1-P-07-02-10", True, isinstance(pipeline, Pipeline))

print("Tests passed!")

# COMMAND ----------

# MAGIC %md
# MAGIC ### Step 3: Train the Model and Transform the Dataset
# MAGIC
# MAGIC Train the model and save the results to `multinomialModel` and save the transformed dataset to `irisTransformedDF`.

# COMMAND ----------

# TODO
def test_string(self):
    """Verify string handling of the ``featuresCol`` parameter.

    Setting the column from a plain literal, a ``u''`` literal or a
    ``numpy.str_`` value must always read back as ``'features'``; passing a
    float as ``featuresCol`` to the constructor must raise ``TypeError``.
    """
    model = LogisticRegression()
    candidates = ['features', u'features', np.str_('features')]
    for candidate in candidates:
        model.setFeaturesCol(candidate)
        observed = model.getFeaturesCol()
        self.assertEqual(observed, 'features')

    # assertRaises invokes the constructor itself with the keyword argument.
    self.assertRaises(TypeError, LogisticRegression, featuresCol=2.3)
# schema = StructType(fields)
# # Apply the schema to the RDD.
# schemaPeople = spark.createDataFrame(people, schema)
# labelIndexer = StringIndexer(inputCol="label", outputCol="indexedLabel").fit(data)
# schemaPeople.show()

# Configure the logistic regression estimator (5 iterations, light
# regularization). Alternative configurations are kept below for reference.
# lr = LogisticRegression(maxIter=10, regParam=0.3, elasticNetParam=0.8)
lr = LogisticRegression(maxIter=5, regParam=0.01)
# lr = LogisticRegressionWithSGD.train(iterations=10)

# Fit the model on the training DataFrame.
lrModel = lr.fit(inputDF)
lrcol = lr.getFeaturesCol()

# Print the coefficients and intercept for logistic regression
print("Coefficients: " + str(lrModel.coefficients))
print("Intercept: " + str(lrModel.intercept))

# Score the test set and keep only the first scored row.
result = lrModel.transform(testDF).head()
# result.prediction

# Score one hand-built feature vector.
# NOTE(review): if MLLibVectors aliases pyspark.mllib.linalg.Vectors, a
# pyspark.ml model may reject this vector type -- confirm the alias.
# test1 = sc.parallelize([Row(features=MLLibVectors.dense([2,102,30,4.385]))]).toDF()
test1 = sc.parallelize(
    [Row(features=MLLibVectors.dense([2, 173, 30, 8.281666667]))]).toDF()
result1 = lrModel.transform(test1)

# Collect once and reuse the rows. print() is called as a function so the
# script parses on both Python 2 and Python 3 (the bare `print x` statement
# is a SyntaxError on Python 3).
scored_rows = result1.collect()
print(scored_rows)
# `result1.prediction` is a Column expression, not data; printing it shows
# only the column reference. Print the predicted values from the rows.
print([row.prediction for row in scored_rows])