예제 #1
0
# Build the assembler that combines the columns named in `bands` into a
# single output column called "features".
assembler = (
    VectorAssembler()
    .setInputCols(bands)
    .setOutputCol("features")
)

# Apply the assembler to the train and test frames.
assembled_df_train = assembler.transform(exploded_tiles_filtered_train)
assembled_df_test = assembler.transform(exploded_tiles_filtered_test)

# Random forest reading its label from 'label' and its features from the
# assembler's output column.
classifier = (
    RandomForestClassifier()
    .setLabelCol('label')
    .setFeaturesCol(assembler.getOutputCol())
)

# Mark the training frame as cached, then fit on it.
# NOTE(review): relies on DataFrame.cache() returning the same frame it was
# called on, so fitting on `assembled_df_train` afterwards is equivalent to
# fitting on the value returned by cache() -- confirm for the Spark version
# in use.
assembled_df_train.cache()
model = classifier.fit(assembled_df_train)

# Score the test frame, then drop the assembled features column and cache
# the result, which is read by both the evaluator and the pivot below.
prediction_df = model.transform(assembled_df_test)
prediction_df = prediction_df.drop(assembler.getOutputCol()).cache()

# Accuracy evaluator wired to the classifier's own label/prediction columns.
evaluator = MulticlassClassificationEvaluator(
    metricName='accuracy',
    labelCol=classifier.getLabelCol(),
    predictionCol=classifier.getPredictionCol(),
)

accuracy = evaluator.evaluate(prediction_df)
print("\nAccuracy:", accuracy)

# Confusion-matrix-style table: one row per predicted value, one column per
# label value, cells holding row counts; rows sorted by predicted value.
cnf_mtrx = (
    prediction_df
    .groupBy(classifier.getPredictionCol())
    .pivot(classifier.getLabelCol())
    .count()
    .sort(classifier.getPredictionCol())
)
# Bare expression kept as in the original (it only has a visible effect in
# an interactive/notebook context that echoes expression values).
cnf_mtrx

# Report the time elapsed since `begin_time` (set outside this fragment),
# then restart the timer for whatever follows.
print("Tempo do calculo da cnf_matrix: " + str(datetime.datetime.now() - begin_time))

begin_time = datetime.datetime.now()