# --- Feature assembly -------------------------------------------------------
# Combine the columns listed in `bands` into one vector column, "features".
assembler = (
    VectorAssembler()
    .setInputCols(bands)
    .setOutputCol("features")
)

assembled_df_train = assembler.transform(exploded_tiles_filtered_train)
assembled_df_test = assembler.transform(exploded_tiles_filtered_test)

# --- Training ---------------------------------------------------------------
# Random forest reading the assembled vector column and the 'label' column.
classifier = (
    RandomForestClassifier()
    .setLabelCol('label')
    .setFeaturesCol(assembler.getOutputCol())
)

# The training frame is cached as it is handed to fit().
model = classifier.fit(assembled_df_train.cache())

# --- Prediction -------------------------------------------------------------
# Score the test frame, then drop the assembled vector column and cache the
# result, which is used twice below (accuracy and confusion matrix).
prediction_df = (
    model.transform(assembled_df_test)
    .drop(assembler.getOutputCol())
    .cache()
)

# --- Evaluation -------------------------------------------------------------
evaluator = MulticlassClassificationEvaluator(
    predictionCol=classifier.getPredictionCol(),
    labelCol=classifier.getLabelCol(),
    metricName='accuracy',
)

accuracy = evaluator.evaluate(prediction_df)
print("\nAccuracy:", accuracy)

# Confusion matrix: one row per predicted class, one column per label value,
# each cell holding a row count; rows are sorted by the predicted class.
cnf_mtrx = (
    prediction_df
    .groupBy(classifier.getPredictionCol())
    .pivot(classifier.getLabelCol())
    .count()
    .sort(classifier.getPredictionCol())
)

# Bare expression kept from the original; presumably a notebook cell output
# that displays the frame -- it has no visible effect in a plain script.
cnf_mtrx

# Report the time elapsed since `begin_time`, which is set outside this
# section, then restart the timer for whatever step follows.
print("Tempo do calculo da cnf_matrix: " + str(datetime.datetime.now() - begin_time))
begin_time = datetime.datetime.now()