Example #1
0
 def reload_and_compare(ovr, suffix):
     """Fit ``ovr``, round-trip the estimator and its model through disk, compare.

     The estimator is written to ``<temp_path>/<suffix>`` and the fitted
     model to ``<temp_path>/<suffix>Model``.  Each reloaded object is
     checked against its original with ``self._compare_pipelines``.
     ``df``, ``temp_path`` and ``self`` are read from the surrounding scope.
     """
     fitted = ovr.fit(df)

     # Estimator round trip.
     estimator_dir = "{}/{}".format(temp_path, suffix)
     ovr.save(estimator_dir)
     self._compare_pipelines(ovr, OneVsRest.load(estimator_dir))

     # Fitted-model round trip.
     model_dir = "{}/{}Model".format(temp_path, suffix)
     fitted.save(model_dir)
     self._compare_pipelines(fitted, OneVsRestModel.load(model_dir))
Example #2
0
 def test_onevsrest(self):
     """Round-trip a OneVsRest estimator and its fitted model through disk.

     Both objects are saved under a fresh temporary directory, loaded back,
     and compared with the originals via ``self._compare_pipelines``.
     The temporary directory is removed when the test finishes, whether it
     passes or fails.
     """
     import shutil  # function-scope import, needed only for the cleanup

     temp_path = tempfile.mkdtemp()
     try:
         df = self.spark.createDataFrame([(0.0, Vectors.dense(1.0, 0.8)),
                                          (1.0, Vectors.sparse(2, [], [])),
                                          (2.0, Vectors.dense(0.5, 0.5))] * 10,
                                         ["label", "features"])
         lr = LogisticRegression(maxIter=5, regParam=0.01)
         ovr = OneVsRest(classifier=lr)
         model = ovr.fit(df)

         # Estimator round trip.
         ovrPath = temp_path + "/ovr"
         ovr.save(ovrPath)
         loadedOvr = OneVsRest.load(ovrPath)
         self._compare_pipelines(ovr, loadedOvr)

         # Fitted-model round trip.
         modelPath = temp_path + "/ovrModel"
         model.save(modelPath)
         loadedModel = OneVsRestModel.load(modelPath)
         self._compare_pipelines(model, loadedModel)
     finally:
         # mkdtemp() leaves removal to the caller; without this every run
         # leaves the saved estimator and model behind.
         shutil.rmtree(temp_path, ignore_errors=True)
Example #3
0
 def test_onevsrest(self):
     """Verify save/load of OneVsRest and of the model it produces.

     A three-class frame is fitted with a logistic-regression base
     classifier; the estimator and the fitted model are each written below
     a temporary directory, reloaded, and compared with
     ``self._compare_pipelines``.  The directory is deleted afterwards.
     """
     import shutil  # function-scope import, needed only for the cleanup

     temp_path = tempfile.mkdtemp()
     try:
         df = self.spark.createDataFrame([(0.0, Vectors.dense(1.0, 0.8)),
                                          (1.0, Vectors.sparse(2, [], [])),
                                          (2.0, Vectors.dense(0.5, 0.5))] * 10,
                                         ["label", "features"])
         lr = LogisticRegression(maxIter=5, regParam=0.01)
         ovr = OneVsRest(classifier=lr)
         model = ovr.fit(df)

         ovrPath = temp_path + "/ovr"
         ovr.save(ovrPath)
         loadedOvr = OneVsRest.load(ovrPath)
         self._compare_pipelines(ovr, loadedOvr)

         modelPath = temp_path + "/ovrModel"
         model.save(modelPath)
         loadedModel = OneVsRestModel.load(modelPath)
         self._compare_pipelines(model, loadedModel)
     finally:
         # The directory from mkdtemp() is not removed automatically.
         shutil.rmtree(temp_path, ignore_errors=True)
Example #4
0
 def test_save_load(self):
     """Check that OneVsRest and OneVsRestModel survive a save/load round trip.

     For the estimator, the features column, label column and classifier
     uid must match after reloading.  For the fitted model, the reloaded
     model must hold the same number of sub-models, with matching uids in
     order.  The temporary directory is removed when the test finishes.
     """
     import shutil  # function-scope import, needed only for the cleanup

     temp_path = tempfile.mkdtemp()
     try:
         sqlContext = SQLContext(self.sc)
         df = sqlContext.createDataFrame(
             [(0.0, Vectors.dense(1.0, 0.8)), (1.0, Vectors.sparse(2, [], [])), (2.0, Vectors.dense(0.5, 0.5))],
             ["label", "features"],
         )
         lr = LogisticRegression(maxIter=5, regParam=0.01)
         ovr = OneVsRest(classifier=lr)
         model = ovr.fit(df)

         ovrPath = temp_path + "/ovr"
         ovr.save(ovrPath)
         loadedOvr = OneVsRest.load(ovrPath)
         self.assertEqual(loadedOvr.getFeaturesCol(), ovr.getFeaturesCol())
         self.assertEqual(loadedOvr.getLabelCol(), ovr.getLabelCol())
         self.assertEqual(loadedOvr.getClassifier().uid, ovr.getClassifier().uid)

         modelPath = temp_path + "/ovrModel"
         model.save(modelPath)
         loadedModel = OneVsRestModel.load(modelPath)
         # zip() stops at the shorter sequence, so compare the lengths first;
         # otherwise a reloaded model with missing sub-models would pass.
         self.assertEqual(len(model.models), len(loadedModel.models))
         for m, n in zip(model.models, loadedModel.models):
             self.assertEqual(m.uid, n.uid)
     finally:
         # mkdtemp() leaves removal to the caller.
         shutil.rmtree(temp_path, ignore_errors=True)
Example #5
0
 def test_save_load(self):
     """Save and reload a OneVsRest estimator and its model, then compare.

     The reloaded estimator must report the same features column, label
     column and classifier uid.  The reloaded model must contain as many
     sub-models as the original, with the same uids in the same order.
     Everything is written below a temporary directory that is deleted at
     the end of the test.
     """
     import shutil  # function-scope import, needed only for the cleanup

     temp_path = tempfile.mkdtemp()
     try:
         sqlContext = SQLContext(self.sc)
         df = sqlContext.createDataFrame([(0.0, Vectors.dense(1.0, 0.8)),
                                          (1.0, Vectors.sparse(2, [], [])),
                                          (2.0, Vectors.dense(0.5, 0.5))],
                                         ["label", "features"])
         lr = LogisticRegression(maxIter=5, regParam=0.01)
         ovr = OneVsRest(classifier=lr)
         model = ovr.fit(df)

         ovrPath = temp_path + "/ovr"
         ovr.save(ovrPath)
         loadedOvr = OneVsRest.load(ovrPath)
         self.assertEqual(loadedOvr.getFeaturesCol(), ovr.getFeaturesCol())
         self.assertEqual(loadedOvr.getLabelCol(), ovr.getLabelCol())
         self.assertEqual(loadedOvr.getClassifier().uid,
                          ovr.getClassifier().uid)

         modelPath = temp_path + "/ovrModel"
         model.save(modelPath)
         loadedModel = OneVsRestModel.load(modelPath)
         # Guard against zip() silently dropping unmatched sub-models.
         self.assertEqual(len(model.models), len(loadedModel.models))
         for m, n in zip(model.models, loadedModel.models):
             self.assertEqual(m.uid, n.uid)
     finally:
         # The directory from mkdtemp() is not removed automatically.
         shutil.rmtree(temp_path, ignore_errors=True)
Example #6
0
# Read the five directories tl0..tl4 under imageDir and tag every row of each
# with a constant "label" column (0..4 respectively).
labelZeroDf = readImages(imageDir + "tl0").withColumn("label", lit(0))
labelOneDf = readImages(imageDir + "tl1").withColumn("label", lit(1))
labelTwoDf = readImages(imageDir + "tl2").withColumn("label", lit(2))
labelThreeDf = readImages(imageDir + "tl3").withColumn("label", lit(3))
labelFourDf = readImages(imageDir + "tl4").withColumn("label", lit(4))
# Stack the five labelled frames into a single test set.
finalTestDf = labelZeroDf.unionAll(labelOneDf).unionAll(labelTwoDf).unionAll(
    labelThreeDf).unionAll(labelFourDf)

testSize = finalTestDf.count()
print(str(testSize))
# Turn the "image" column into a "features" column with InceptionV3.
featurizer = DeepImageFeaturizer(inputCol="image",
                                 outputCol="features",
                                 modelName="InceptionV3")
featureVector = featurizer.transform(finalTestDf)
# Load the previously saved one-vs-rest model and score the featurized images.
model_dc = OneVsRestModel.load(imageDir + 'model-decision-tree-classifier-new')

predictions = model_dc.transform(featureVector)
predictions.show()

# Accuracy of "prediction" against "label"; error is reported as 1 - accuracy.
evaluator = MulticlassClassificationEvaluator(labelCol="label",
                                              predictionCol="prediction",
                                              metricName="accuracy")
accuracy = evaluator.evaluate(predictions)
print("Test Data set accuracy with decision-tree-classifieer for " +
      str(testSize) + " images = " + str(accuracy) + " and error " +
      str(1 - accuracy))

# Evaluator for weighted precision; it is not used within the visible lines.
evaluator = MulticlassClassificationEvaluator(labelCol="label",
                                              predictionCol="prediction",
                                              metricName="weightedPrecision")
Example #7
0
# Read the test images and derive a join key (fileName) from each file path.
tmpTestDf = readImages(imageDir + "test5")
tmpTestRDD = tmpTestDf.rdd.map(
    lambda x: Row(filepath=x[0], image=x[1], fileName=getFileName(x[0])))
tmptestX = tmpTestRDD.toDF()
# Read the label CSV: first column is the image name, second the integer label.
csvTestTmp = spark.read.format("csv").option(
    "header", "true").load(imageDir + "test5.csv")
csvTestRDD = csvTestTmp.rdd.map(lambda x: Row(image=x[0], label=int(x[1])))
csvTest = csvTestRDD.toDF()
# Attach labels to images; the CSV's redundant key column is dropped.
finalTestDataFrame = tmptestX.join(csvTest, tmptestX.fileName == csvTest.image,
                                   'inner').drop(csvTest.image)

featurizer = DeepImageFeaturizer(inputCol="image",
                                 outputCol="features",
                                 modelName="InceptionV3")
featureVector = featurizer.transform(finalTestDataFrame)
model_dr = OneVsRestModel.load(imageDir + 'model-dicision-tree-regression')
predictions = model_dr.transform(featureVector)
predictions.persist()
predictions.select("filePath", "prediction").show(truncate=False)

predictionAndLabels = predictions.select("prediction", "label")

#predictionAndLabels.show()

evaluator = MulticlassClassificationEvaluator(metricName="accuracy")
# Compute the metric once and reuse it rather than evaluating twice for one
# printed line.
accuracy = evaluator.evaluate(predictionAndLabels)
print("Test Data set accuracy with Decision tree regression = " +
      str(accuracy) + " and error " + str(1 - accuracy))

#regression evaluator
#evaluator = RegressionEvaluator(
Example #8
0
imageDir = "hdfs://192.168.65.188:8020/paih/"


def getFileName(filePath):
    """Return the last path component of ``filePath`` up to its first dot."""
    base_name = os.path.basename(filePath)
    stem, _, _ = base_name.partition(".")
    return stem

# Prepare Test Data
# Read the test images and derive a join key (fileName) from each file path.
tmpTestDf = readImages(imageDir + "/test5")
tmpTestRDD = tmpTestDf.rdd.map(lambda x : Row(filepath = x[0], image = x[1], fileName = getFileName(x[0])))
tmptestX = tmpTestRDD.toDF()
# Read the label CSV: first column is the image name, second the integer label.
csvTestTmp = spark.read.format("csv").option("header", "true").load(imageDir + "/test5.csv")
csvTestRDD = csvTestTmp.rdd.map(lambda x : Row(image = x[0], label = int(x[1])))
csvTest = csvTestRDD.toDF()
finalTestDataFrame = tmptestX.join(csvTest, tmptestX.fileName == csvTest.image, 'inner').drop(csvTest.image)

featurizer = DeepImageFeaturizer(inputCol="image",outputCol="features", modelName="InceptionV3")
featureVector = featurizer.transform(finalTestDataFrame)
model_rf = OneVsRestModel.load('hdfs://192.168.65.188:8020/paih/model-randomForest')
predictions = model_rf.transform(featureVector)
predictions.persist()
predictions.select("filePath", "prediction").show(truncate=False)

predictionAndLabels = predictions.select("prediction", "label")

#predictionAndLabels.show()

# The loaded model is the random-forest one, so report it under that name
# only. The metric is computed once and reused for both numbers.
evaluator = MulticlassClassificationEvaluator(metricName="accuracy")
accuracy = evaluator.evaluate(predictionAndLabels)
print("Test Data set accuracy with Random Forest = " + str(accuracy) + " and error " + str(1 - accuracy))
Example #9
0
    # Test phase: scale the golden data, score it with the saved one-vs-rest
    # model and report the classification error.
    if opt == "test" or opt == "all":

        # print(...) with a single argument behaves the same on Python 2 and 3
        # and matches the call form already used for "Test Error" below.
        print("Loading test data...")
        test_data = sc.textFile(golden_file)
        parsed_test_data = test_data.map(kup.parse_multiClass)
        parsed_test_data_df = spark.createDataFrame(parsed_test_data,
                                                    ["label", "features"])

        # load the scaler and perform feature scaling on test data
        scalerModel = StandardScalerModel.load(scalerPath)
        test_df_tmp = scalerModel.transform(parsed_test_data_df)
        # Replace the raw "features" column with the scaled one.
        test_df = test_df_tmp.drop("features").withColumnRenamed(
            "scaledFeatures", "features")

        print("Predicting test data...")
        lrModel = OneVsRestModel.load(logreg_path)
        testPrediction = lrModel.transform(test_df)

        testPrediction.printSchema()
        testPrediction.show(5)

        print("eval...")

        evaluator = MulticlassClassificationEvaluator(metricName="accuracy")

        # compute the classification error on test data.
        accuracy = evaluator.evaluate(testPrediction)
        print("Test Error = %g" % (1.0 - accuracy))

        predictionAndLabels = testPrediction.select("label", "prediction").rdd
Example #10
0
# Prepare Test Data
# Read the test images and derive a join key (fileName) from each file path.
tmpTestDf = readImages(imageDir + "/test5")
tmpTestRDD = tmpTestDf.rdd.map(
    lambda x: Row(filepath=x[0], image=x[1], fileName=getFileName(x[0])))
tmptestX = tmpTestRDD.toDF()
# Read the label CSV: first column is the image name, second the integer label.
csvTestTmp = spark.read.format("csv").option(
    "header", "true").load(imageDir + "/test5.csv")
csvTestRDD = csvTestTmp.rdd.map(lambda x: Row(image=x[0], label=int(x[1])))
csvTest = csvTestRDD.toDF()
finalTestDataFrame = tmptestX.join(csvTest, tmptestX.fileName == csvTest.image,
                                   'inner').drop(csvTest.image)

featurizer = DeepImageFeaturizer(inputCol="image",
                                 outputCol="features",
                                 modelName="InceptionV3")
featureVector = featurizer.transform(finalTestDataFrame)

#model_nb = OneVsRestModel.load('hdfs://192.168.65.188:8020/paih/model-naive-bayes')
# NOTE(review): `imgDir` is not assigned in the visible lines (the rest of
# this script uses `imageDir`) — confirm it is defined elsewhere.
model_rf = OneVsRestModel.load(imgDir + 'model-random-forest')

# Score with the model that was actually loaded; `model_nb` is only assigned
# in the commented-out line above.
predictions = model_rf.transform(featureVector)

print('***Transform complete***')
predictionAndLabels = predictions.select("prediction", "label")

evaluator = MulticlassClassificationEvaluator(metricName="accuracy")
# Compute the metric once and reuse it for both printed numbers.
accuracy = evaluator.evaluate(predictionAndLabels)
print("Test Data set accuracy with Random Forest = " +
      str(accuracy) + " and error " + str(1 - accuracy))
Example #11
0
#imageDir = "/media/prateek/2A8BEA421AC55874/PAIH/work"
imageDir = "hdfs://192.168.65.188:8020/paih/"


def getFileName(filePath):
    """Return the base name of ``filePath`` with everything from the first dot removed."""
    return os.path.basename(filePath).split(".", 1)[0]

# Prepare Test Data
# Read the test images and derive a join key (fileName) from each file path.
tmpTestDf = readImages(imageDir + "/test5")
tmpTestRDD = tmpTestDf.rdd.map(lambda x : Row(filepath = x[0], image = x[1], fileName = getFileName(x[0])))
tmptestX = tmpTestRDD.toDF()
# Read the label CSV: first column is the image name, second the integer label.
csvTestTmp = spark.read.format("csv").option("header", "true").load(imageDir + "/test5.csv")
csvTestRDD = csvTestTmp.rdd.map(lambda x : Row(image = x[0], label = int(x[1])))
csvTest = csvTestRDD.toDF()
finalTestDataFrame = tmptestX.join(csvTest, tmptestX.fileName == csvTest.image, 'inner').drop(csvTest.image)

featurizer = DeepImageFeaturizer(inputCol="image",outputCol="features", modelName="InceptionV3")
featureVector = featurizer.transform(finalTestDataFrame)
model_dc = OneVsRestModel.load('hdfs://192.168.65.188:8020/paih/model-dicision-tree-classifier')
predictions = model_dc.transform(featureVector)
predictions.persist()
predictions.select("filePath", "prediction").show(truncate=False)

predictionAndLabels = predictions.select("prediction", "label")

#predictionAndLabels.show()

evaluator = MulticlassClassificationEvaluator(metricName="accuracy")
# Compute the metric once and reuse it for both printed numbers.
accuracy = evaluator.evaluate(predictionAndLabels)
print("Test Data set accuracy with Decision tree Classifier = " + str(accuracy) + " and error " + str(1 - accuracy))
Example #12
0
def getFileName(filePath):
    """Strip the directory part and everything from the first dot onward."""
    leaf = os.path.basename(filePath)
    return leaf.partition(".")[0]

# Prepare Test Data
# Read the test images and derive a join key (fileName) from each file path.
tmpTestDf = readImages(imageDir + "/test5")
tmpTestRDD = tmpTestDf.rdd.map(lambda x : Row(filepath = x[0], image = x[1], fileName = getFileName(x[0])))
tmptestX = tmpTestRDD.toDF()
# Read the label CSV: first column is the image name, second the integer label.
csvTestTmp = spark.read.format("csv").option("header", "true").load(imageDir + "/test5.csv")
csvTestRDD = csvTestTmp.rdd.map(lambda x : Row(image = x[0], label = int(x[1])))
csvTest = csvTestRDD.toDF()
finalTestDataFrame = tmptestX.join(csvTest, tmptestX.fileName == csvTest.image, 'inner').drop(csvTest.image)

featurizer = DeepImageFeaturizer(inputCol="image",outputCol="features", modelName="InceptionV3")
featureVector = featurizer.transform(finalTestDataFrame)
model_dr = OneVsRestModel.load('hdfs://192.168.65.188:8020/paih/model-dicision-tree-regression')
predictions = model_dr.transform(featureVector)
predictions.persist()
predictions.select("filePath", "prediction").show(truncate=False)

predictionAndLabels = predictions.select("prediction", "label")

#predictionAndLabels.show()

evaluator = MulticlassClassificationEvaluator(metricName="accuracy")
# Compute the metric once and reuse it for both printed numbers.
accuracy = evaluator.evaluate(predictionAndLabels)
print("Test Data set accuracy with Decision tree regression = " + str(accuracy) + " and error " + str(1 - accuracy))

#regression evaluator
#evaluator = RegressionEvaluator(
 #   labelCol="label", predictionCol="prediction", metricName="rmse")
#rmse = evaluator.evaluate(predictions)
    for row in disease_pred.rdd.collect():
        if row['prediction'] == 0:
            risk[row['label']] = 'low'

    level_pred = model_level.transform(data)
    for row in level_pred.rdd.collect():
        if row['label'] not in risk:
            risk[row['label']] = 'high' if row['prediction'] == 0 else 'medium'
    return risk


if __name__ == "__main__":
    # Script entry point: load user data and the two saved models, compute a
    # risk level per user with predict(), and write "id,level" lines to
    # predict.csv.
    spark = SparkSession\
        .builder\
        .appName("linearSVC Example")\
        .getOrCreate()

    try:
        # Load user data
        inputData = spark.read.format("libsvm").load("data/user_data")

        # load models
        prediction_model = LinearSVCModel.load(
            "models/HeartDisearsePredictionModel")
        level_model = OneVsRestModel.load("models/HeartDisearseLevelModel")
        risk_level = predict(inputData, prediction_model, level_model)

        with open('predict.csv', 'w') as outfile:
            # `user_id` rather than `id`, so the builtin is not shadowed.
            for user_id, level in risk_level.items():
                outfile.write("{},{}\n".format(int(user_id), level))
    finally:
        # Stop the session even if loading, prediction or writing fails.
        spark.stop()
Example #14
0
# Read the test images and derive a join key (fileName) from each file path.
tmpTestDf = readImages(imageDir + "/test5")
tmpTestRDD = tmpTestDf.rdd.map(
    lambda x: Row(filepath=x[0], image=x[1], fileName=getFileName(x[0])))
tmptestX = tmpTestRDD.toDF()
# Read the label CSV: first column is the image name, second the integer label.
csvTestTmp = spark.read.format("csv").option(
    "header", "true").load(imageDir + "/test5.csv")
csvTestRDD = csvTestTmp.rdd.map(lambda x: Row(image=x[0], label=int(x[1])))
csvTest = csvTestRDD.toDF()
finalTestDataFrame = tmptestX.join(csvTest, tmptestX.fileName == csvTest.image,
                                   'inner').drop(csvTest.image)

featurizer = DeepImageFeaturizer(inputCol="image",
                                 outputCol="features",
                                 modelName="InceptionV3")
featureVector = featurizer.transform(finalTestDataFrame)
# Despite the variable name, this loads the gradient-boosted-tree model.
model_lr = OneVsRestModel.load(
    'hdfs://192.168.65.188:8020/paih/model-gradiant-boosted-tree-classifier')
predictions = model_lr.transform(featureVector)
predictions.persist()
predictions.select("filePath", "prediction").show(truncate=False)

predictionAndLabels = predictions.select("prediction", "label")

evaluator = MulticlassClassificationEvaluator(metricName="accuracy")
# Report under the name of the model that was loaded, and compute the metric
# once for both printed numbers.
accuracy = evaluator.evaluate(predictionAndLabels)
print("Test Data set accuracy with Gradient boosted tree classifier = " +
      str(accuracy) + " and error " + str(1 - accuracy))
#predictionAndLabels.show()

evaluator = MulticlassClassificationEvaluator(metricName="accuracy")
print("Test Data set accuracy with  Gradiant boosted tree classifier = " +
      str(evaluator.evaluate(predictionAndLabels)) + " and error " +
Example #15
0
# Prepare Test Data
# Read the test images and derive a join key (fileName) from each file path.
tmpTestDf = readImages(imageDir + "/test5")
tmpTestRDD = tmpTestDf.rdd.map(
    lambda x: Row(filepath=x[0], image=x[1], fileName=getFileName(x[0])))
tmptestX = tmpTestRDD.toDF()
# Read the label CSV: first column is the image name, second the integer label.
csvTestTmp = spark.read.format("csv").option(
    "header", "true").load(imageDir + "/test5.csv")
csvTestRDD = csvTestTmp.rdd.map(lambda x: Row(image=x[0], label=int(x[1])))
csvTest = csvTestRDD.toDF()
finalTestDataFrame = tmptestX.join(csvTest, tmptestX.fileName == csvTest.image,
                                   'inner').drop(csvTest.image)

featurizer = DeepImageFeaturizer(inputCol="image",
                                 outputCol="features",
                                 modelName="InceptionV3")
featureVector = featurizer.transform(finalTestDataFrame)
model_svm = OneVsRestModel.load(
    'hdfs://192.168.65.188:8020/paih/support-vector-machine')
predictions = model_svm.transform(featureVector)
predictions.persist()
predictions.select("filePath", "prediction").show(truncate=False)

predictionAndLabels = predictions.select("prediction", "label")

#predictionAndLabels.show()

evaluator = MulticlassClassificationEvaluator(metricName="accuracy")
# Compute the metric once and reuse it for both printed numbers.
accuracy = evaluator.evaluate(predictionAndLabels)
print("Test Data set accuracy with support-vector-machine  = " +
      str(accuracy) + " and error " + str(1 - accuracy))
Example #16
0
# Read the test images and derive a join key (fileName) from each file path.
tmpTestDf = readImages(imageDir + "/test5")
tmpTestRDD = tmpTestDf.rdd.map(
    lambda x: Row(filepath=x[0], image=x[1], fileName=getFileName(x[0])))
tmptestX = tmpTestRDD.toDF()
# Read the label CSV: first column is the image name, second the integer label.
csvTestTmp = spark.read.format("csv").option(
    "header", "true").load(imageDir + "/test5.csv")
csvTestRDD = csvTestTmp.rdd.map(lambda x: Row(image=x[0], label=int(x[1])))
csvTest = csvTestRDD.toDF()
finalTestDataFrame = tmptestX.join(csvTest, tmptestX.fileName == csvTest.image,
                                   'inner').drop(csvTest.image)

featurizer = DeepImageFeaturizer(inputCol="image",
                                 outputCol="features",
                                 modelName="InceptionV3")
featureVector = featurizer.transform(finalTestDataFrame)
model_lr = OneVsRestModel.load(
    'hdfs://192.168.65.188:8020/paih/model-logistic-regression')
predictions = model_lr.transform(featureVector)
predictions.persist()
predictions.select("filePath", "prediction").show(truncate=False)

predictionAndLabels = predictions.select("prediction", "label")
evaluator = MulticlassClassificationEvaluator(metricName="accuracy")
# Compute the metric once and reuse it for both printed numbers.
accuracy = evaluator.evaluate(predictionAndLabels)
print("Test Data set accuracy with Logistic Regression = " +
      str(accuracy) + " and error " + str(1 - accuracy))

#predictionAndLabels.show()

evaluator = MulticlassClassificationEvaluator(metricName="accuracy")
print("Test Data set accuracy with Logistic Regression = " +
      str(evaluator.evaluate(predictionAndLabels)) + " and error " +
Example #17
0
# Prepare Test Data
# Read the images and derive a join key (fileName) from each file path.
tmpTestDf = readImages(imageDir + "train25_2")
tmpTestRDD = tmpTestDf.rdd.map(
    lambda x: Row(filepath=x[0], image=x[1], fileName=getFileName(x[0])))
tmptestX = tmpTestRDD.toDF()
# Read the label CSV: first column is the image name, second the integer label.
csvTestTmp = spark.read.format("csv").option(
    "header", "true").load(imageDir + "train25_2.csv")
csvTestRDD = csvTestTmp.rdd.map(lambda x: Row(image=x[0], label=int(x[1])))
csvTest = csvTestRDD.toDF()
finalTestDataFrame = tmptestX.join(csvTest, tmptestX.fileName == csvTest.image,
                                   'inner').drop(csvTest.image)

featurizer = DeepImageFeaturizer(inputCol="image",
                                 outputCol="features",
                                 modelName="InceptionV3")
featureVector = featurizer.transform(finalTestDataFrame)
# Drop the names of the intermediates; only featureVector is used below.
del tmpTestDf
del tmpTestRDD
del tmptestX  # spelled as assigned above; `tmpTestX` was never bound
del csvTestTmp
del csvTestRDD
del csvTest
del finalTestDataFrame

model_nb = OneVsRestModel.load(imageDir + 'model-naive-bayes')

# NOTE(review): model_nb is a loaded OneVsRestModel. Confirm it exposes
# fit(); re-training normally goes through the OneVsRest estimator. The
# return value of fit() is also discarded here, so the save below writes
# model_nb itself.
model_nb.fit(featureVector)
model_nb.write().overwrite().save(imageDir + 'model-naive-bayes-retrained')
print('***re-train complete***')
Example #18
0
def getFileName(filePath):
    """Return the final component of ``filePath``, cut at its first dot."""
    pieces = os.path.basename(filePath).split(".")
    return pieces[0]

# Prepare Test Data
# Read the test images and derive a join key (fileName) from each file path.
tmpTestDf = readImages(imageDir + "/test5")
tmpTestRDD = tmpTestDf.rdd.map(lambda x : Row(filepath = x[0], image = x[1], fileName = getFileName(x[0])))
tmptestX = tmpTestRDD.toDF()
# Read the label CSV: first column is the image name, second the integer label.
csvTestTmp = spark.read.format("csv").option("header", "true").load(imageDir + "/test5.csv")
csvTestRDD = csvTestTmp.rdd.map(lambda x : Row(image = x[0], label = int(x[1])))
csvTest = csvTestRDD.toDF()
finalTestDataFrame = tmptestX.join(csvTest, tmptestX.fileName == csvTest.image, 'inner').drop(csvTest.image)

featurizer = DeepImageFeaturizer(inputCol="image",outputCol="features", modelName="InceptionV3")

featureVector = featurizer.transform(finalTestDataFrame)
model_lr = OneVsRestModel.load(imageDir + 'model-logistic-regression')
#p = Pipeline(stages=[featurizer, model_lr])
predictions = model_lr.transform(featureVector)
#predictions.persist()
#predictions.select("filePath", "prediction").show(truncate=False)

#predictionAndLabels = predictions.select("prediction", "label")
evaluator = MulticlassClassificationEvaluator(metricName="accuracy")
# Compute the metric once and reuse it for both printed numbers; predictions
# is not persisted here, so a second evaluate() would redo the work.
accuracy = evaluator.evaluate(predictions)
print("Test Data set accuracy with Logistic Regression = " + str(accuracy) + " and error " + str(1 - accuracy))

#predictionAndLabels.show()

#evaluator = MulticlassClassificationEvaluator(metricName="accuracy")
#print("Test Data set accuracy with Logistic Regression = " + str(evaluator.evaluate(predictionAndLabels)) + " and error " + str(1 - evaluator.evaluate(predictionAndLabels)))
Example #19
0
# Prepare Test Data
# Read the test images and derive a join key (fileName) from each file path.
tmpTestDf = readImages(imageDir + "/test5")
tmpTestRDD = tmpTestDf.rdd.map(
    lambda x: Row(filepath=x[0], image=x[1], fileName=getFileName(x[0])))
tmptestX = tmpTestRDD.toDF()
# Read the label CSV: first column is the image name, second the integer label.
csvTestTmp = spark.read.format("csv").option(
    "header", "true").load(imageDir + "/test5.csv")
csvTestRDD = csvTestTmp.rdd.map(lambda x: Row(image=x[0], label=int(x[1])))
csvTest = csvTestRDD.toDF()
finalTestDataFrame = tmptestX.join(csvTest, tmptestX.fileName == csvTest.image,
                                   'inner').drop(csvTest.image)

featurizer = DeepImageFeaturizer(inputCol="image",
                                 outputCol="features",
                                 modelName="InceptionV3")
featureVector = featurizer.transform(finalTestDataFrame)

model_nb = OneVsRestModel.load(
    'hdfs://192.168.65.188:8020/paih/model-naive-bayes')

predictions = model_nb.transform(featureVector)

# Call form of print: same output on Python 2, and valid on Python 3.
print('***Transform complete***')
predictionAndLabels = predictions.select("prediction", "label")
evaluator = MulticlassClassificationEvaluator(metricName="accuracy")
# Compute the metric once and reuse it for both printed numbers.
accuracy = evaluator.evaluate(predictionAndLabels)
print("Test Data set accuracy with Naive Bayes classifier = " +
      str(accuracy) + " and error " + str(1 - accuracy))
Example #20
0
# Prepare Test Data
# Read the test images and derive a join key (fileName) from each file path.
tmpTestDf = readImages(imageDir + "/test5")
tmpTestRDD = tmpTestDf.rdd.map(
    lambda x: Row(filepath=x[0], image=x[1], fileName=getFileName(x[0])))
tmptestX = tmpTestRDD.toDF()
# Read the label CSV: first column is the image name, second the integer label.
csvTestTmp = spark.read.format("csv").option(
    "header", "true").load(imageDir + "/test5.csv")
csvTestRDD = csvTestTmp.rdd.map(lambda x: Row(image=x[0], label=int(x[1])))
csvTest = csvTestRDD.toDF()
finalTestDataFrame = tmptestX.join(csvTest, tmptestX.fileName == csvTest.image,
                                   'inner').drop(csvTest.image)

featurizer = DeepImageFeaturizer(inputCol="image",
                                 outputCol="features",
                                 modelName="InceptionV3")
featureVector = featurizer.transform(finalTestDataFrame)

#model_nb = OneVsRestModel.load('hdfs://192.168.65.188:8020/paih/model-naive-bayes')
# NOTE(review): `imgDir` is not assigned in the visible lines (the rest of
# this script uses `imageDir`), and the directory name is spelled
# "...boosted-tre-classifier" — confirm both against the saved model.
model_gbt = OneVsRestModel.load(imgDir +
                                '/model-gradiant-boosted-tre-classifier')

predictions = model_gbt.transform(featureVector)

# Call form of print: same output on Python 2, and valid on Python 3.
print('***Transform complete***')
predictionAndLabels = predictions.select("prediction", "label")

evaluator = MulticlassClassificationEvaluator(metricName="accuracy")
# Compute the metric once and reuse it for both printed numbers.
accuracy = evaluator.evaluate(predictionAndLabels)
print("Test Data set accuracy with gradiant boosted tree classifier = " +
      str(accuracy) + " and error " + str(1 - accuracy))