def invertIndexColm(self, infoData):
        """Run ``pu.indexToString`` on the train and test datasets in turn.

        The indexer path is looked up in the ``pc.INDEXERPATHMAPPING`` mapping
        using the column name stored under ``pc.COLMTOINDEX``. That path and
        the value of ``pc.PREDICTIONCOLM`` are written back to ``infoData``
        under ``pc.INDEXERPATH`` and ``pc.COLMTOINVERT`` before the utility is
        called.

        :param infoData: dict-like settings/dataset container; modified in
            place. ``pc.DATASET`` is used as a working slot and is overwritten.
        :return: the same ``infoData`` object, with ``pc.TRAINDATA`` and
            ``pc.TESTDATA`` replaced by the values returned from
            ``pu.indexToString``.
        :raises AttributeError: if ``infoData`` has no entry for
            ``pc.INDEXERPATHMAPPING`` (the lookup is then attempted on None).
        """
        # Read every input first so nothing below can disturb these values.
        sourceColm = infoData.get(pc.COLMTOINDEX)
        pathsByColm = infoData.get(pc.INDEXERPATHMAPPING)
        indexerPath = pathsByColm.get(sourceColm)
        predictionColm = infoData.get(pc.PREDICTIONCOLM)
        testInput = infoData.get(pc.TESTDATA)
        trainInput = infoData.get(pc.TRAINDATA)

        infoData[pc.INDEXERPATH] = indexerPath
        infoData[pc.COLMTOINVERT] = predictionColm

        # Run the indexing part on the train and test datasets separately,
        # since the results need to be shown to the user accordingly.
        # Train is processed first, then test.
        converted = {}
        for resultKey, dataset in (
            (pc.TRAINDATA, trainInput),
            (pc.TESTDATA, testInput),
        ):
            infoData[pc.DATASET] = dataset
            converted[resultKey] = pu.indexToString(infoData)

        infoData.update(converted)
        return infoData
Ejemplo n.º 2
0
 def invertIndex(self, infoData):
     """Run ``pu.indexToString`` on ``infoData`` and store the result.

     The indexer path is looked up in the ``pc.INDEXERPATHMAPPING`` mapping
     using the column name stored under ``pc.ORIGINALCOLMNAME``, and is
     written to ``infoData`` under ``pc.INDEXERPATH`` before the call.

     :param infoData: dict-like settings/dataset container; modified in
         place.
     :return: the same ``infoData`` object, with ``pc.DATASET`` set to the
         value returned by ``pu.indexToString``.
     :raises AttributeError: if ``infoData`` has no entry for
         ``pc.INDEXERPATHMAPPING`` (the lookup is then attempted on None).
     """
     columnName = infoData.get(pc.ORIGINALCOLMNAME)
     pathsByColumn = infoData.get(pc.INDEXERPATHMAPPING)
     infoData[pc.INDEXERPATH] = pathsByColumn.get(columnName)

     infoData[pc.DATASET] = pu.indexToString(infoData)
     # NOTE: a disabled debugging snippet that selected a few columns and
     # wrote them to a local CSV file used to sit here; it never executed.
     return infoData