Esempio n. 1
0
 def run(self):
     """Export the scores CSV to an Excel workbook (data, chart, summary).

     Reads ``self.inputDataPath`` into ``self.df`` and writes a workbook to
     ``self.outputDataPath`` containing three sheets:

     * ``Data``    - a bold header row, then one row per CSV row: a 1-based
       running index in column "K" followed by the CSV columns in sorted
       order.
     * ``Chart``   - a scatter chart with one series per CSV column, plotted
       against the "K" index.
     * ``Summary`` - mean, median, std, var, min and max of every CSV column.

     Finally the loaded frame is passed to ``Msg.raw``.
     """
     self.df = pd.read_csv(self.inputDataPath)
     classifiers = sorted(self.df.columns)

     wb = xlsxwriter.Workbook(self.outputDataPath)
     fmtBold = wb.add_format({'bold': True})
     wsSummary = wb.add_worksheet("Summary")
     wsData = wb.add_worksheet("Data")
     wsChart = wb.add_worksheet("Chart")
     wsSummary.set_column("A:I", 18)
     wsData.set_column("A:I", 18)

     # --- Data sheet -------------------------------------------------------
     headers = ["K"] + classifiers
     for col, header in enumerate(headers):
         wsData.write(0, col, header, fmtBold)

     beginRow = 1          # first data row (zero-indexed; row 0 is the header)
     endRow = beginRow     # one past the last data row written
     for _, row in self.df.iterrows():
         # Column "K" holds the running 1-based index, which equals the
         # zero-indexed sheet row because of the header row.
         wsData.write(endRow, 0, endRow)
         for col, classifier in enumerate(classifiers, start=1):
             wsData.write(endRow, col, row[classifier])
         endRow += 1
     lastRow = endRow - 1

     # --- Chart sheet ------------------------------------------------------
     # The ranges are derived from what was actually written instead of a
     # fixed 100 rows x 3 columns. The chart is skipped when there is
     # nothing to plot, since a range with lastRow < beginRow is invalid.
     if classifiers and lastRow >= beginRow:
         chart = wb.add_chart({"type": "scatter"})
         for col in range(1, len(headers)):
             chart.add_series({
                 # Legend entry taken from the column's header cell.
                 "name": ["Data", 0, col],
                 "categories": ["Data", beginRow, 0, lastRow, 0],
                 "values": ["Data", beginRow, col, lastRow, col],
             })
         # XlsxWriter option keys are lowercase; "Name" is silently ignored.
         chart.set_x_axis({"name": "KFold"})
         chart.set_y_axis({"name": "Score"})
         wsChart.insert_chart("A1", chart)

     # --- Summary sheet ----------------------------------------------------
     statHeaders = ["Classifier", "Mean", "Median", "Std", "Var", "Min", "Max"]
     for col, header in enumerate(statHeaders):
         wsSummary.write(0, col, header, fmtBold)

     for rowIdx, classifier in enumerate(classifiers, start=1):
         scores = self.df[classifier]
         # NOTE(review): Number.asFloat(value, 3) presumably rounds to three
         # decimals - confirm against the helper.
         values = [
             Number.asFloat(np.mean(scores), 3),
             Number.asFloat(np.median(scores), 3),
             Number.asFloat(np.std(scores), 3),
             Number.asFloat(np.var(scores), 3),
             np.min(scores),
             np.max(scores),
         ]
         wsSummary.write(rowIdx, 0, classifier, fmtBold)
         for col, value in enumerate(values, start=1):
             wsSummary.write(rowIdx, col, value)

     wb.close()
     Msg.raw(self.df)
Esempio n. 2
0
 def run(self):
     """Repeat the evaluation ``self.cycles`` times and save pooled scores.

     Calls ``self.initialize()``, then for each cycle calls
     ``self.build(self.inputDataPath)`` and appends the returned scores to a
     per-classifier accumulator. The accumulated scores are turned into a
     DataFrame, passed to ``Msg.raw`` and written to ``self.outputDataPath``
     as CSV (no index column, floats formatted with two decimals).
     """
     self.initialize()

     # NOTE(review): presumably yields one independent list per classifier
     # name - confirm the helper does not share the single [] across keys.
     pooled = Data.buildDictByKeys(self.classifiers.keys(), [])

     for cycle in range(1, self.cycles + 1):
         Msg.show("Cycle {0} of {1}".format(cycle, self.cycles))
         cycleScores = self.build(self.inputDataPath)
         for name, values in cycleScores.items():
             pooled[name].extend(values)

     results = pd.DataFrame.from_dict(pooled)
     Msg.raw(results)
     results.to_csv(self.outputDataPath, index=False, float_format='%.2f')
     Msg.show("Saved results -> {0}".format(self.outputDataPath))
Esempio n. 3
0
 def build(self, inputDataPath):
     """Cross-validate every configured classifier on the given data file.

     Loads the data through ``self.ingestData(inputDataPath)``, then scores
     each entry of ``self.classifiers`` (visited in sorted name order) with
     ``cross_val_score`` using a ``KFold`` splitter of ``self.kFolds``
     splits; shuffling is controlled by ``self.randomizeDataFlag``.

     Returns:
         dict mapping classifier name to the list of scores returned by
         ``cross_val_score`` for that classifier.
     """
     Msg.raw("Analyzing <-> {0}".format(inputDataPath))
     self.ingestData(inputDataPath)

     splitter = KFold(
         n_splits=self.kFolds,
         shuffle=self.randomizeDataFlag,
         random_state=None,
     )
     features = self.dfFeatures.values
     labels = self.dfLabel.values.astype(dtype="int64")

     results = {}
     # Names are dict keys and therefore unique, so each one is assigned
     # exactly once and no membership check is needed.
     for name, classifier in sorted(self.classifiers.items()):
         foldScores = cross_val_score(classifier, features, labels, cv=splitter)
         results[name] = list(foldScores)
     return results