예제 #1
0
 def initPredict(self):
     """Load the training and prediction files and prepare prediction features.

     Reads ``self.trainFilename`` and ``self.predictFilename`` through
     ``csv.readCSVFile``, selects ``self.featureColumns`` from both, stores
     the ``self.classifyColumn`` column of the training data in
     ``self.classes``, normalizes the prediction features with
     ``self.normalizePredictFile`` and prepends a constant ``theta0``
     column of 1.0.

     Returns:
         bool: True when ``self.predictFeatures`` was prepared (in which
         case ``self.initPredictDone`` is also set to True); False as soon
         as a file cannot be read, a requested column is missing, or the
         normalization yields None.
     """
     datas = csv.readCSVFile(self.trainFilename, ',')
     if datas is None:
         return False
     # The container type returned by readCSVFile is not visible here, so
     # the broad catch of the original code is kept; what changes is that a
     # failure now stops the initialization instead of falling through to
     # code that needs the missing value.
     try:
         trainFeatures = datas[self.featureColumns]
     except Exception:
         print("One or multiple columns beetween: ",
               ", ".join(self.featureColumns), " doesn't exits")
         return False
     try:
         self.classes = datas[self.classifyColumn]
     except Exception:
         print("the classify column ", self.classifyColumn, "doesn't exits")
         return False
     predictFeature = csv.readCSVFile(self.predictFilename, ',')
     if predictFeature is None:
         return False
     try:
         predictFeature = predictFeature[self.featureColumns]
     except Exception:
         print("One or multiple columns beetween: ",
               ", ".join(self.featureColumns), " doesn't exits")
         return False
     normalized = self.normalizePredictFile(trainFeatures, predictFeature)
     # Check for None *before* touching the result: calling insert() on
     # None would raise AttributeError.
     if normalized is None:
         return False
     # Constant bias column, one 1.0 per row.
     normalized.insert(0, 'theta0', [1.0] * normalized.shape[0])
     self.predictFeatures = normalized
     self.initPredictDone = True
     return True
예제 #2
0
    def predictAll(self):
        """Predict a class for every row of ``self.predictFeatures``.

        Loads one row of thetas per class from ``thetas.csv``, scores each
        feature row against every theta row with ``self.predict`` and keeps
        the class whose score is the highest. Theta rows are matched to
        classes by position in ``self.classes.unique()``.

        Returns:
            A ``pd.DataFrame`` with a single column named
            ``self.classifyColumn`` holding the predicted classes, or None
            on any failure (initPredict not done, no prediction file name,
            unreadable or inconsistent thetas file).
        """
        if self.initPredictDone is False:
            print(
                "The function initPredict must return True before calling "
                "the function predictAll"
            )
            return None
        if self.predictFilename is None:
            print("Add the predict filename in constructor")
            return None
        datas = csv.readCSVFile("thetas.csv", ',')
        if datas is None:
            # Same failure value as every other error path of this method.
            return None
        try:
            thetas = datas[['theta0'] + self.featureColumns]
        except Exception:
            print("Error in thetas File")
            return None
        classNames = self.classes.unique()
        # Hoisted out of the loop: the theta rows do not change per sample.
        thetaRows = thetas.values
        # No theta row would make max() fail on an empty list; more theta
        # rows than classes could select an index past classNames.
        if len(thetaRows) == 0 or len(thetaRows) > len(classNames):
            print("Error in thetas File")
            return None

        predicts = []
        for values in self.predictFeatures.values:
            sample = values.tolist()
            results = [self.predict(lineThetas, sample)
                       for lineThetas in thetaRows]
            predicts.append(classNames[results.index(max(results))])

        return pd.DataFrame({self.classifyColumn: predicts})
예제 #3
0
def main():
    """Draw histograms from the CSV file named on the command line.

    Usage: ``python histogram.py [-all] file``.

    Without ``-all`` a single histogram is drawn for the subject at index
    10 of the names returned by ``csv.getSubjectValueByHouse``. With
    ``-all`` one histogram per subject is laid out on a square grid.
    Every histogram is labelled with ``csv.houseNames`` and coloured with
    ``csv.colors``. The process exits with status 1 when the file cannot
    be read or the column filtering fails; a wrong command line only
    prints the usage message.
    """
    argc = len(sys.argv)
    plotEverything = argc == 3 and sys.argv[1] == "-all"
    if argc != 2 and not plotEverything:
        print('Error script : python histogram.py [-all] file.')
        return

    # The file name is always the last argument.
    datas = csv.readCSVFile(sys.argv[-1], ',')
    if datas is None:
        sys.exit(1)
    subjectDatas = csv.dropColumns(datas, csv.notArithmetiqueSubjects)
    if subjectDatas is None:
        sys.exit(1)
    subjectNames, subjectsByHouse = csv.getSubjectValueByHouse(datas)
    figure = plt.figure()

    def draw(axes, index):
        # One histogram for the subject at ``index``, titled and with legend.
        axes.hist(subjectsByHouse[index],
                  label=csv.houseNames,
                  color=csv.colors)
        axes.set_title(subjectNames[index])
        axes.legend()

    count = len(subjectNames)
    # Smallest square grid able to hold ``count`` plots.
    side = math.ceil(math.sqrt(count))
    if plotEverything:
        for index in range(count):
            draw(figure.add_subplot(side, side, index + 1), index)
    else:
        draw(figure.add_subplot(1, 1, 1), 10)
    plt.show()
예제 #4
0
def main():
    """Show a scatter plot of two subjects from the CSV file given as argument.

    Usage: ``python scatter_.py file``.

    Plots "Astronomy" against "Defense Against the Dark Arts", coloured by
    the "Hogwarts House" column. The process exits with status 1 when the
    file cannot be read or the column filtering fails; a wrong command
    line only prints the usage message.
    """
    if len(sys.argv) != 2:
        print('Error script : python scatter_.py file.')
        return

    datas = csv.readCSVFile(sys.argv[1], ',')
    if datas is None:
        sys.exit(1)
    subjectDatas = csv.dropColumns(datas, csv.notArithmetiqueSubjects)
    if subjectDatas is None:
        sys.exit(1)

    plotOptions = {
        "x": "Astronomy",
        "y": "Defense Against the Dark Arts",
        "hue": "Hogwarts House",
        "hue_order": csv.houseNames,
        "palette": csv.colors,
        "data": subjectDatas,
    }
    sns.relplot(**plotOptions)
    plt.show()
예제 #5
0
def main():
    """Print descriptive statistics for the CSV file given as argument.

    Usage: ``python describe.py [-all] file``.

    Always prints the result of ``describe`` for the whole filtered data
    set. With ``-all`` it additionally prints one report per entry of
    ``csv.houseNames``, restricted to the rows whose "Hogwarts House"
    matches that entry. The process exits with status 1 when the file
    cannot be read or the column filtering fails; a wrong command line
    only prints the usage message.
    """
    argc = len(sys.argv)
    perHouse = argc == 3 and sys.argv[1] == "-all"
    if argc != 2 and not perHouse:
        print('Error script : python describe.py [-all] file.')
        return

    # The file name is always the last argument.
    datas = csv.readCSVFile(sys.argv[-1], ',')
    if datas is None:
        sys.exit(1)
    subjectDatas = csv.dropColumns(datas, csv.notArithmetiqueSubjects)
    if subjectDatas is None:
        sys.exit(1)

    print("Describe general:\n{}\n".format(describe(subjectDatas)))
    if not perHouse:
        return
    for house in csv.houseNames:
        inHouse = subjectDatas['Hogwarts House'].isin({house})
        houseRows = subjectDatas.loc[inHouse]
        print("Describe {}:\n{}\n".format(house, describe(houseRows)))
예제 #6
0
 def initTrain(self):
     """Load the training file and prepare features and classes for training.

     Reads ``self.trainFilename`` through ``csv.readCSVFile``, normalizes
     the ``self.featureColumns`` columns with ``self.normalize``, prepends
     a constant ``theta0`` column of 1.0 and reads the
     ``self.classifyColumn`` column.

     ``self.features``, ``self.classes`` and ``self.initTrainDone`` are
     only updated once every step has succeeded, so a failed call never
     leaves a half-initialized state behind nor reports success based on
     values from an earlier call.

     Returns:
         bool: True when both features and classes were prepared; False
         when the file cannot be read or one of the steps fails.
     """
     datas = csv.readCSVFile(self.trainFilename, ',')
     if datas is None:
         return False
     # The broad catch of the original code is kept so that a failure in
     # the column lookup, the normalization or the insert is still reported
     # rather than raised; the difference is that it now aborts the call.
     try:
         features = self.normalize(datas[self.featureColumns])
         # Constant bias column, one 1.0 per row.
         features.insert(0, 'theta0', [1.0] * features.shape[0])
     except Exception:
         print("One or multiple columns beetween: ",
               ", ".join(self.featureColumns), " doesn't exits")
         return False
     try:
         classes = datas[self.classifyColumn]
     except Exception:
         print("the classify column ", self.classifyColumn, "doesn't exits")
         return False
     if classes is None:
         return False
     self.features = features
     self.classes = classes
     self.initTrainDone = True
     return True