def applyRandomForestClassifier(self, data, target):
    response = ""
    try:
        # instantiate the random forest and fit it
        random_forest = RandomForestClassifier(max_depth=2, random_state=0,
                                               n_estimators=10, n_jobs=-1,
                                               criterion='gini')
        random_forest = random_forest.fit(data, target)

        # collect the feature importances
        importances = pd.DataFrame({'feature': data.columns.tolist(),
                                    'importance': np.round(random_forest.feature_importances_, 3)})
        importances = importances.sort_values('importance', ascending=False).set_index('feature')

        # export the ranking
        nameCSV = "%srankingImportance.csv" % (self.pathResponse)
        importances.to_csv(nameCSV)

        # generate the relevance chart
        dataP = pd.read_csv(nameCSV)
        keys = dataP['feature']
        values = dataP['importance'] * 100  # express importances as percentages
        namePicture = self.pathResponse + "RelevanceRanking_SpatialCLF.png"

        # instantiate the chart generator
        graph = createCharts.graphicsCreator()
        graph.createBarChart(keys, values, 'Component', 'Relevance (%)',
                             'Ranking Relevance Components', namePicture)
        response = "OK"
    except:
        response = "ERROR"
    return response
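# Hedged sketch (hypothetical helper, not called anywhere in the pipeline):
# the same percentage ranking could be built in memory, without the CSV
# round trip used above. It assumes the same RandomForestClassifier settings.
def _sketchImportancePercentages(self, data, target):
    clf = RandomForestClassifier(max_depth=2, random_state=0, n_estimators=10,
                                 n_jobs=-1, criterion='gini').fit(data, target)
    # scale the importances to percentages and sort descending
    return pd.Series(clf.feature_importances_ * 100,
                     index=data.columns).sort_values(ascending=False)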
def createHistogram(self):
    graphic = createCharts.graphicsCreator()
    for key in self.performance:
        print "Create histogram for ", key
        namePicture = self.pathResponse + key + ".svg"
        title = "Histogram for " + key
        graphic.generateHistogram(self.dataFrame, key, namePicture, title)
def doPCA(self):
    okidokie = ""
    try:
        X_or = self.normalizeDataSet()

        # PCA via eigendecomposition of the covariance matrix
        X = stats.zscore(X_or, axis=0)
        high, width = X.shape
        V = np.cov(X.T)
        values, vectors = np.linalg.eig(V)
        eig_pairs = [(abs(values[i]), vectors[:, i]) for i in range(len(values))]
        eig_pairs.sort(key=lambda pair: pair[0])  # sort ascending by eigenvalue
        eig_pairs.reverse()                       # largest eigenvalue first

        suma = sum(values)  # total variance (sum of the eigenvalues)
        # percentage of variance explained by each component
        pct = [(i * 100) / suma for i in sorted(values, reverse=True)]

        W = np.empty((width, 0))  # projection matrix
        P = np.empty((0, 2))      # component index / relevance pairs
        for aux, i in enumerate(pct):
            W = np.hstack((W, eig_pairs[aux][1].reshape(width, 1)))
            P = np.vstack((P, [aux + 1, i]))
        Y = X.dot(W)

        # export the transformed data and the relevance of each component
        file = "%sTransformedPCA.csv" % (self.pathResponse)
        filePCT = "%sPCTPCA.csv" % (self.pathResponse)
        df = pd.DataFrame(Y)
        df.to_csv(file, index=False)
        dfPct = pd.DataFrame(P, columns=["Component", "Relevance"])
        dfPct.to_csv(filePCT, index=False)

        # generate the relevance chart
        keys = dfPct['Component']
        values = dfPct['Relevance']
        namePicture = self.pathResponse + "RelevanceRanking_PCA.png"

        # instantiate the chart generator
        graph = createCharts.graphicsCreator()
        graph.createBarChart(keys, values, 'Component', 'Relevance (%)',
                             'Ranking Relevance Components', namePicture)
        okidokie = "OK"
    except Exception as e:
        # raise e
        okidokie = "ERROR"
    return okidokie
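# Hedged sketch (hypothetical helper, illustration only): the manual
# eigendecomposition in doPCA should agree with sklearn's PCA applied to the
# same z-scored matrix, up to the arbitrary sign of each component, assuming
# there are at least as many samples as features.
def _sketchComparePCAWithSklearn(self):
    from sklearn.decomposition import PCA

    X = stats.zscore(self.normalizeDataSet(), axis=0)
    pca = PCA(n_components=X.shape[1])
    Y_sklearn = pca.fit_transform(X)
    # np.abs(Y_sklearn) is expected to be close to np.abs(X.dot(W)) from doPCA
    return Y_sklearn, pca.explained_variance_ratio_ * 100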
def createHistogram(self):
    keys = ['calinski_harabaz_score', 'silhouette_score', 'groups']
    graphic = createCharts.graphicsCreator()
    for key in keys:
        print "Create histogram for ", key
        namePicture = self.pathResponse + key + ".svg"
        title = "Histogram for " + key
        graphic.generateHistogram(self.dataFrame, key, namePicture, title)
def createConfusionMatrix(self, dictTransform):
    self.predictions = cross_val_predict(self.modelData, self.dataSet,
                                         self.target, cv=self.cv_values)
    matrix = confusion_matrix(self.target, self.predictions)
    dictResponse = self.exportConfusionMatrix(matrix, dictTransform)
    graph = createCharts.graphicsCreator()
    graph.createConfusionMatrixPictures(matrix, self.target,
                                        self.path + "confusionMatrix.svg")
    return dictResponse
def incrementalPCA(self):
    okidokie = ""
    try:
        X_or = self.normalizeDataSet()
        high, width = X_or.shape

        trans = IncrementalPCA(n_components=width)
        Y = trans.fit_transform(X_or)

        # export the transformed data
        file = "%sIncrementalPCA.csv" % (self.pathResponse)
        df = pd.DataFrame(Y)
        df.to_csv(file, index=False)

        # explained variance per component, expressed as percentages
        explaindVariance = trans.explained_variance_ratio_
        matrix = []
        index = 1
        for element in explaindVariance:
            component = "PCA " + str(index)
            row = [component, element * 100]
            matrix.append(row)
            index += 1
        fileExport = "%svarianzaExplained.csv" % (self.pathResponse)
        dfVar = pd.DataFrame(matrix, columns=["Component", "Variance"])
        dfVar.to_csv(fileExport, index=False)

        # generate the relevance chart
        keys = dfVar['Component']
        values = dfVar['Variance']
        namePicture = self.pathResponse + "RelevanceRanking_IPCA.png"

        # instantiate the chart generator
        graph = createCharts.graphicsCreator()
        graph.createBarChart(keys, values, 'Component', 'Relevance (%)',
                             'Ranking Relevance Components', namePicture)
        okidokie = "OK"
    except Exception as e:
        # raise e
        okidokie = "ERROR"
    return okidokie
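# Hedged sketch (hypothetical helper, not used above): cumulative explained
# variance of an already-fitted IncrementalPCA instance, handy for deciding
# how many components are worth keeping.
@staticmethod
def _sketchCumulativeVariance(trans):
    # trans is assumed to be the fitted IncrementalPCA from incrementalPCA()
    return np.cumsum(trans.explained_variance_ratio_) * 100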
def exportConfusionMatrix(self, matrix, dictTransform):
    # per-class sensitivity (recall): diagonal value over the row total
    bakanosidad = []
    for i in range(len(matrix)):
        sumRow = sum(matrix[i])
        value = (matrix[i][i] / float(sumRow)) * 100
        bakanosidad.append(value)

    # per-class value exported as "Specificity": diagonal value over the
    # column total, i.e. the per-class precision of the model
    transpose = matrix.transpose()
    fiabilidad = []
    for i in range(len(transpose)):
        sumRow = sum(transpose[i])
        value = (transpose[i][i] / float(sumRow)) * 100
        fiabilidad.append(value)

    header = []
    for element in self.classList:
        header.append(self.getKeyToDict(dictTransform, element))

    # row-normalise the confusion matrix to percentages
    matrixData = []
    for element in matrix:
        rowSum = sum(element)
        row = []
        for value in element:
            dataInValue = (value / float(rowSum)) * 100
            row.append(dataInValue)
        matrixData.append(row)

    dictResponse = {
        "Specificity": fiabilidad,
        "Sensitivity": bakanosidad,
        "matrix": matrixData,
        "header": header
    }

    # comparative bar chart of the two measures
    graph = createCharts.graphicsCreator()
    graph.createBarChartCompare(fiabilidad, bakanosidad, 'Specificity',
                                'Sensitivity', 'Class Response', 'Percentage',
                                "Quality of the model", self.target,
                                self.path + "barchartCompare.png")
    return dictResponse
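# Hedged sketch (hypothetical helper, illustration only): for a 2x2 confusion
# matrix with rows = true classes and columns = predicted classes, the
# row-normalised diagonal is the per-class recall ("Sensitivity" above) and
# the column-normalised diagonal is the per-class precision (exported above
# under the key "Specificity").
@staticmethod
def _sketchConfusionMetrics():
    import numpy as np

    m = np.array([[40, 10],
                  [5, 45]])
    recall = np.diag(m) / m.sum(axis=1).astype(float) * 100     # [80.0, 90.0]
    precision = np.diag(m) / m.sum(axis=0).astype(float) * 100  # [~88.9, ~81.8]
    return recall, precision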
def checkExec(self):
    if self.optionProcess == 1:  # scatter view of continuous values
        try:
            graphic = createCharts.graphicsCreator()
            namePicture = self.pathResponse + "viewContinueValuesFor_" + self.keyFeature + ".png"
            graphic.createScatterContinueData(self.dataSet[self.keyFeature],
                                              namePicture, self.keyFeature)
            print "Create graphic OK"
        except:
            print "Error during create graphic"
    elif self.optionProcess == 2:  # boxplot and violin plot
        try:
            graphic = createCharts.graphicsCreator()
            namePicture = self.pathResponse + "boxplot.svg"
            graphic.createBoxPlot(self.dataSet, namePicture)
            print "Box plot graphic OK"
        except:
            print "Error during create BoxPlot"
        try:
            graphic = createCharts.graphicsCreator()
            namePicture = self.pathResponse + "violinplot.svg"
            graphic.createViolinPlot(self.dataSet, namePicture)
            print "Violin plot graphic OK"
        except:
            print "Error during create Violin"
    elif self.optionProcess == 3:  # histogram
        try:
            graphic = createCharts.graphicsCreator()
            namePicture = self.pathResponse + "histogram_" + self.keyFeature + ".svg"
            title = "Histogram for feature " + self.keyFeature
            graphic.generateHistogram(self.dataSet, self.keyFeature,
                                      namePicture, title)
            print "create histogram for feature: ", self.keyFeature
        except:
            print "Error during create Histogram"
    elif self.optionProcess == 4:  # frequency pie chart
        try:
            keys = list(set(self.dataSet[self.keyFeature]))
            values = []
            for key in keys:
                cont = 0
                for i in range(len(self.dataSet[self.keyFeature])):
                    if self.dataSet[self.keyFeature][i] == key:
                        cont += 1
                values.append(cont)
            namePicture = self.pathResponse + "piechartFor_" + self.keyFeature + ".svg"
            graphic = createCharts.graphicsCreator()
            graphic.createPieChart(keys, values, namePicture)
            print "Create pie chart for " + self.keyFeature
        except:
            print "Error during create a pie chart"
    elif self.optionProcess == 5:  # parallel coordinates
        try:
            graphic = createCharts.graphicsCreator()
            namePicture = self.pathResponse + "parallel_coordinates_" + self.keyFeature + ".svg"
            title = "parallel_coordinates for " + self.keyFeature
            graphic.createParallelCoordinates(self.dataSet, self.keyFeature,
                                              namePicture, title)
            print "Create parallel_coordinates graphic"
        except:
            print "Error during create a parallel_coordinates"
    elif self.optionProcess == 6:  # SPLOM (scatter plot matrix)
        try:
            graphic = createCharts.graphicsCreator()
            namePicture = self.pathResponse + "splom.svg"
            graphic.createScatterPlotMatrix(self.dataSet, namePicture,
                                            self.keyFeature)
            print "Create SPLOM for feature ", self.keyFeature
        except:
            print "Error during create SPLOM"
    else:  # summary statistics for every feature
        matrixResponse = []
        header = ["Feature", "Mean", "STD", "Variance", "Min", "Max"]
        for key in self.dataSet:
            try:
                print "Process ", key
                row = []
                row.append(key)
                row.append(np.mean(self.dataSet[key]))
                row.append(np.std(self.dataSet[key]))
                row.append(np.var(self.dataSet[key]))
                row.append(min(self.dataSet[key]))
                row.append(max(self.dataSet[key]))
                matrixResponse.append(row)
            except:
                pass
        df = pd.DataFrame(matrixResponse, columns=header)
        df.to_csv(self.pathResponse + "summaryStatistical.csv", index=False)
        print "Create summaryStatistical.csv file"
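# Hedged sketch (hypothetical helper, not wired into checkExec): the frequency
# count used for the pie chart in option 4 can be obtained directly with
# pandas, which avoids the nested counting loop.
def _sketchFrequencyCount(self):
    counts = self.dataSet[self.keyFeature].value_counts()
    return counts.index.tolist(), counts.values.tolist()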
def execAlgorithmByOptions(self):
    if self.algorithm == 1:  # AdaBoost
        errorData = {}
        self.responseExec.update({"algorithm": "AdaBoostRegressor"})
        paramsData = {"n_estimators": self.params[0],
                      "loss": self.params[1]}
        self.responseExec.update({"Params": paramsData})
        try:
            # instantiate and train the model wrapper
            AdaBoostObject = AdaBoost.AdaBoost(self.data, self.response,
                                               int(self.params[0]),
                                               self.params[1])
            AdaBoostObject.trainingMethod()

            performance = {"r_score": AdaBoostObject.r_score,
                           "predict_values": AdaBoostObject.predicctions.tolist(),
                           "real_values": AdaBoostObject.response.tolist()}

            # correlation measures between real and predicted values
            performanceValues = performanceData.performancePrediction(
                self.response, AdaBoostObject.predicctions.tolist())
            performance["pearson"] = performanceValues.calculatedPearson()
            performance["spearman"] = performanceValues.calculatedSpearman()
            performance["kendalltau"] = performanceValues.calculatekendalltau()

            self.responseExec.update({"Performance": performance})
            errorData.update({"Process": "OK"})

            # scatter plot of real vs. predicted values
            graphic = createCharts.graphicsCreator()
            namePicture = self.pathResponse + "scatter.png"
            graphic.createScatterPlotErrorPrediction(
                AdaBoostObject.response.tolist(),
                AdaBoostObject.predicctions.tolist(), namePicture)
        except:
            errorData.update({"Process": "ERROR"})
        self.responseExec.update({"errorExec": errorData})

        # also export the result as JSON
        nameFile = self.pathResponse + "responseTraining.json"
        with open(nameFile, 'w') as fp:
            json.dump(self.responseExec, fp)
    elif self.algorithm == 2:  # Bagging
        errorData = {}
        self.responseExec.update({"algorithm": "BaggingRegressor"})
        paramsData = {"n_estimators": self.params[0],
                      "bootstrap": self.params[1]}
        self.responseExec.update({"Params": paramsData})
        try:
            # instantiate and train the model wrapper
            baggingObject = Baggin.Baggin(self.data, self.response,
                                          int(self.params[0]), self.params[1])
            baggingObject.trainingMethod()

            performance = {"r_score": baggingObject.r_score,
                           "predict_values": baggingObject.predicctions.tolist(),
                           "real_values": baggingObject.response.tolist()}

            # correlation measures between real and predicted values
            performanceValues = performanceData.performancePrediction(
                self.response, baggingObject.predicctions.tolist())
            performance["pearson"] = performanceValues.calculatedPearson()
            performance["spearman"] = performanceValues.calculatedSpearman()
            performance["kendalltau"] = performanceValues.calculatekendalltau()

            self.responseExec.update({"Performance": performance})
            errorData.update({"Process": "OK"})

            # scatter plot of real vs. predicted values
            graphic = createCharts.graphicsCreator()
            namePicture = self.pathResponse + "scatter.png"
            graphic.createScatterPlotErrorPrediction(
                baggingObject.response.tolist(),
                baggingObject.predicctions.tolist(), namePicture)
        except:
            errorData.update({"Process": "ERROR"})
        self.responseExec.update({"errorExec": errorData})

        # also export the result as JSON
        nameFile = self.pathResponse + "responseTraining.json"
        with open(nameFile, 'w') as fp:
            json.dump(self.responseExec, fp)
    elif self.algorithm == 3:  # Decision tree
        errorData = {}
        self.responseExec.update({"algorithm": "DecisionTreeRegressor"})
        paramsData = {"criterion": self.params[0],
                      "splitter": self.params[1]}
        self.responseExec.update({"Params": paramsData})
        try:
            # instantiate and train the model wrapper
            decisionObject = DecisionTree.DecisionTree(
                self.data, self.response, self.params[0], self.params[1])
            decisionObject.trainingMethod()

            performance = {"r_score": decisionObject.r_score,
                           "predict_values": decisionObject.predicctions.tolist(),
                           "real_values": decisionObject.response.tolist()}

            # correlation measures between real and predicted values
            performanceValues = performanceData.performancePrediction(
                self.response, decisionObject.predicctions.tolist())
            performance["pearson"] = performanceValues.calculatedPearson()
            performance["spearman"] = performanceValues.calculatedSpearman()
            performance["kendalltau"] = performanceValues.calculatekendalltau()

            self.responseExec.update({"Performance": performance})
            errorData.update({"Process": "OK"})

            # scatter plot of real vs. predicted values
            graphic = createCharts.graphicsCreator()
            namePicture = self.pathResponse + "scatter.png"
            graphic.createScatterPlotErrorPrediction(
                decisionObject.response.tolist(),
                decisionObject.predicctions.tolist(), namePicture)
        except:
            errorData.update({"Process": "ERROR"})
        self.responseExec.update({"errorExec": errorData})

        # also export the result as JSON
        nameFile = self.pathResponse + "responseTraining.json"
        with open(nameFile, 'w') as fp:
            json.dump(self.responseExec, fp)
    elif self.algorithm == 4:  # Gradient boosting
        errorData = {}
        self.responseExec.update({"algorithm": "GradientBoostingRegressor"})
        paramsData = {"n_estimators": self.params[0],
                      "loss": self.params[1],
                      "criterion": self.params[2],
                      "min_samples_leaf": self.params[3],
                      "min_samples_split": self.params[4]}
        self.responseExec.update({"Params": paramsData})
        try:
            # instantiate and train the model wrapper; it receives
            # min_samples_split (params[4]) before min_samples_leaf (params[3])
            gradientObject = Gradient.Gradient(self.data, self.response,
                                               int(self.params[0]),
                                               self.params[1], self.params[2],
                                               int(self.params[4]),
                                               int(self.params[3]))
            gradientObject.trainingMethod()

            performance = {"r_score": gradientObject.r_score,
                           "predict_values": gradientObject.predicctions.tolist(),
                           "real_values": gradientObject.response.tolist()}

            # correlation measures between real and predicted values
            performanceValues = performanceData.performancePrediction(
                self.response, gradientObject.predicctions.tolist())
            performance["pearson"] = performanceValues.calculatedPearson()
            performance["spearman"] = performanceValues.calculatedSpearman()
            performance["kendalltau"] = performanceValues.calculatekendalltau()

            self.responseExec.update({"Performance": performance})
            errorData.update({"Process": "OK"})

            # scatter plot of real vs. predicted values
            graphic = createCharts.graphicsCreator()
            namePicture = self.pathResponse + "scatter.png"
            graphic.createScatterPlotErrorPrediction(
                gradientObject.response.tolist(),
                gradientObject.predicctions.tolist(), namePicture)
        except:
            errorData.update({"Process": "ERROR"})
        self.responseExec.update({"errorExec": errorData})

        # also export the result as JSON
        nameFile = self.pathResponse + "responseTraining.json"
        with open(nameFile, 'w') as fp:
            json.dump(self.responseExec, fp)
    elif self.algorithm == 5:  # KNN
        errorData = {}
        self.responseExec.update({"algorithm": "KNeighborsRegressor"})
        paramsData = {"n_neighbors": self.params[0],
                      "algorithm": self.params[1],
                      "metric": self.params[2],
                      "weights": self.params[3]}
        self.responseExec.update({"Params": paramsData})
        try:
            # instantiate and train the model wrapper
            knnObject = knn_regression.KNN_Model(self.data, self.response,
                                                 int(self.params[0]),
                                                 self.params[1],
                                                 self.params[2],
                                                 self.params[3])
            knnObject.trainingMethod()

            performance = {"r_score": knnObject.r_score,
                           "predict_values": knnObject.predicctions.tolist(),
                           "real_values": knnObject.response.tolist()}

            # correlation measures between real and predicted values
            performanceValues = performanceData.performancePrediction(
                self.response, knnObject.predicctions.tolist())
            performance["pearson"] = performanceValues.calculatedPearson()
            performance["spearman"] = performanceValues.calculatedSpearman()
            performance["kendalltau"] = performanceValues.calculatekendalltau()

            self.responseExec.update({"Performance": performance})
            errorData.update({"Process": "OK"})

            # scatter plot of real vs. predicted values
            graphic = createCharts.graphicsCreator()
            namePicture = self.pathResponse + "scatter.png"
            graphic.createScatterPlotErrorPrediction(
                knnObject.response.tolist(),
                knnObject.predicctions.tolist(), namePicture)
        except:
            errorData.update({"Process": "ERROR"})
        self.responseExec.update({"errorExec": errorData})

        # also export the result as JSON
        nameFile = self.pathResponse + "responseTraining.json"
        with open(nameFile, 'w') as fp:
            json.dump(self.responseExec, fp)
    elif self.algorithm == 6:  # MLP
        errorData = {}
        self.responseExec.update({"algorithm": "MLPRegressor"})
        paramsData = {"activation": self.params[0],
                      "solver": self.params[1],
                      "learning_rate": self.params[2],
                      "hidden_layer_sizes_a": self.params[3],
                      "hidden_layer_sizes_b": self.params[4],
                      "hidden_layer_sizes_c": self.params[5],
                      "alpha": self.params[6],
                      "max_iter": self.params[7],
                      "shuffle": self.params[8]}
        self.responseExec.update({"Params": paramsData})
        try:
            # instantiate and train the model wrapper
            MLPObject = MLP.MLP(self.data, self.response, self.params[0],
                                self.params[1], self.params[2],
                                int(self.params[3]), int(self.params[4]),
                                int(self.params[5]), float(self.params[6]),
                                int(self.params[7]), self.params[8])
            MLPObject.trainingMethod()

            performance = {"r_score": MLPObject.r_score,
                           "predict_values": MLPObject.predicctions.tolist(),
                           "real_values": MLPObject.response.tolist()}

            # correlation measures between real and predicted values
            performanceValues = performanceData.performancePrediction(
                self.response, MLPObject.predicctions.tolist())
            performance["pearson"] = performanceValues.calculatedPearson()
            performance["spearman"] = performanceValues.calculatedSpearman()
            performance["kendalltau"] = performanceValues.calculatekendalltau()

            self.responseExec.update({"Performance": performance})
            errorData.update({"Process": "OK"})

            # scatter plot of real vs. predicted values
            graphic = createCharts.graphicsCreator()
            namePicture = self.pathResponse + "scatter.png"
            graphic.createScatterPlotErrorPrediction(
                MLPObject.response.tolist(),
                MLPObject.predicctions.tolist(), namePicture)
        except:
            errorData.update({"Process": "ERROR"})
        self.responseExec.update({"errorExec": errorData})

        # also export the result as JSON
        nameFile = self.pathResponse + "responseTraining.json"
        with open(nameFile, 'w') as fp:
            json.dump(self.responseExec, fp)
    elif self.algorithm == 7:  # NuSVR
        errorData = {}
        self.responseExec.update({"algorithm": "NuSVR"})
        paramsData = {"kernel": self.params[0],
                      "nu": self.params[1],
                      "degree": self.params[2],
                      "gamma": self.params[3]}
        self.responseExec.update({"Params": paramsData})
        try:
            # instantiate and train the model wrapper
            nuSVM = NuSVR.NuSVRModel(self.data, self.response, self.params[0],
                                     int(self.params[2]),
                                     float(self.params[3]),
                                     float(self.params[1]))
            nuSVM.trainingMethod()

            performance = {"r_score": nuSVM.r_score,
                           "predict_values": nuSVM.predicctions.tolist(),
                           "real_values": nuSVM.response.tolist()}

            # correlation measures between real and predicted values
            performanceValues = performanceData.performancePrediction(
                self.response, nuSVM.predicctions.tolist())
            performance["pearson"] = performanceValues.calculatedPearson()
            performance["spearman"] = performanceValues.calculatedSpearman()
            performance["kendalltau"] = performanceValues.calculatekendalltau()

            self.responseExec.update({"Performance": performance})
            errorData.update({"Process": "OK"})

            # scatter plot of real vs. predicted values
            graphic = createCharts.graphicsCreator()
            namePicture = self.pathResponse + "scatter.png"
            graphic.createScatterPlotErrorPrediction(
                nuSVM.response.tolist(), nuSVM.predicctions.tolist(),
                namePicture)
        except:
            errorData.update({"Process": "ERROR"})
        self.responseExec.update({"errorExec": errorData})

        # also export the result as JSON
        nameFile = self.pathResponse + "responseTraining.json"
        with open(nameFile, 'w') as fp:
            json.dump(self.responseExec, fp)
    elif self.algorithm == 8:  # Random forest
        errorData = {}
        self.responseExec.update({"algorithm": "RandomForestRegressor"})
        paramsData = {"n_estimators": self.params[0],
                      "criterion": self.params[1],
                      "min_samples_split": self.params[2],
                      "min_samples_leaf": self.params[3],
                      "bootstrap": self.params[4]}
        self.responseExec.update({"Params": paramsData})
        try:
            # instantiate and train the model wrapper
            rf = RandomForest.RandomForest(self.data, self.response,
                                           int(self.params[0]),
                                           self.params[1],
                                           int(self.params[2]),
                                           int(self.params[3]),
                                           self.params[4])
            rf.trainingMethod()

            performance = {"r_score": rf.r_score,
                           "predict_values": rf.predicctions.tolist(),
                           "real_values": rf.response.tolist()}

            # correlation measures between real and predicted values
            performanceValues = performanceData.performancePrediction(
                self.response, rf.predicctions.tolist())
            performance["pearson"] = performanceValues.calculatedPearson()
            performance["spearman"] = performanceValues.calculatedSpearman()
            performance["kendalltau"] = performanceValues.calculatekendalltau()

            self.responseExec.update({"Performance": performance})
            errorData.update({"Process": "OK"})

            # scatter plot of real vs. predicted values
            graphic = createCharts.graphicsCreator()
            namePicture = self.pathResponse + "scatter.png"
            graphic.createScatterPlotErrorPrediction(
                rf.response.tolist(), rf.predicctions.tolist(), namePicture)
        except:
            errorData.update({"Process": "ERROR"})
        self.responseExec.update({"errorExec": errorData})

        # also export the result as JSON
        nameFile = self.pathResponse + "responseTraining.json"
        with open(nameFile, 'w') as fp:
            json.dump(self.responseExec, fp)
    else:  # SVR
        errorData = {}
        self.responseExec.update({"algorithm": "SVR"})
        paramsData = {"kernel": self.params[0],
                      "degree": self.params[1],
                      "gamma": self.params[2]}
        self.responseExec.update({"Params": paramsData})
        try:
            # instantiate and train the model wrapper
            svm = SVR.SVRModel(self.data, self.response, self.params[0],
                               int(self.params[1]), float(self.params[2]))
            svm.trainingMethod()

            performance = {"r_score": svm.r_score,
                           "predict_values": svm.predicctions.tolist(),
                           "real_values": svm.response.tolist()}

            # correlation measures between real and predicted values
            performanceValues = performanceData.performancePrediction(
                self.response, svm.predicctions.tolist())
            performance["pearson"] = performanceValues.calculatedPearson()
            performance["spearman"] = performanceValues.calculatedSpearman()
            performance["kendalltau"] = performanceValues.calculatekendalltau()

            self.responseExec.update({"Performance": performance})
            errorData.update({"Process": "OK"})

            # scatter plot of real vs. predicted values
            graphic = createCharts.graphicsCreator()
            namePicture = self.pathResponse + "scatter.png"
            graphic.createScatterPlotErrorPrediction(
                svm.response.tolist(), svm.predicctions.tolist(), namePicture)
        except:
            errorData.update({"Process": "ERROR"})
        self.responseExec.update({"errorExec": errorData})

        # also export the result as JSON
        nameFile = self.pathResponse + "responseTraining.json"
        with open(nameFile, 'w') as fp:
            json.dump(self.responseExec, fp)
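# Hedged sketch (hypothetical helper, not called above): every branch of
# execAlgorithmByOptions repeats the same performance/export block. Assuming
# each trained wrapper exposes r_score, predicctions and response as above,
# the shared part could be factored out like this.
def _sketchExportPerformance(self, modelObject):
    performance = {"r_score": modelObject.r_score,
                   "predict_values": modelObject.predicctions.tolist(),
                   "real_values": modelObject.response.tolist()}
    performanceValues = performanceData.performancePrediction(
        self.response, modelObject.predicctions.tolist())
    performance["pearson"] = performanceValues.calculatedPearson()
    performance["spearman"] = performanceValues.calculatedSpearman()
    performance["kendalltau"] = performanceValues.calculatekendalltau()
    self.responseExec.update({"Performance": performance})

    # scatter plot and JSON export, mirroring the branches above
    graphic = createCharts.graphicsCreator()
    graphic.createScatterPlotErrorPrediction(
        modelObject.response.tolist(), modelObject.predicctions.tolist(),
        self.pathResponse + "scatter.png")
    with open(self.pathResponse + "responseTraining.json", 'w') as fp:
        json.dump(self.responseExec, fp)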
def execAlgorithmByOptions(self):
    nameDoc = ""
    if self.algorithm == 1:  # K-Means
        responseExec = self.applyClustering.aplicateKMeans(int(self.params[0]))
        self.response.update({"algorithm": "K Means"})
        paramsData = {"Number K": self.params[0]}
        self.response.update({"Params": paramsData})
        if responseExec == 0:
            self.response.update({"responseExec": "OK"})
        else:
            self.response.update({"responseExec": "ERROR"})
    elif self.algorithm == 2:  # Birch
        responseExec = self.applyClustering.aplicateBirch(int(self.params[0]))
        self.response.update({"algorithm": "Birch"})
        paramsData = {"Number K": self.params[0]}
        self.response.update({"Params": paramsData})
        if responseExec == 0:
            self.response.update({"responseExec": "OK"})
        else:
            self.response.update({"responseExec": "ERROR"})
    elif self.algorithm == 3:  # Agglomerative clustering
        responseExec = self.applyClustering.aplicateAlgomerativeClustering(
            self.params[0], self.params[1], int(self.params[2]))
        self.response.update({"algorithm": "Agglomerative Clustering"})
        paramsData = {"Linkage": self.params[0],
                      "Affinity": self.params[1],
                      "Number K": self.params[2]}
        self.response.update({"Params": paramsData})
        if responseExec == 0:
            self.response.update({"responseExec": "OK"})
        else:
            self.response.update({"responseExec": "ERROR"})
    elif self.algorithm == 4:  # DBSCAN
        responseExec = self.applyClustering.aplicateDBSCAN()
        self.response.update({"algorithm": "DBSCAN"})
        paramsData = {"Default": "Default"}
        self.response.update({"Params": paramsData})
        if responseExec == 0:
            self.response.update({"responseExec": "OK"})
        else:
            self.response.update({"responseExec": "ERROR"})
    elif self.algorithm == 5:  # Mean Shift
        responseExec = self.applyClustering.aplicateMeanShift()
        self.response.update({"algorithm": "Mean Shift"})
        paramsData = {"Default": "Default"}
        self.response.update({"Params": paramsData})
        if responseExec == 0:
            self.response.update({"responseExec": "OK"})
        else:
            self.response.update({"responseExec": "ERROR"})
    else:  # Affinity Propagation
        responseExec = self.applyClustering.aplicateAffinityPropagation()
        self.response.update({"algorithm": "Affinity Propagation"})
        paramsData = {"Default": "Default"}
        self.response.update({"Params": paramsData})
        if responseExec == 0:
            self.response.update({"responseExec": "OK"})
        else:
            self.response.update({"responseExec": "ERROR"})

    # only if the execution succeeded
    if self.response['responseExec'] == "OK":
        print "Eval clustering"

        # evaluate the clustering and collect the metrics
        result = evaluationClustering.evaluationClustering(
            self.dataSet, self.applyClustering.labels)
        self.response.update({"calinski_harabaz_score": result.calinski})
        self.response.update({"silhouette_score": result.siluetas})

        # finally, append the labels to the data set and export the result
        # to the given path
        self.dataSet["Labels"] = pd.Series(self.applyClustering.labels,
                                           index=self.dataSet.index)
        self.dataSet.to_csv(self.pathResponse + "responseClustering.csv")
        print "Create file responseClustering.csv"

        # count the members of each group for the pie chart of group sizes
        countGroup, keys, values = self.countMemberGroup()
        self.response.update({"membersGroup": countGroup})

        # instantiate the chart generator
        namePic = self.pathResponse + "distributionGroup.png"
        createChartsObject = createCharts.graphicsCreator()
        createChartsObject.createPieChart(keys, values, namePic)
        print self.response

        # also export the result as JSON
        with open(self.pathResponse + "responseClustering.json", 'w') as fp:
            json.dump(self.response, fp)
        print "Create file responseClustering.json"
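# Hedged sketch (hypothetical helper, illustration only): the per-group member
# count consumed above could also be derived directly from the "Labels" column
# added in execAlgorithmByOptions.
def _sketchCountMembersByLabel(self):
    counts = self.dataSet["Labels"].value_counts()
    return counts.to_dict(), counts.index.tolist(), counts.values.tolist()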