def roc(self, cm):
    """Summarise a confusion matrix as a coarse one-vs-rest ROC curve.

    Every class contributes one operating point: its sensitivity
    ``TP / (TP + FN)`` as the true-positive rate and its fall-out
    ``FP / (FP + TN)`` as the false-positive rate. The points are anchored
    with (0, 0) and (1, 1), ordered by false-positive rate while keeping
    each (fpr, tpr) pair together, and integrated with ``auc``.

    Args:
        cm: Square 2-D confusion matrix (anything ``np.asarray`` accepts).
            Rows are treated as actual classes and columns as predicted
            classes -- NOTE(review): confirm this matches the matrix
            returned by ``cross_apply``.

    Returns:
        A ``Roc`` instance whose fpr, tpr and auc values have been set.
    """
    cm = np.asarray(cm, dtype=float)

    true_pos = np.diag(cm)
    actual_totals = cm.sum(axis=1)       # TP + FN for each class
    predicted_totals = cm.sum(axis=0)    # TP + FP for each class
    false_pos = predicted_totals - true_pos
    true_neg = cm.sum() - actual_totals - predicted_totals + true_pos
    negatives = false_pos + true_neg

    # A class with no actual samples (or no negatives) would divide by zero;
    # report a rate of 0.0 for it instead of propagating NaN into the AUC.
    sensitivity = np.divide(
        true_pos, actual_totals,
        out=np.zeros_like(true_pos), where=actual_totals > 0)
    fall_out = np.divide(
        false_pos, negatives,
        out=np.zeros_like(false_pos), where=negatives > 0)

    fpr = np.concatenate(([0.0, 1.0], fall_out))
    tpr = np.concatenate(([0.0, 1.0], sensitivity))

    # Sort by fpr (ties broken by tpr) with one shared permutation so the
    # x-axis is monotonic for ``auc`` and every point keeps its own tpr.
    order = np.lexsort((tpr, fpr))

    curve = Roc()
    curve.set_fpr(fpr[order])
    curve.set_tpr(tpr[order])
    curve.set_auc(auc(curve.get_fpr(), curve.get_tpr()))
    return curve
def CLogistRegression(self):
    """Grid-search a logistic-regression model and cross-evaluate it.

    The ``GridSearchCV`` wrapper is appended to ``self.classifiers``,
    evaluated through ``self.cross_apply`` on the training data, and its
    predictions are stored in ``self.df_pred['lr']``.

    Returns:
        ``(ac, ac_v, p, r, f1, e, cm, roc_)``: the seven values that follow
        the predictions in the ``cross_apply`` result (presumably accuracy,
        per-fold accuracies, precision, recall, F1, error and confusion
        matrix -- confirm against ``cross_apply``), then the ``Roc`` built
        from ``cm`` by ``self.roc``.
    """
    # Only the l2 penalty is searched, with solvers that accept it.
    parameters = {
        'penalty': ['l2'],
        'C': [0.000001, 0.00001, 0.0001, 0.001, 0.1, 1.0],
        'solver': ['newton-cg', 'lbfgs', 'sag'],
        'multi_class': ['ovr'],
    }
    grid_lr = GridSearchCV(LogisticRegression(), parameters)
    self.classifiers.append(grid_lr)

    pred, ac, ac_v, p, r, f1, e, cm = self.cross_apply(
        grid_lr, self.array_train, self.target_train)
    roc_ = self.roc(cm)
    self.df_pred['lr'] = pred
    return ac, ac_v, p, r, f1, e, cm, roc_
def CSuportVectorMachine(self):
    """Grid-search a support-vector classifier and cross-evaluate it.

    The ``GridSearchCV`` wrapper is appended to ``self.classifiers``,
    evaluated through ``self.cross_apply`` on the training data, and its
    predictions are stored in ``self.df_pred['svm']``.

    Returns:
        ``(ac, ac_v, p, r, f1, e, cm, roc_)``: the seven values that follow
        the predictions in the ``cross_apply`` result (presumably accuracy,
        per-fold accuracies, precision, recall, F1, error and confusion
        matrix -- confirm against ``cross_apply``), then the ``Roc`` built
        from ``cm`` by ``self.roc``.
    """
    parameters = {
        'kernel': ['rbf', 'linear'],
        'gamma': [1e-3, 1e-4],
        'C': [1, 10, 100, 1000],
        # SVC only accepts 'ovr' and 'ovo' here; any other value is either
        # ignored or rejected depending on the scikit-learn release.
        'decision_function_shape': ['ovr', 'ovo'],
    }
    grid_svm = GridSearchCV(svm.SVC(), parameters)
    self.classifiers.append(grid_svm)

    pred, ac, ac_v, p, r, f1, e, cm = self.cross_apply(
        grid_svm, self.array_train, self.target_train)
    roc_ = self.roc(cm)
    self.df_pred['svm'] = pred
    return ac, ac_v, p, r, f1, e, cm, roc_
def CGradienteDesc(self):
    """Grid-search an SGD classifier and cross-evaluate it.

    The grid is evaluated through ``self.cross_apply`` on the training
    data. This method neither appends the estimator to
    ``self.classifiers`` nor stores its predictions in ``self.df_pred``.

    Returns:
        ``(ac, ac_v, p, r, f1, e, cm, roc_)``: the seven values that follow
        the predictions in the ``cross_apply`` result (presumably accuracy,
        per-fold accuracies, precision, recall, F1, error and confusion
        matrix -- confirm against ``cross_apply``), then the ``Roc`` built
        from ``cm`` by ``self.roc``.
    """
    # NOTE(review): the accepted 'loss' spellings vary between scikit-learn
    # releases (e.g. 'log' vs 'log_loss', 'squared_loss' vs
    # 'squared_error'); confirm these match the installed version.
    parameters = {
        'loss': [
            'hinge', 'log', 'modified_huber', 'squared_hinge', 'perceptron',
            'squared_loss', 'huber', 'epsilon_insensitive',
            'squared_epsilon_insensitive',
        ],
        'penalty': ['l1', 'l2'],
        'alpha': [0.000001, 0.00001, 0.0001, 0.001, 0.1, 1.0],
        'learning_rate': ['constant', 'optimal', 'invscaling'],
        'eta0': [0.01, 0.1, 1.0],
    }
    grid_sgd = GridSearchCV(SGDClassifier(), parameters)

    # The predictions (first element) are not used by this method.
    _, ac, ac_v, p, r, f1, e, cm = self.cross_apply(
        grid_sgd, self.array_train, self.target_train)
    roc_ = self.roc(cm)
    return ac, ac_v, p, r, f1, e, cm, roc_
def committee(self, pesos):
    """Cross-evaluate a weighted hard-voting ensemble of stored classifiers.

    The first five entries of ``self.classifiers`` are wrapped in a
    ``VotingClassifier`` and evaluated through ``self.cross_apply``; the
    predictions are stored in ``self.df_pred['cm']``.

    Args:
        pesos: Passed to ``VotingClassifier`` as ``weights``.

    Returns:
        ``(ac, ac_v, p, r, f1, e, cm_median, roc_)``: the seven values that
        follow the predictions in the ``cross_apply`` result, then the
        ``Roc`` built from the confusion matrix by ``self.roc``.

    Raises:
        IndexError: If ``self.classifiers`` holds fewer than five entries.
    """
    # Estimator names are bound to list positions 0-4. NOTE(review): the
    # names only describe the right models if the classifiers were
    # registered in this exact order -- verify against the caller.
    names = ('nv', 'svm', 'dt', 'rf', 'lr')
    estimators = [
        (name, self.classifiers[position])
        for position, name in enumerate(names)
    ]
    model = VotingClassifier(
        estimators=estimators, weights=pesos, voting='hard')

    pred, ac, ac_v, p, r, f1, e, cm_median = self.cross_apply(
        model, self.array_train, self.target_train)
    roc_ = self.roc(cm_median)
    self.df_pred['cm'] = pred
    return ac, ac_v, p, r, f1, e, cm_median, roc_
def CRandomForest(self):
    """Grid-search a random-forest classifier and cross-evaluate it.

    The ``GridSearchCV`` wrapper is appended to ``self.classifiers``,
    evaluated through ``self.cross_apply`` on the training data, and its
    predictions are stored in ``self.df_pred['rf']``.

    Returns:
        ``(ac, ac_v, p, r, f1, e, cm, roc_)``: the seven values that follow
        the predictions in the ``cross_apply`` result (presumably accuracy,
        per-fold accuracies, precision, recall, F1, error and confusion
        matrix -- confirm against ``cross_apply``), then the ``Roc`` built
        from ``cm`` by ``self.roc``.
    """
    parameters = {
        'n_estimators': [1, 5, 10, 20, 30],
        'criterion': ('gini', 'entropy'),
    }
    grid_rf = GridSearchCV(RandomForestClassifier(), parameters)
    self.classifiers.append(grid_rf)

    pred, ac, ac_v, p, r, f1, e, cm = self.cross_apply(
        grid_rf, self.array_train, self.target_train)
    roc_ = self.roc(cm)
    self.df_pred['rf'] = pred
    return ac, ac_v, p, r, f1, e, cm, roc_
def CDecisionTree(self):
    """Grid-search a decision-tree classifier and cross-evaluate it.

    The ``GridSearchCV`` wrapper is appended to ``self.classifiers``,
    evaluated through ``self.cross_apply`` on the training data, and its
    predictions are stored in ``self.df_pred['dt']``.

    Returns:
        ``(ac, ac_v, p, r, f1, e, cm, roc_)``: the seven values that follow
        the predictions in the ``cross_apply`` result (presumably accuracy,
        per-fold accuracies, precision, recall, F1, error and confusion
        matrix -- confirm against ``cross_apply``), then the ``Roc`` built
        from ``cm`` by ``self.roc``.
    """
    parameters = {
        'criterion': ('gini', 'entropy'),
        'splitter': ('best', 'random'),
        # 'auto' is left out: for DecisionTreeClassifier it is an alias of
        # 'sqrt' (a duplicate grid point) and newer scikit-learn releases
        # reject it.
        'max_features': ('log2', 'sqrt'),
    }
    grid_dt = GridSearchCV(tree.DecisionTreeClassifier(), parameters)
    self.classifiers.append(grid_dt)

    pred, ac, ac_v, p, r, f1, e, cm = self.cross_apply(
        grid_dt, self.array_train, self.target_train)
    roc_ = self.roc(cm)
    self.df_pred['dt'] = pred
    return ac, ac_v, p, r, f1, e, cm, roc_
def CMultinomialNV(self):
    """Grid-search a multinomial Naive Bayes model and cross-evaluate it.

    The ``GridSearchCV`` wrapper is appended to ``self.classifiers``,
    evaluated through ``self.cross_apply`` on the training data, and its
    predictions are stored in ``self.df_pred['nv']``.

    Returns:
        ``(ac, ac_v, p, r, f1, e, cm, roc_)``: the seven values that follow
        the predictions in the ``cross_apply`` result (presumably accuracy,
        per-fold accuracies, precision, recall, F1, error and confusion
        matrix -- confirm against ``cross_apply``), then the ``Roc`` built
        from ``cm`` by ``self.roc``.
    """
    parameters = {
        'alpha': [0.000001, 0.00001, 0.0001, 0.001, 0.1, 1.0],
        'fit_prior': [True, False],
    }
    grid_nb = GridSearchCV(MultinomialNB(), parameters)
    self.classifiers.append(grid_nb)

    pred, ac, ac_v, p, r, f1, e, cm = self.cross_apply(
        grid_nb, self.array_train, self.target_train)
    roc_ = self.roc(cm)
    self.df_pred['nv'] = pred
    return ac, ac_v, p, r, f1, e, cm, roc_
def mensure(sent):
    """Run the Naive Bayes and SGD experiments and persist their results.

    For each classifier the call is timed, its scalar metrics are printed,
    its confusion matrix is plotted and a log row is recorded. Afterwards
    the timings and the log rows are written with ``sent.write_csv`` and
    all collected ROC curves are drawn with ``sent.plot_roc_all``.

    Args:
        sent: Object providing ``CMultinomialNV``, ``gradienteDesc``,
            ``plot_confuse_matrix``, ``write_csv`` and ``plot_roc_all``.
    """
    logs = []
    fpr, tpr, aucs, labels = [], [], [], []
    custos = pd.DataFrame()

    def evaluate(key, heading, run, plot_title, plot_file, label):
        """Time one classifier run, report it and collect its outputs."""
        start = time.time()
        acc, _, p, r, f1, e, cm, roc = run()
        elapsed = time.time() - start
        custos[key] = [elapsed]

        print(heading)
        print('ac = %f' % acc)
        print('p = %f' % p)
        print('r = %f' % r)
        print('f1 = %f' % f1)
        print('e = %f' % e)
        print("time %f" % elapsed)
        print('---------------')

        sent.plot_confuse_matrix(cm, plot_title, plot_file)
        logs.append((key, acc, p, r, f1, e, str(dt.now())))

        # Curves and legend labels are appended together so that index i of
        # each list always refers to the same classifier.
        fpr.append(roc.get_fpr())
        tpr.append(roc.get_tpr())
        aucs.append(roc.get_auc())
        labels.append(label)

    evaluate('nv', 'Naive', sent.CMultinomialNV,
             'Matriz de Confusao - Naive Bayes', 'matriz-nv', 'naive')
    # NOTE(review): the SGD method defined alongside the other classifier
    # methods is named ``CGradienteDesc``; confirm ``gradienteDesc`` exists
    # on ``sent``.
    evaluate('sgd', 'Gradiente', sent.gradienteDesc,
             'Matriz de Confusao - SGD', 'matriz-sgd', 'sgd')

    sent.write_csv(custos, 'Result/tempo-exe')
    sent.write_csv(logs, 'Result/metricas')
    sent.plot_roc_all(fpr, tpr, aucs, labels)
from sent_classification_module import *
from class_roc import Roc

if __name__ == '__main__':
    # Script entry point: evaluate the Naive Bayes classifier on the
    # Portuguese dataset and print its metrics.
    sent = SentClassifiers('dataset-portuguese')

    # One Roc placeholder per classifier slot; only nv_roc is reassigned in
    # the statements that follow.
    nv_roc, svm_roc, dt_roc, rf_roc, gd_roc, rl_roc, cm_roc = (
        Roc() for _ in range(7))

    # NOTE(review): this module-level ``auc`` list hides any ``auc`` name
    # brought in by the star import above.
    fpr, tpr, auc, acuracias = [], [], [], []

    nv_ac, _, nv_p, nv_r, nv_f1, nv_e, nv_cm, nv_roc = sent.CMultinomialNV()

    print("Naive")
    report = (
        ('ac = %f', nv_ac),
        ('p = %f', nv_p),
        ('r = %f', nv_r),
        ('f1 = %f', nv_f1),
        ('e = %f', nv_e),
    )
    for template, value in report:
        print(template % value)
    print('---------------')

    acuracias.append(nv_ac)