Ejemplo n.º 1
0
	def roc(self,cm):
		"""Build a ``Roc`` summary from a square confusion matrix.

		For every class ``i`` two ratios are derived from ``cm``:

		* ``cm[i, i] / sum(cm[i, :])`` -- the diagonal entry over its row
		  total (commented as "sensitivity" in the previous code);
		* ``1 - cm[i, i] / sum(cm[:, i])`` -- one minus the diagonal entry
		  over its column total (commented as "specificity" in the previous
		  code).

		Each set of ratios is extended with the end points 0 and 1, sorted
		independently, and stored on the returned ``Roc`` together with the
		area computed by ``auc``.

		Changes relative to the previous implementation:

		* row/column totals are computed per class; before, the running
		  totals were never reset, so class ``i`` was divided by the sum of
		  rows (or columns) ``0..i``;
		* the computation used to be repeated once per row of ``cm``,
		  appending the same points every time; it now runs once (the
		  resulting curve and area are the same, only duplicates are gone);
		* a class whose row or column is empty contributes a ratio of 0
		  instead of dividing by zero.

		Args:
			cm: square confusion matrix, convertible to a 2-D numpy array.

		Returns:
			A ``Roc`` with its fpr, tpr and auc fields set.
		"""
		matrix = np.asarray(cm, dtype=float)
		diagonal = np.diag(matrix)
		row_totals = matrix.sum(axis=1)
		col_totals = matrix.sum(axis=0)

		# ``where`` skips empty rows/columns; ``out`` supplies the 0 used there.
		row_ratio = np.divide(diagonal, row_totals, out=np.zeros_like(diagonal), where=row_totals != 0)
		col_ratio = np.divide(diagonal, col_totals, out=np.zeros_like(diagonal), where=col_totals != 0)

		# NOTE(review): as before, the row-based ratio feeds the *fpr* axis and
		# the column-based one the *tpr* axis. A row-based ratio is usually the
		# true-positive rate, so the axes may be swapped -- confirm the intent
		# before changing it, since it alters the reported AUC.
		fpr = np.concatenate(([0.0, 1.0], row_ratio))
		tpr = np.concatenate(([0.0, 1.0], 1.0 - col_ratio))

		roc = Roc()
		roc.set_fpr(np.sort(fpr))
		roc.set_tpr(np.sort(tpr))
		roc.set_auc(auc(roc.get_fpr(),roc.get_tpr()))

		return roc
Ejemplo n.º 2
0
    def CLogistRegression(self):
        """Grid-search a logistic regression and evaluate it with ``cross_apply``.

        The ``GridSearchCV`` wrapper is appended to ``self.classifiers`` and
        then passed to ``self.cross_apply`` together with ``self.array_train``
        and ``self.target_train``. The predictions returned by
        ``cross_apply`` are stored in ``self.df_pred['lr']``.

        Returns:
            ``(ac, ac_v, p, r, f1, e, cm, roc_)``: the last seven values
            returned by ``cross_apply`` (their meaning is defined there)
            followed by ``self.roc(cm)``.
        """
        parameters = {
            'penalty': ['l2'],
            'C': [0.000001, 0.00001, 0.0001, 0.001, 0.1, 1.0],
            'solver': ['newton-cg', 'lbfgs', 'sag'],
            'multi_class': ['ovr']
        }

        grid_lr = GridSearchCV(LogisticRegression(), parameters)
        self.classifiers.append(grid_lr)

        pred, ac, ac_v, p, r, f1, e, cm = self.cross_apply(
            grid_lr, self.array_train, self.target_train)
        # The placeholder ``Roc()`` that used to precede this call was
        # overwritten immediately, so it has been dropped.
        roc_ = self.roc(cm)

        self.df_pred['lr'] = pred

        return ac, ac_v, p, r, f1, e, cm, roc_
Ejemplo n.º 3
0
    def CSuportVectorMachine(self):
        """Grid-search an ``svm.SVC`` and evaluate it with ``cross_apply``.

        The ``GridSearchCV`` wrapper is appended to ``self.classifiers`` and
        then passed to ``self.cross_apply`` together with ``self.array_train``
        and ``self.target_train``. The predictions returned by
        ``cross_apply`` are stored in ``self.df_pred['svm']``.

        Returns:
            ``(ac, ac_v, p, r, f1, e, cm, roc_)``: the last seven values
            returned by ``cross_apply`` (their meaning is defined there)
            followed by ``self.roc(cm)``.
        """
        parameters = {
            'kernel': ['rbf', 'linear'],
            'gamma': [1e-3, 1e-4],
            'C': [1, 10, 100, 1000],
            # Was ['ovr', 'mutinomial']: SVC only accepts 'ovo' and 'ovr', so
            # half of the grid was being searched with an invalid setting.
            'decision_function_shape': ['ovr', 'ovo']
        }

        grid_svm = GridSearchCV(svm.SVC(), parameters)
        self.classifiers.append(grid_svm)

        pred, ac, ac_v, p, r, f1, e, cm = self.cross_apply(
            grid_svm, self.array_train, self.target_train)
        # The placeholder ``Roc()`` that used to precede this call was
        # overwritten immediately, so it has been dropped.
        roc_ = self.roc(cm)

        self.df_pred['svm'] = pred

        return ac, ac_v, p, r, f1, e, cm, roc_
Ejemplo n.º 4
0
	def CGradienteDesc(self):
		"""Grid-search an ``SGDClassifier`` and evaluate it with ``cross_apply``.

		Unlike the sibling classifier methods that store their search object
		and predictions, this one neither appends to ``self.classifiers`` nor
		writes to ``self.df_pred``.

		Returns:
			``(ac, ac_v, p, r, f1, e, cm, roc_)``: the last seven values
			returned by ``cross_apply`` (their meaning is defined there)
			followed by ``self.roc(cm)``.
		"""
		# NOTE(review): newer scikit-learn releases renamed 'log' to
		# 'log_loss' and 'squared_loss' to 'squared_error'; the names are kept
		# as they were -- confirm against the version this project pins.
		parameters = {
			'loss': [
				'hinge', 'log', 'modified_huber', 'squared_hinge', 'perceptron',
				'squared_loss', 'huber', 'epsilon_insensitive',
				'squared_epsilon_insensitive',
			],
			'penalty': ['l1', 'l2'],
			'alpha': [0.000001, 0.00001, 0.0001, 0.001, 0.1, 1.0],
			'learning_rate': ['constant', 'optimal', 'invscaling'],
			'eta0': [0.01, 0.1, 1.0],
		}

		grid_sgd = GridSearchCV(SGDClassifier(),parameters)

		pred,ac,ac_v,p,r,f1,e,cm = self.cross_apply(grid_sgd,self.array_train,self.target_train)
		# The placeholder ``Roc()`` that used to precede this call was
		# overwritten immediately, so it has been dropped.
		roc_ = self.roc(cm)

		return ac,ac_v,p,r,f1,e,cm,roc_
Ejemplo n.º 5
0
	def committee(self,pesos):
		"""Evaluate a hard-voting committee of the first five stored classifiers.

		``self.classifiers[0..4]`` are registered in the ``VotingClassifier``
		under the names ``nv``, ``svm``, ``dt``, ``rf`` and ``lr`` (in that
		order), so the classifiers must have been appended in that order.
		The predictions returned by ``cross_apply`` are stored in
		``self.df_pred['cm']``.

		Args:
			pesos: passed unchanged as the ``weights`` argument of
				``VotingClassifier``.

		Returns:
			``(ac, ac_v, p, r, f1, e, cm_median, roc_)``: the last seven
			values returned by ``cross_apply`` (their meaning is defined
			there) followed by ``self.roc(cm_median)``.

		Raises:
			IndexError: if fewer than five classifiers are stored.
		"""
		names = ('nv', 'svm', 'dt', 'rf', 'lr')
		# Indexing (rather than zip) keeps the IndexError when a classifier
		# is missing instead of silently building a smaller committee.
		estimators = [(name, self.classifiers[position]) for position, name in enumerate(names)]
		model = VotingClassifier(estimators=estimators, weights=pesos,voting='hard')

		pred,ac,ac_v,p,r,f1,e,cm_median = self.cross_apply(model,self.array_train,self.target_train)

		# The placeholder ``Roc()`` that used to precede this call was
		# overwritten immediately, so it has been dropped.
		roc_ = self.roc(cm_median)

		self.df_pred['cm'] = pred

		return ac,ac_v,p,r,f1,e,cm_median,roc_
Ejemplo n.º 6
0
	def CRandomForest(self):
		"""Grid-search a random forest and evaluate it with ``cross_apply``.

		The ``GridSearchCV`` wrapper is appended to ``self.classifiers`` and
		then passed to ``self.cross_apply`` together with
		``self.array_train`` and ``self.target_train``. The predictions
		returned by ``cross_apply`` are stored in ``self.df_pred['rf']``.

		Returns:
			``(ac, ac_v, p, r, f1, e, cm, roc_)``: the last seven values
			returned by ``cross_apply`` (their meaning is defined there)
			followed by ``self.roc(cm)``.
		"""
		parameters = {
			'n_estimators': [1, 5, 10, 20, 30],
			'criterion': ('gini', 'entropy'),
		}

		grid_rf = GridSearchCV(RandomForestClassifier(),parameters)
		self.classifiers.append(grid_rf)

		pred,ac,ac_v,p,r,f1,e,cm = self.cross_apply(grid_rf,self.array_train,self.target_train)
		# The placeholder ``Roc()`` that used to precede this call was
		# overwritten immediately, so it has been dropped.
		roc_ = self.roc(cm)

		self.df_pred['rf'] = pred

		return ac,ac_v,p,r,f1,e,cm,roc_
Ejemplo n.º 7
0
	def CDecisionTree(self):
		"""Grid-search a decision tree and evaluate it with ``cross_apply``.

		The ``GridSearchCV`` wrapper is appended to ``self.classifiers`` and
		then passed to ``self.cross_apply`` together with
		``self.array_train`` and ``self.target_train``. The predictions
		returned by ``cross_apply`` are stored in ``self.df_pred['dt']``.

		Returns:
			``(ac, ac_v, p, r, f1, e, cm, roc_)``: the last seven values
			returned by ``cross_apply`` (their meaning is defined there)
			followed by ``self.roc(cm)``.
		"""
		parameters = {
			'criterion': ('gini', 'entropy'),
			'splitter': ('best', 'random'),
			# 'auto' was dropped from this grid: for DecisionTreeClassifier it
			# is an alias of 'sqrt' (so it only duplicated work) and newer
			# scikit-learn releases reject it.
			'max_features': ('log2', 'sqrt'),
		}

		grid_dt = GridSearchCV(tree.DecisionTreeClassifier(),parameters)
		self.classifiers.append(grid_dt)

		pred,ac,ac_v,p,r,f1,e,cm = self.cross_apply(grid_dt,self.array_train,self.target_train)
		# The placeholder ``Roc()`` that used to precede this call was
		# overwritten immediately, so it has been dropped.
		roc_ = self.roc(cm)

		self.df_pred['dt'] = pred

		return ac,ac_v,p,r,f1,e,cm,roc_
Ejemplo n.º 8
0
	def CMultinomialNV(self):
		"""Grid-search a multinomial naive Bayes and evaluate it with ``cross_apply``.

		The ``GridSearchCV`` wrapper is appended to ``self.classifiers`` and
		then passed to ``self.cross_apply`` together with
		``self.array_train`` and ``self.target_train``. The predictions
		returned by ``cross_apply`` are stored in ``self.df_pred['nv']``.

		Returns:
			``(ac, ac_v, p, r, f1, e, cm, roc_)``: the last seven values
			returned by ``cross_apply`` (their meaning is defined there)
			followed by ``self.roc(cm)``.
		"""
		parameters = {
			'alpha': [0.000001, 0.00001, 0.0001, 0.001, 0.1, 1.0],
			'fit_prior': [True, False],
		}

		grid_nb = GridSearchCV(MultinomialNB(),parameters)
		self.classifiers.append(grid_nb)

		pred,ac,ac_v,p,r,f1,e,cm = self.cross_apply(grid_nb,self.array_train,self.target_train)
		# The placeholder ``Roc()`` that used to precede this call was
		# overwritten immediately, so it has been dropped.
		roc_ = self.roc(cm)

		self.df_pred['nv'] = pred

		return ac,ac_v,p,r,f1,e,cm,roc_
Ejemplo n.º 9
0
def _run_and_report(sent, run, key, heading, cm_title, cm_file):
    """Time one classifier run, print its metrics and plot its confusion matrix.

    Args:
        sent: object providing ``plot_confuse_matrix(cm, title, name)``.
        run: zero-argument callable returning the 8-tuple
            ``(acc, ac, p, r, f1, e, cm, roc)``.
        key: short identifier stored as the first field of the log row.
        heading: line printed before the metrics.
        cm_title: title passed to ``plot_confuse_matrix``.
        cm_file: output name passed to ``plot_confuse_matrix``.

    Returns:
        ``(elapsed_seconds, log_row, roc)``.
    """
    start = time.time()
    acc, _, p, r, f1, e, cm, roc = run()
    elapsed = time.time() - start

    print(heading)
    print('ac = %f' % acc)
    print('p = %f' % p)
    print('r = %f' % r)
    print('f1 = %f' % f1)
    print('e = %f' % e)
    print("time %f" % elapsed)
    print('---------------')

    sent.plot_confuse_matrix(cm, cm_title, cm_file)

    log_row = (key, acc, p, r, f1, e, str(dt.now()))
    return elapsed, log_row, roc


def mensure(sent):
    """Run the naive Bayes and SGD evaluations and write their reports.

    For each of ``sent.CMultinomialNV`` and ``sent.gradienteDesc`` (in that
    order) the call is timed, its metrics are printed, its confusion matrix
    is plotted and a log row is collected. Afterwards the timings are
    written to ``'Result/tempo-exe'``, the log rows to ``'Result/metricas'``
    and all ROC curves are handed to ``sent.plot_roc_all``.

    The label list passed to ``plot_roc_all`` is now ``['naive', 'sgd']``,
    the order in which the curves are collected; it used to be
    ``['sgd', 'naive']``, which presumably attached each label to the other
    classifier's curve (assuming ``plot_roc_all`` pairs them by position).

    Args:
        sent: classifier facade providing ``CMultinomialNV``,
            ``gradienteDesc``, ``plot_confuse_matrix``, ``write_csv`` and
            ``plot_roc_all``.

    Returns:
        None.
    """
    # (method name, log/timing key, printed heading, matrix title, matrix
    # file, ROC label). Methods are looked up by name right before each run
    # so the first evaluation still executes even if the second is missing.
    runs = (
        ('CMultinomialNV', 'nv', 'Naive',
         'Matriz de Confusao - Naive Bayes', 'matriz-nv', 'naive'),
        ('gradienteDesc', 'sgd', 'Gradiente',
         'Matriz de Confusao - SGD', 'matriz-sgd', 'sgd'),
    )

    custos = pd.DataFrame()
    logs = []
    fpr = []
    tpr = []
    auc_values = []
    label = []

    for method_name, key, heading, cm_title, cm_file, roc_label in runs:
        elapsed, log_row, roc = _run_and_report(
            sent, getattr(sent, method_name), key, heading, cm_title, cm_file)
        custos[key] = [elapsed]
        logs.append(log_row)
        fpr.append(roc.get_fpr())
        tpr.append(roc.get_tpr())
        auc_values.append(roc.get_auc())
        label.append(roc_label)

    sent.write_csv(custos, 'Result/tempo-exe')

    sent.write_csv(logs, 'Result/metricas')

    sent.plot_roc_all(fpr, tpr, auc_values, label)
Ejemplo n.º 10
0
from sent_classification_module import *
from class_roc import Roc

# Script entry point: evaluates the naive Bayes classifier and prints its
# metrics. Only the part of the script visible here is documented.
if __name__ == '__main__':

    # SentClassifiers is presumably provided by the star import from
    # sent_classification_module -- verify there.
    sent = SentClassifiers('dataset-portuguese')

    # Placeholder Roc instances. nv_roc is rebound by the CMultinomialNV()
    # call below; the others are not used in the visible part of the script.
    nv_roc = Roc()
    svm_roc = Roc()
    dt_roc = Roc()
    rf_roc = Roc()
    gd_roc = Roc()
    rl_roc = Roc()
    cm_roc = Roc()

    # Accumulators; only acuracias is filled in the visible part of the script.
    fpr = []
    tpr = []
    auc = []
    acuracias = []

    # CMultinomialNV returns eight values; the second one is discarded here.
    nv_ac, _, nv_p, nv_r, nv_f1, nv_e, nv_cm, nv_roc = sent.CMultinomialNV()

    # Print the scalar metrics with %f formatting.
    print("Naive")
    print('ac = %f' % nv_ac)
    print('p = %f' % nv_p)
    print('r = %f' % nv_r)
    print('f1 = %f' % nv_f1)
    print('e = %f' % nv_e)
    print('---------------')

    acuracias.append(nv_ac)