Esempio n. 1
0
def clasificar_ECOC(X, y, df, trainInputs, trainOutputs, testInputs, testOutputs, graphname):
	"""Train an output-code (ECOC) classifier and report its accuracy.

	A fresh ``OutputCodeClassifier`` wrapping a ``DecisionTreeClassifier`` is
	fitted on the training split.  The train/test scores are printed as
	percentages, followed by the test predictions and the expected test
	labels.

	Args:
		X, y, df: Not used by this function; kept so that existing callers
			passing them positionally keep working.
		trainInputs: Samples the classifier is fitted on.
		trainOutputs: Labels matching ``trainInputs``.
		testInputs: Samples used for evaluation.
		testOutputs: Labels matching ``testInputs``.
		graphname: Label printed in square brackets as the report header.

	Returns:
		The value of ``clf.score(testInputs, testOutputs)``.
	"""
	print("\n[" + str(graphname) + "]")

	# The original also built an RBF kernel here that was never passed to any
	# estimator; that dead local has been dropped.
	clf = OutputCodeClassifier(estimator=DecisionTreeClassifier())
	clf = clf.fit(trainInputs, trainOutputs)

	precisionTrain = clf.score(trainInputs, trainOutputs)
	precisionTest = clf.score(testInputs, testOutputs)
	print("\tCCR train = %.2f%% | CCR test = %.2f%%" % (precisionTrain*100, precisionTest*100))

	# Show predictions next to the expected labels for a quick visual check.
	prediccion_test = clf.predict(testInputs)
	print(prediccion_test)
	print(testOutputs)
	return precisionTest
Esempio n. 2
0
 def evaluateOutputCode(X, Y, printReport=False):
     """Train an output-code classifier on an 80/20 split and evaluate it.

     The data is split with ``train_test_split(test_size=0.2,
     random_state=42)`` and an ``OutputCodeClassifier`` wrapping a
     ``LinearSVC`` is fitted on the training part.

     Args:
         X: Feature matrix.
         Y: Labels matching ``X``.
         printReport: When true, print the elapsed time, the test score and
             how many test samples were predicted as 0, 1 and 2.

     Returns:
         A list ``[score, predicted_ones, clf]``: the classifier's score on
         the held-out split, the number of held-out samples predicted as
         class 1, and the fitted classifier.
     """
     # Named ``started`` rather than ``time`` so the stdlib module name is
     # not shadowed inside this function.
     started = datetime.datetime.now()
     X_train, X_test, Y_train, Y_test = train_test_split(X,
                                                         Y,
                                                         test_size=0.2,
                                                         random_state=42)
     clf = OutputCodeClassifier(LinearSVC(random_state=0),
                                code_size=2,
                                random_state=0)
     clf.fit(X_train, Y_train)

     # Score against the TRUE held-out labels.  The previous version
     # overwrote Y_test with the predictions before computing the returned
     # score, which compared the predictions with themselves and therefore
     # always produced 1.0.
     score = clf.score(X_test, Y_test)
     Y_pred = clf.predict(X_test)

     if printReport:
         # Single-argument print(...) behaves the same on Python 2 and 3.
         print('Training time:' + str(datetime.datetime.now() - started))
         # NOTE(review): the label says "OneVsOne" although this is the
         # output-code classifier; the text is left unchanged in case
         # something parses this output.
         print('Evaluation result: OneVsOne: ' + str(score))
         print('0: ' + str((Y_pred == 0).sum()))
         print('1: ' + str((Y_pred == 1).sum()))
         print('2: ' + str((Y_pred == 2).sum()))
     return [score, (Y_pred == 1).sum(), clf]
Esempio n. 3
0
def OutputCodeClassifier(data, label, pred_data, pred_last):
    """Fit an output-code (ECOC) classifier and predict ``pred_data``.

    An sklearn ``OutputCodeClassifier`` wrapping ``LinearSVC`` is fitted on
    ``data``/``label``.  Three things are printed, in order: the score on the
    training data, the number of predictions on ``pred_data`` that differ
    from ``pred_last``, and the score on ``pred_data``/``pred_last``.

    Values left by the original author (they appear to be the output of one
    run, in the print order above):
        0.76473194506
        Number of mislabeled points out of a total 841 points : 211
        0.749108204518
    Original author's note: needs normalization.

    Args:
        data: Training samples; converted with ``np.array``.
        label: Training labels; converted with ``np.array``.
        pred_data: Samples to predict; converted with ``np.array``.
        pred_last: Expected labels for ``pred_data``; converted with
            ``np.array``.

    Returns:
        The predictions for ``pred_data``.
    """
    data = np.array(data)
    pred_data = np.array(pred_data)
    label = np.array(label)
    pred_last = np.array(pred_last)

    # This function has the same name as the sklearn class, so the class is
    # imported under an alias to keep the two apart.
    from sklearn.multiclass import OutputCodeClassifier as EcocClassifier
    from sklearn.svm import LinearSVC

    clf = EcocClassifier(LinearSVC(random_state=0),
                         code_size=2,
                         random_state=0)
    clf.fit(data, label)

    # Single-argument print(...) works on both Python 2 and Python 3; the
    # former bare print statements were a syntax error on Python 3.
    print(clf.score(data, label))
    pred_result = clf.predict(pred_data)
    print("Number of mislabeled points out of a total %d points : %d" %
          (pred_data.shape[0], (pred_last != pred_result).sum()))
    print(clf.score(pred_data, pred_last))
    return pred_result
def ECOC():
    """Evaluate an ECOC classifier with a 5-NN base estimator on each dataset.

    For every entry of the module-level ``lista_datasets`` the file
    ``./datasets/<entry>`` is loaded with ``arff.loadarff``.  The ``class``
    column is factorized into integer labels, the remaining columns are used
    as features, the data is split 75/25, and the score on the held-out part
    is printed.  A separator line is printed once, after all datasets.

    The split uses no fixed ``random_state``, so the printed score can vary
    between runs.
    """
    # Import the sklearn class locally: this file also defines a function
    # called ``OutputCodeClassifier`` with a different signature, and calling
    # that one with ``code_size=...`` would raise a TypeError.
    from sklearn.multiclass import OutputCodeClassifier

    print('Aplicando metodo multiclase ERROR CORRECTING OUTPUT CODES')
    for indice in lista_datasets:

        print('Base de datos: ' + str(indice))
        dataset = arff.loadarff('./datasets/' + str(indice))
        # The first element of the loadarff result holds the records.
        df = pd.DataFrame(dataset[0])
        # ``features``/``targets`` instead of ``input``/``output`` so the
        # builtin ``input`` is not shadowed.
        features = df.iloc[:, df.columns != 'class']
        targets = pd.factorize(df['class'])[0]
        X_train, X_test, Y_train, Y_test = train_test_split(features, targets, test_size=0.25)

        clf = OutputCodeClassifier(KNeighborsClassifier(n_neighbors=5), code_size=2, random_state=0)
        clf.fit(X_train, Y_train)

        print('Porcentaje de bien clasificados ERROR CORRECTING OUTPUT CODES')
        print(clf.score(X_test, Y_test))
    print('--------------------------')
Esempio n. 5
0
# Test split: keep only the samples whose label is 0, 1, 7 or 8.
threshold_test = np.where(np.isin(y_test, (0, 1, 7, 8)))
y_test_thres = y_test[threshold_test]
x_test_thres = x_test[threshold_test]

# ---------------------------------------------------------------------------
# Training a classifier (4 numbers)
# ---------------------------------------------------------------------------

# Iteration cap handed to the Perceptron base estimator.
num_iter = 5

start_time_OCC = time.time()

# Output-code classifier on top of a Perceptron; fit() returns the estimator
# itself, so construction and training are chained.
OCC = OutputCodeClassifier(
    Perceptron(max_iter=num_iter, random_state=0)
).fit(x_train_thres, y_train_thres)
predictionsOCC = OCC.predict(x_test_thres)
scoreOCC = OCC.score(x_test_thres, y_test_thres)

# Confusion matrix of true vs. predicted labels, drawn as an annotated heatmap
# with the score in the title.
cmOCC = metrics.confusion_matrix(y_test_thres, predictionsOCC)
all_sample_title = 'OCC - Accuracy Score: {0}'.format(scoreOCC)

plt.figure(figsize=(9, 9))
sns.heatmap(cmOCC, annot=True, fmt=".3f", linewidths=.5, square=True,
            cmap='Blues_r')
plt.xlabel('Predicted label')
plt.ylabel('Actual label')
plt.title(all_sample_title, size=15)
plt.show()
Esempio n. 6
0
from sklearn import datasets
from sklearn.multiclass import OutputCodeClassifier
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score
import warnings

# Suppress all warnings for the rest of the script.
warnings.filterwarnings('ignore')

iris = datasets.load_iris()
X = iris.data
y = iris.target
print('样本数量:%d, 特征数量:%d' % X.shape)

# Build the model.
# code_size: specifies how many sub-models are ultimately used; the actual
# number of sub-models = code_size * number of labels.
clf = OutputCodeClassifier(LinearSVC(random_state=0), code_size=30, random_state=0)
clf.fit(X, y)

# Print the predicted values, then the accuracy computed two ways.
print(clf.predict(X))
print('准确率:%.3f' % accuracy_score(y, clf.predict(X)))
print(clf.score(X, y))

# Print the model attributes: every fitted sub-model, numbered from 1.
for position, estimator in enumerate(clf.estimators_, start=1):
    print('第%d个模型:' % position, end='')
    print(estimator)

print(clf.classes_)
Esempio n. 7
0
import numpy
import pandas
from numpy import genfromtxt
from sklearn import datasets
from sklearn import metrics
from sklearn.datasets import make_classification
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.multiclass import OutputCodeClassifier
from sklearn.svm import LinearSVC
from sklearn.svm import SVC

# Number of leading rows used for training; every remaining row is used for
# testing.  Kept in one place so the feature and target slices cannot drift
# apart.
TRAIN_ROWS = 760

# Features: the first six comma-separated columns, parsed as integers.
my_data = genfromtxt('cartrain.csv',
                     delimiter=',',
                     dtype=int,
                     usecols=(0, 1, 2, 3, 4, 5))
# Targets, parsed as integers.
# NOTE(review): assumes target.csv has one label per row of cartrain.csv, in
# the same order - confirm.
mytarget = genfromtxt('target.csv', delimiter=',', dtype=int)

trainx, testx = my_data[:TRAIN_ROWS], my_data[TRAIN_ROWS:]
trainy, testy = mytarget[:TRAIN_ROWS], mytarget[TRAIN_ROWS:]

# Output-code classifier wrapping a linear SVM; print its score on the
# held-out rows.
model = OutputCodeClassifier(LinearSVC(random_state=0),
                             code_size=2,
                             random_state=1)
model.fit(trainx, trainy)
print(model.score(testx, testy))