def sklearn_multiclass_prediction(mode, X_train, y_train, X_test):
    '''
    Use Scikit Learn built-in functions multiclass.OneVsRestClassifier
    and multiclass.OneVsOneClassifier to perform multiclass classification.

    Arguments:
        mode: one of 'ovr', 'ovo' or 'crammer'.
        X_train, X_test: numpy ndarray of training and test features.
        y_train: labels of training data, from 0 to 9.

    Returns:
        y_pred_train, y_pred_test: a tuple of 2 numpy ndarrays,
                                   being your prediction of labels on
                                   training and test data, from 0 to 9.
    '''
    if mode == 'ovr':
        clf = multiclass.OneVsRestClassifier(svm.LinearSVC(random_state=12345))
        clf.fit(X_train, y_train)
        y_pred_train = clf.predict(X_train)
        y_pred_test = clf.predict(X_test)
    elif mode == 'ovo':
        clf = multiclass.OneVsOneClassifier(svm.LinearSVC(random_state=12345))
        clf.fit(X_train, y_train)
        y_pred_train = clf.predict(X_train)
        y_pred_test = clf.predict(X_test)
    else:
        clf = svm.LinearSVC(multi_class='crammer_singer', random_state=12345)
        clf.fit(X_train, y_train)
        y_pred_train = clf.predict(X_train)
        y_pred_test = clf.predict(X_test)
    return y_pred_train, y_pred_test
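A quick way to sanity-check this helper is to run it on a small synthetic problem. The sketch below is illustrative only: it assumes `sklearn_multiclass_prediction` plus the `multiclass`/`svm` imports above are in scope, and the dataset is made up.

# Hedged usage sketch (synthetic data; not from the original source).
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=500, n_features=20, n_informative=10,
                           n_classes=10, random_state=0)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=0)

for mode in ('ovr', 'ovo', 'crammer'):
    pred_tr, pred_te = sklearn_multiclass_prediction(mode, X_tr, y_tr, X_te)
    print(mode, accuracy_score(y_tr, pred_tr), accuracy_score(y_te, pred_te))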
Example #2
def multiclass_example():
    iris = datasets.load_iris()
    X, y = iris.data, iris.target

    random_state = 0
    clf = svm.LinearSVC(random_state=random_state)

    #--------------------
    ovr_clf = multiclass.OneVsRestClassifier(clf)
    ovr_clf.fit(X, y)
    pred = ovr_clf.predict(X)
    print('Prediction (ovr) =\n', pred)

    #--------------------
    ovo_clf = multiclass.OneVsOneClassifier(clf)
    ovo_clf.fit(X, y)
    pred = ovo_clf.predict(X)
    print('Prediction (ovo) =\n', pred)

    #--------------------
    oc_clf = multiclass.OutputCodeClassifier(clf,
                                             code_size=2,
                                             random_state=random_state)
    oc_clf.fit(X, y)
    pred = oc_clf.predict(X)
    print('Prediction (oc) =\n', pred)
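To quantify how the three fits differ, something like the following could be appended at the end of multiclass_example (a sketch; assumes sklearn.metrics is imported alongside the modules used above):

    # Hedged sketch: compare training accuracy of the three meta-estimators.
    from sklearn import metrics
    for name, est in [('ovr', ovr_clf), ('ovo', ovo_clf), ('oc', oc_clf)]:
        print(name, 'accuracy =', metrics.accuracy_score(y, est.predict(X)))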
Example #3
def multiClass(trainx, trainy, testx, testy):
    """ Handle multiclass problems using binary classifiers

    :param trainx: training-set examples
    :param trainy: training-set labels
    :param testx: test-set examples
    :param testy: test-set labels

    """

    Unvs1 = multiclass.OneVsOneClassifier(svm.LinearSVC(random_state=0))
    UnvsAll = multiclass.OneVsRestClassifier(svm.LinearSVC(random_state=0))

    Unvs1.fit(trainx, trainy)
    err_Unvs1_train = round(1 - Unvs1.score(trainx, trainy), 3)
    err_Unvs1_test = round(1 - Unvs1.score(testx, testy), 3)

    UnvsAll.fit(trainx, trainy)
    err_UnvsAll_train = round(1 - UnvsAll.score(trainx, trainy), 3)
    err_UnvsAll_test = round(1 - UnvsAll.score(testx, testy), 3)

    print("Err_1vs1 : train %f, test %f\n" % (err_Unvs1_train, err_Unvs1_test))
    print("Err_1vsAll : train %f, test %f\n" %
          (err_UnvsAll_train, err_UnvsAll_test))

    print("========== Prediction UnvsUn ==============")
    print(Unvs1.predict(testx).shape)

    print("========= Prediction UnvsRest =============")
    print(UnvsAll.predict(testx).shape)
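multiClass only needs the four arrays, so a hedged driver on iris (assuming the sklearn imports used above) might look like:

# Hedged usage sketch, not part of the original source.
from sklearn import datasets
from sklearn.model_selection import train_test_split

iris = datasets.load_iris()
trainx, testx, trainy, testy = train_test_split(iris.data, iris.target,
                                                test_size=0.3, random_state=0)
multiClass(trainx, trainy, testx, testy)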
Example #4
def SVM_FullConnected(pretrained_model, C, d_, batch_size=141, epoches=32):
    start_time = time.time()
    # build the training-data input pipeline
    train_ds = ReadImage.getKmeansDataSet(batch_size)
    print('training pipeline ready')
    iterator = train_ds.make_initializable_iterator()
    data_element = iterator.get_next()
    sess = tf.Session()
    sess.run(iterator.initializer)
    train_label = []  # incremental learning: no need to materialise all features at once
    train_data = []
    for i in range(epoches):
        cur_train_image, cur_train_label = sess.run(data_element)
        cur_train_data = pretrained_model.predict(
            cur_train_image)  # the SVM consumes the mean-pooled output vectors
        train_data.extend(cur_train_data)
        train_label.extend(cur_train_label)
    print('training data extracted; shape %d %d' % np.shape(train_data))
    sess.close()
    train_data = np.asarray(train_data)
    train_label = np.asarray(train_label)
    # load the test data
    test_image, test_label = ReadImage.getTestDateSet('bow')
    print('test data collected')
    test_data = pretrained_model.predict(test_image)
    print('test data extracted; shape %d %d' % np.shape(test_data))
    end_time1 = time.time()
    print('data extraction took %.8ss' % (end_time1 - start_time))
    # reduce the dimensionality of the training and test data
    pca_model = KernelPCA(n_components=d_, kernel='rbf')  # RBF-kernel reduction
    pca_model.fit(X=train_data)
    train_data_d_ = pca_model.transform(train_data)
    print('reduced training-set shape: %d %d' % np.shape(train_data_d_))
    test_data_d_ = pca_model.transform(test_data)
    print('reduced test-set shape: %d %d' % np.shape(test_data_d_))
    d_time = time.time()
    print('reduction took %.8ss' % (d_time - end_time1))
    # feed the training and test data to the SVM
    svc_classifier = svm.SVC(C=C, kernel='rbf', random_state=0, gamma='auto')
    model = multiclass.OneVsOneClassifier(svc_classifier, n_jobs=-1)
    clf = model.fit(train_data_d_, train_label)
    end_time2 = time.time()
    print('training done in %.8ss' % (end_time2 - d_time))
    train_accuracy = clf.score(train_data_d_, train_label)
    end_time3 = time.time()
    print('train_accuracy: %f, took %.8s s' %
          (train_accuracy, end_time3 - end_time2))
    test_accuracy = clf.score(test_data_d_, test_label)
    end_time4 = time.time()
    print('test_accuracy: %f, took %.8s s' % (test_accuracy, end_time4 - end_time3))
    print('all done; total time %.8s s' % (end_time4 - start_time))
    f = open('./BoW/txt_record/svmFCRecord.txt', 'a+')
    f.write('penalty C: %f \n' % C)
    # f.write('reduced dimension: %d, method: RBF kernel\n' % d_)
    f.write('one vs one classifier')
    f.write('train accuracy: %.4f test accuracy: %.4f\n\r' % (train_accuracy, test_accuracy))
    f.close()
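Stripped of the TensorFlow input pipeline and the file logging, the modelling core above is an RBF KernelPCA reduction feeding a one-vs-one RBF SVM. A minimal self-contained sketch of that pipeline (synthetic data; d_=16 and C=1.0 are assumed placeholder values):

# Hedged sketch of the KernelPCA -> OneVsOne(SVC) core, on synthetic data.
from sklearn import multiclass, svm
from sklearn.datasets import make_classification
from sklearn.decomposition import KernelPCA

X, y = make_classification(n_samples=300, n_features=64, n_informative=16,
                           n_classes=4, random_state=0)
pca_model = KernelPCA(n_components=16, kernel='rbf')  # d_ = 16 (assumed)
X_d = pca_model.fit_transform(X)

clf = multiclass.OneVsOneClassifier(
    svm.SVC(C=1.0, kernel='rbf', random_state=0, gamma='auto'))
clf.fit(X_d, y)
print('train accuracy: %.4f' % clf.score(X_d, y))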
Example #5
def sklearn_multiclass_prediction(mode, X_train, y_train, X_test):
    '''
    Use Scikit Learn built-in functions multiclass.OneVsRestClassifier
    and multiclass.OneVsOneClassifier to perform multiclass classification.

    Arguments:
        mode: one of 'ovr', 'ovo' or 'crammer'.
        X_train, X_test: numpy ndarray of training and test features.
        y_train: labels of training data, from 0 to 9.

    Returns:
        y_pred_train, y_pred_test: a tuple of 2 numpy ndarrays,
                                   being your prediction of labels on
                                   training and test data, from 0 to 9.
    '''
    if mode == 'ovr':
        model = multiclass.OneVsRestClassifier(
            svm.LinearSVC(random_state=12345))
        # print("start training")
        start = time.time()
        model.fit(X_train, y_train)
        total = time.time() - start
        # print("elapsed time {:3.2f}, start predicting".format(total))
        result = (model.predict(X_train), model.predict(X_test))
        return result
    elif mode == 'ovo':
        model = multiclass.OneVsOneClassifier(
            svm.LinearSVC(random_state=12345))
        # print("start training")
        start = time.time()
        model.fit(X_train, y_train)
        total = time.time() - start
        # print("elapsed time {:3.2f}, start predicting".format(total))
        # print("start predicting")
        result = (model.predict(X_train), model.predict(X_test))
        return result
    else:
        model = svm.LinearSVC(multi_class='crammer_singer', random_state=12345)
        # print("start training")
        start = time.time()
        model.fit(X_train, y_train)
        total = time.time() - start
        # print("elapsed time {:3.2f} start predicting".format(total))
        # print("start predicting")
        result = (model.predict(X_train), model.predict(X_test))
        return result
Example #6
def learnSVM(C, train_code, train_label, test_code, test_label):
    n = np.shape(train_code)[1]  # n_clusters, i.e. the feature dimension
    print('starting SVM training')
    svc_classifier = svm.SVC(C=C, kernel='rbf', random_state=0, gamma='auto')
    model = multiclass.OneVsOneClassifier(svc_classifier, n_jobs=-1)
    clf = model.fit(train_code, train_label)
    joblib.dump(clf,
                filename='./BoW/result/svm_' + str(n) + '_' + str(C) + '_.pkl')
    print('training done')
    train_accuracy = clf.score(train_code, train_label)
    print('train_accuracy:%f' % train_accuracy)
    test_accuracy = clf.score(test_code, test_label)
    print('test_accuracy:%f' % test_accuracy)
    f = open('./BoW/txt_record/svmRecord.txt', 'a+')
    f.write('feature dimension: %d \n' % n)
    f.write('penalty C: %f \n' % C)
    f.write('train accuracy: %.4f test accuracy: %.4f\n\r' % (train_accuracy, test_accuracy))
    f.close()
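Because learnSVM persists the fitted one-vs-one model with joblib, a later session can reload and reuse it without retraining. A hedged sketch: the n and C values are assumed to match an earlier run, and test_code/test_label must have the same feature layout as training.

# Hedged reload sketch; the n/C values and test arrays are assumptions.
import joblib  # on old scikit-learn: from sklearn.externals import joblib

n, C = 64, 1.0
clf = joblib.load('./BoW/result/svm_' + str(n) + '_' + str(C) + '_.pkl')
print('test accuracy: %f' % clf.score(test_code, test_label))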
Example #7
 def build(self, input_model, model_calibrator_id, model_calibrator_params):
     """Build a model calibrator using the specified id"""
     if model_calibrator_id == 'sklearn_CalibratedClassifierCV':
         params = model_calibrator_params
         params['base_estimator'] = input_model
         return calibration.CalibratedClassifierCV(**params)
     elif model_calibrator_id == 'sklearn_GridSearchCV':
         params = model_calibrator_params
         params['estimator'] = input_model
         return model_selection.GridSearchCV(**params)
     elif model_calibrator_id == 'sklearn_OneVsRestClassifier':
         params = model_calibrator_params
         params['estimator'] = input_model
         return multiclass.OneVsRestClassifier(**params)
     elif model_calibrator_id == 'sklearn_OneVsOneClassifier':
         params = model_calibrator_params
         params['estimator'] = input_model
         return multiclass.OneVsOneClassifier(**params)
     return None
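A hedged usage sketch for build; `factory` is a placeholder for whatever object hosts the method, since the class is not named in this excerpt:

 # Hypothetical caller; `factory` stands in for the hosting object.
 from sklearn import svm

 base = svm.LinearSVC(random_state=0)
 ovr = factory.build(base, 'sklearn_OneVsRestClassifier', {'n_jobs': -1})
 # equivalent to multiclass.OneVsRestClassifier(estimator=base, n_jobs=-1)

Note that build writes the estimator into the params dict it is handed, so a caller that reuses one dict across calls will see it mutated.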
Example #8
def sklearn_multiclass_prediction(mode, X_train, y_train, X_test):
    """
    Use Scikit Learn built-in functions multiclass.OneVsRestClassifier
    and multiclass.OneVsOneClassifier to perform multiclass classification.

    Arguments:
        mode: one of 'ovr', 'ovo' or 'crammer'.
        X_train, X_test: numpy ndarray of training and test features.
        y_train: labels of training data, from 0 to 9.

    Returns:
        y_pred_train, y_pred_test: a tuple of 2 numpy ndarrays,
                                   being your prediction of labels on
                                   training and test data, from 0 to 9.
    """
    y_pred_train = None
    y_pred_test = None
    # using random_state=12345 for reproducibility
    svm_model = svm.LinearSVC(random_state=12345)
    if mode == 'ovr':
        ovr_model = multiclass.OneVsRestClassifier(svm_model)
        ovr_model.fit(X_train, y_train)
        y_pred_train = ovr_model.predict(X_train)
        y_pred_test = ovr_model.predict(X_test)
    elif mode == 'ovo':
        ovo_model = multiclass.OneVsOneClassifier(svm_model)
        ovo_model.fit(X_train, y_train)
        y_pred_train = ovo_model.predict(X_train)
        y_pred_test = ovo_model.predict(X_test)
    elif mode == 'crammer':
        # using random_state=12345 for reproducibility
        crammer_singer_model = svm.LinearSVC(multi_class='crammer_singer',
                                             random_state=12345)
        crammer_singer_model.fit(X_train, y_train)
        y_pred_train = crammer_singer_model.predict(X_train)
        y_pred_test = crammer_singer_model.predict(X_test)
    else:
        print("Invalid mode. Mode should be 'ovr', 'ovo' or 'crammer'.")

    return y_pred_train, y_pred_test
Example #9
def svmClassification(n_cluster, C):
    start_time = time.time()
    wholelabel = readCsv('label', 'wholelabel', n_cluster)

    train_data = np.asarray(readCsv('data', 'train', n_cluster))
    print(np.shape(train_data))
    train_label_str = readCsv('label', 'train', n_cluster)
    train_data = str2float(train_data)
    train_label_int = getHistogramlabel(train_label_str, wholelabel)
    print('training data loaded')
    test_data = np.asarray(readCsv('data', 'test', n_cluster))
    print(np.shape(test_data))
    test_data = str2float(test_data)
    test_label_str = readCsv('label', 'test', n_cluster)
    test_label_int = getHistogramlabel(test_label_str, wholelabel)
    print('test data loaded')

    svc_classifier = svm.SVC(C=C, kernel='rbf', gamma='scale')
    model = multiclass.OneVsOneClassifier(svc_classifier, n_jobs=-1)
    print('starting training')
    clf = model.fit(train_data, train_label_int)
    print('training done')
    train_accuracy = clf.score(train_data, train_label_int)
    test_accuracy = clf.score(test_data, test_label_int)
    print('train_accuracy:%f' % train_accuracy)
    print('test_accuracy:%f' % test_accuracy)

    end_time = time.time()
    f = open('../result/svm/svmRecord.txt', 'a+')
    f.write('feature dimension: %d \n' % n_cluster)
    f.write('penalty C: %f \n' % C)
    f.write('train accuracy: %.4f test accuracy: %.4f\n' % (train_accuracy, test_accuracy))
    f.write('elapsed: %.8s \r\n' % (end_time - start_time))
    f.close()

    print('elapsed: %.8s' % (end_time - start_time))
    print('training finished')
Example #10
def sklearn_multiclass_prediction(mode, X_train, y_train, X_test):
    '''
    Use Scikit Learn built-in functions multiclass.OneVsRestClassifier
    and multiclass.OneVsOneClassifier to perform multiclass classification.

    Arguments:
        mode: one of 'ovr', 'ovo' or 'crammer'.
        X_train, X_test: numpy ndarray of training and test features.
        y_train: labels of training data, from 0 to 9.

    Returns:
        y_pred_train, y_pred_test: a tuple of 2 numpy ndarrays,
                                   being your prediction of labels on
                                   training and test data, from 0 to 9.
    '''
    # X_train dimension: (5000, 784)
    # y_train dimension: (5000,)
    if mode == 'ovr':
        clf = svm.LinearSVC(multi_class='ovr', random_state=12345)
        ovr_classifier = multiclass.OneVsRestClassifier(clf)
        ovr_classifier.fit(X_train, y_train)
        y_pred_train = ovr_classifier.predict(X_train)
        y_pred_test = ovr_classifier.predict(X_test)
    elif mode == 'ovo':
        clf = svm.LinearSVC(random_state=12345)
        ovo_classifier = multiclass.OneVsOneClassifier(clf)
        ovo_classifier.fit(X_train, y_train)
        y_pred_train = ovo_classifier.predict(X_train)
        y_pred_test = ovo_classifier.predict(X_test)
    else:  # mode == 'crammer'
        clf = svm.LinearSVC(multi_class='crammer_singer', random_state=12345)
        clf.fit(X_train, y_train)
        y_pred_train = clf.predict(X_train)
        y_pred_test = clf.predict(X_test)
    return y_pred_train, y_pred_test
Example #11
def sklearn_multiclass_prediction(mode, X_train, y_train, X_test):
    '''
    Use Scikit Learn built-in functions multiclass.OneVsRestClassifier
    and multiclass.OneVsOneClassifier to perform multiclass classification.

    Arguments:
        mode: one of 'ovr', 'ovo' or 'crammer'.
        X_train, X_test: numpy ndarray of training and test features.
        y_train: labels of training data, from 0 to 9.

    Returns:
        y_pred_train, y_pred_test: a tuple of 2 numpy ndarrays,
                                   being your prediction of labels on
                                   training and test data, from 0 to 9.
    '''
    clf = None
    estimator = svm.LinearSVC(random_state=12345, verbose=False)
    #
    if mode == 'ovr':
        clf = multiclass.OneVsRestClassifier(estimator=estimator, n_jobs=-1)
    elif mode == 'ovo':
        clf = multiclass.OneVsOneClassifier(estimator=estimator, n_jobs=-1)
    elif mode == 'crammer':
        clf = svm.LinearSVC(random_state=12345, multi_class='crammer_singer')
    else:
        print("Invalid mode {:s}".format(mode))
        return -1

    # Fit the model with given data
    clf.fit(X_train, y_train)

    # Predict the training data using the model
    y_pred_train = clf.predict(X_train)
    # Predict the testing data using the model
    y_pred_test = clf.predict(X_test)

    return y_pred_train, y_pred_test
Example #12
def train_model(folders):
    """
    Takes a list of folders from which to draw data files to train the model.
    Parses sentences in a similar way to when testing, by iteratively looking at
    target nodes in the remaining subtrees of the sentence. For each pair, the
    algorithm derives a list of features and a correct construction action. Once
    these are all found, it uses them to generate a model, which is returned.
    ========== INCOMPLETE ==========
    """
    raw_features = []
    classifications = []
    for filepath in data_file_paths_for_folders(folders):
        for sentence in dt.parsed_sents(filepath):
            T = flattened_node_list(sentence)
            i = 0
            no_construction = True
            while len(T) >= 1:
                if i == len(T) - 1:
                    if no_construction:
                        break
                    no_construction = True
                    i = 0
                else:
                    target_features = get_contextual_features(T, i)
                    target_classification = get_classification(T, i, sentence)
                    raw_features.append(target_features)
                    classifications.append(target_classification)
                    construction(T, i, target_classification)
                    if target_classification != SHIFT:
                        no_construction = False
                i += 1
    vectorizer = DictVectorizer()
    feature_matrix = vectorizer.fit_transform(raw_features)
    feature_names = vectorizer.get_feature_names()
    model = multiclass.OneVsOneClassifier(svm.LinearSVC())
    model.fit(feature_matrix, classifications)
    return vectorizer, model
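At test time the returned pair is used together: the vectorizer maps a raw feature dict into the same column space, and the model picks a construction action. A hedged sketch; the folder path and the T/i inputs are placeholders, and get_contextual_features is the same helper used during training above.

# Hypothetical prediction step mirroring the training loop above.
vectorizer, model = train_model(['path/to/training/folders'])  # placeholder path
feats = get_contextual_features(T, i)  # same feature extraction as training
action = model.predict(vectorizer.transform([feats]))[0]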
Example #13
	n_init=10,
	max_iter=300,
	tol=0.0001,
	precompute_distances='auto',
	verbose=0,
	random_state=None,
	copy_x=True,
	n_jobs=-1)
all_ask_cluster_model.fit(all_ask_prices_nm)
all_ask_labels = all_ask_cluster_model.predict(all_ask_prices_nm)

#Classifying on the basis of clusters
print("Classifying...")
bid_cluster_classifier_ada = multiclass.OneVsOneClassifier(estimator=ensemble.AdaBoostClassifier(base_estimator=None,
	n_estimators=50,
	learning_rate=1.0,
	algorithm='SAMME.R',
	random_state=None),
	n_jobs=-1)
bid_cluster_classifier_ada.fit(trainFeatures, all_bid_labels)
print("Bid accuracy with AdaBoost: ", bid_cluster_classifier_ada.score(trainFeatures, all_bid_labels))

ask_cluster_classifier_ada = multiclass.OneVsOneClassifier(estimator=ensemble.AdaBoostClassifier(base_estimator=None,
	n_estimators=50,
	learning_rate=1.0,
	algorithm='SAMME.R',
	random_state=None),
	n_jobs=-1)
ask_cluster_classifier_ada.fit(trainFeatures, all_ask_labels)
print("Ask accuracy with AdaBoost Classifier: ", ask_cluster_classifier_ada.score(trainFeatures, all_ask_labels))

ada = {'bid': bid_cluster_classifier_ada, 'ask': ask_cluster_classifier_ada}
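The pattern in this example, unsupervised KMeans labels turned into a supervised one-vs-one target, can be reproduced end-to-end on synthetic data; a minimal hedged sketch:

# Hedged sketch: cluster prices, then learn to predict the cluster labels.
import numpy as np
from sklearn import cluster, ensemble, multiclass

rng = np.random.RandomState(0)
prices = rng.rand(200, 1)
features = rng.rand(200, 5)

km = cluster.KMeans(n_clusters=4, random_state=0).fit(prices)
labels = km.predict(prices)

clf = multiclass.OneVsOneClassifier(
    ensemble.AdaBoostClassifier(n_estimators=50, random_state=0), n_jobs=-1)
clf.fit(features, labels)
print("train accuracy:", clf.score(features, labels))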
Example #14
for ix, row in test_table.iterrows():
    X = (np.array(row[featureColumns])).flatten('F')
    testX[index, :] = X

    index = index + 1

print "Classifier for Clusters..."
bid_cluster_classifier = multiclass.OneVsOneClassifier(
    estimator=ensemble.RandomForestClassifier(n_estimators=30,
                                              criterion='gini',
                                              max_depth=None,
                                              min_samples_split=2,
                                              min_samples_leaf=1,
                                              min_weight_fraction_leaf=0.0,
                                              max_features='auto',
                                              max_leaf_nodes=None,
                                              bootstrap=True,
                                              oob_score=False,
                                              n_jobs=1,
                                              random_state=None,
                                              verbose=0,
                                              warm_start=False,
                                              class_weight=None),
    n_jobs=-1)
bid_cluster_classifier.fit(trainX, all_bid_labels)
print "Bid accuracy with Random Forest: ", bid_cluster_classifier.score(
    trainX, all_bid_labels)

ask_cluster_classifier = multiclass.OneVsOneClassifier(
    estimator=ensemble.RandomForestClassifier(n_estimators=30,
                                              criterion='gini',
Example #15
        float(sl[7]), float(sl[8]), float(sl[9]), float(sl[10]), float(sl[11])]])
    data = np.concatenate((data,d))
    target = np.append(target, sl[0])
data = data[1:,:]
file_in.close()


# define classifiers
kNeighborsClassifier = neighbors.KNeighborsClassifier()
nearestCentroid = neighbors.NearestCentroid()
gaussianNB = naive_bayes.GaussianNB()
multinomialNB = naive_bayes.MultinomialNB()
bernoulliNB = naive_bayes.BernoulliNB()
linearSVC = svm.LinearSVC()
oneVsRestClassifier = multiclass.OneVsRestClassifier(linearSVC)
oneVsOneClassifier = multiclass.OneVsOneClassifier(linearSVC)
ridgeClassifier = linear_model.RidgeClassifier()
logisticRegression = linear_model.LogisticRegression()
decisionTreeClassifier = tree.DecisionTreeClassifier()
extraTreeClassifier = tree.ExtraTreeClassifier()
extraTreesClassifier = ensemble.ExtraTreesClassifier()
adaBoost = ensemble.AdaBoostClassifier()
randomForest = ensemble.RandomForestClassifier()
baggingClassifier = ensemble.BaggingClassifier()
gradientBoostingClassifier = ensemble.GradientBoostingClassifier()

classifiers = [
    kNeighborsClassifier,
    nearestCentroid,
    gaussianNB,
    multinomialNB,
    bernoulliNB,
    linearSVC,
    oneVsRestClassifier,
    oneVsOneClassifier,
    ridgeClassifier,
    logisticRegression,
    decisionTreeClassifier,
    extraTreeClassifier,
    extraTreesClassifier,
    adaBoost,
    randomForest,
    baggingClassifier,
    gradientBoostingClassifier,
]
Example #16
cnt = 0
for i, v in enumerate(word_vectors.index2word):
    if v.startswith("a"):
        x_train.append(word_vectors.vectors[i])
        y_value = 5
        if v in target_author:
            y_value = target_author[v]
            cnt = cnt + 1
        y_train.append(y_value)  # scalar label; a 1-D y avoids a shape warning in fit

x = np.array(x_train)
y = np.array(y_train)

x_train = x[:int((0.8 * len(x)))]
y_train = y[:int((0.8 * len(x)))]

x_test = x[int(0.8 * len(x)):]
y_test = y[int(0.8 * len(x)):]

print("Training:\n")
model = multiclass.OneVsOneClassifier(svm.LinearSVC())
model.fit(x_train, y_train)
print("Saving model to model.pkl\n")
joblib.dump(model, 'model.pkl')
print("Accuracy on test\n")
y_test_predicted = model.predict(x_test)
metrics.accuracy_score(y_test, y_test_predicted)
print("Accuracy on train\n")
y_train_predicted = model.predict(x_train)
metrics.accuracy_score(y_train, y_train_predicted)
Example #17
 def generate_classifier(self):
     self.classifier = sklearn_multiclass.OneVsOneClassifier(
         sklearn_svm.SVC(kernel=self.kernel, gamma=self.gamma, C=self.constant))
Example #18
# (truncated 10x10 confusion-matrix output carried over from an earlier notebook cell)
# ## sklearn svm

# In[ ]:


# Multi class using SK learn 1 vs 1
# Initialize classifier
classifier = multiclass.OneVsOneClassifier(svm.SVC(kernel='rbf', C=1, gamma=0.05))


# In[ ]:


Classifiers = []


# In[ ]:


# Train
Yi = np.ones(2250).astype(float)
# numCLF = 0
st = time.time()
Example #19
def skl_ovo(X_train, y_train, X_test):
    model = multiclass.OneVsOneClassifier(svm.LinearSVC()).fit(
        X_train, y_train)
    y_pred_train = model.predict(X_train)
    y_pred_test = model.predict(X_test)
    return y_pred_train, y_pred_test
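A hedged usage sketch for skl_ovo on iris (assumes the sklearn imports used above):

# Hedged usage sketch, not part of the original source.
from sklearn import datasets
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

iris = datasets.load_iris()
X_tr, X_te, y_tr, y_te = train_test_split(iris.data, iris.target, random_state=0)
pred_tr, pred_te = skl_ovo(X_tr, y_tr, X_te)
print('test accuracy:', accuracy_score(y_te, pred_te))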
Example #20
remaining_x = np.genfromtxt(
    './Datasets/ismir04_genre/RemainingFeaturesISMIR.csv', delimiter=',')
X = np.hstack((X, remaining_x))

scale = MinMaxScaler((-1, 1))
scaled_x = scale.fit_transform(X)

ovo_feature_sets = np.load('./Results/Experiment2/ISMIR/ind_ovo_fs.npy')

ovo_fs_hs = ovo_feature_sets[:, 0]
ovo_fs_cs = ovo_feature_sets[:, 1]
ovo_fs_dfa = ovo_feature_sets[:, 2]

clf = svm.SVC(kernel='rbf', C=3, gamma=0.02)

ovo_clf = multiclass.OneVsOneClassifier(clf)
ovo_clf_hs = ovo_multi_features.OneVsOneClassifier(clf, ovo_fs_hs)
ovo_clf_cs = ovo_multi_features.OneVsOneClassifier(clf, ovo_fs_cs)
ovo_clf_dfa = ovo_multi_features.OneVsOneClassifier(clf, ovo_fs_dfa)

cv = len(np.unique(y))
dataset_size = len(y)
scores_array_length = cv * 10

no_fs_time = np.empty(scores_array_length)
hs_time = np.empty(scores_array_length)
cs_time = np.empty(scores_array_length)
dfa_time = np.empty(scores_array_length)

no_fs_score = np.empty(scores_array_length)
hs_score = np.empty(scores_array_length)
Example #21
                                     min_samples_leaf=3,
                                     oob_score=True,
                                     bootstrap=True,
                                     random_state=4)
_RFC.fit(Xconcat.drop('y', 1), Xconcat.y)
_RFC.score(Xconcat.drop('y', 1), Xconcat.y)
_RFC.oob_score_
_RFC.score(testConcat.drop('y', 1), testConcat.y)
_RFC.feature_importances_

### Multi-class tests ###

_SVC = svm.SVC(class_weight='auto')

#One versus one
_OVO = multiclass.OneVsOneClassifier(_SVC, n_jobs=-1)
_OVO.fit(X.get_values(), y.get_values())
_OVO.score(Xtest.get_values(), ytest.get_values())

#One versus rest
_OVR = multiclass.OneVsRestClassifier(_SVC, n_jobs=-1)
_OVR.fit(X.get_values(), y.get_values())
_OVR.score(Xtest.get_values(), ytest.get_values())

#ECOC
for i in np.arange(1, 10):
    _ECOC = multiclass.OutputCodeClassifier(_SVC, i, n_jobs=-1)
    _ECOC.fit(X.get_values(), y.get_values())
    _ECOC.score(Xtest.get_values(), ytest.get_values())

#Random Forest using the initial values, without SVM
Example #22
def svm(data,
        kernel,
        classification,
        weighted=False,
        plot=False,
        onePlot=False):

    # list of labels
    labels = ["1", "2", "3", "5", "6", "7"]

    # initialize lists for confusion matrices, accuracies, and best parameters
    confusions = []
    accuracies = []
    bestParams = []
    plots = []
    timePerFold = []

    # set the proper classification
    if classification == 'ovo':
        # if weighted option is chosen, balance the dataset so that the weights are inversely proportional to frequency
        if not weighted:
            svc = multiclass.OneVsOneClassifier(SVC(kernel=kernel))
        else:
            svc = multiclass.OneVsOneClassifier(
                SVC(kernel=kernel, class_weight='balanced'))

        # initialize parameters for GridSearchCV
        params = {
            "estimator__C": [0.01, 1, 10, 100, 500, 1000],
            'estimator__gamma': [0.01, 1, 10]
        }

        c = "estimator__C"
        gamma = 'estimator__gamma'
        degree = 'estimator__degree'
    elif classification == 'ovr':
        svc = SVC(kernel=kernel)

        # initialize parameters for GridSearchCV
        params = {"C": [0.01, 1, 10, 100, 500, 1000], "gamma": [0.01, 1, 10]}

        c = "C"
        gamma = 'gamma'
        degree = 'degree'
    else:
        print('Invalid Classifier Type')
        return

    # if we have a polynomial kernel we want to reduce the penalty parameter to reduce training time
    if kernel == 'poly':
        params[c] = [0.001, 0.01, 0.1]
        params[degree] = [2, 3, 4]
    elif kernel == 'sigmoid':
        params[gamma].append(0.001)
        params[gamma].append(0.0001)

    # split the data into features and labels
    data = data.values
    x = data[:, 0:9]
    y = data[:, 9:]

    # normalize the data
    scaler = StandardScaler()
    x = scaler.fit_transform(x)

    # perform k-fold cross validation, in this case we're using 5 folds
    kf = KFold(n_splits=5, random_state=1, shuffle=True)

    foldNumber = 0
    totalTime = 0
    for train, test in kf.split(data):

        # increase which fold we're on
        foldNumber += 1

        # get training and test splits
        x_train, x_test = x[train], x[test]
        y_train, y_test = y[train], y[test]

        y_train, y_test = y_train.ravel(), y_test.ravel()

        # perform Grid Search on the training sets
        clf = GridSearchCV(svc, cv=5, param_grid=params, iid=True)

        # start time for the training
        t0 = time.time()

        # fit the model
        clf.fit(x_train, y_train)

        # end time for the training
        t1 = time.time()
        timePerFold.append(t1 - t0)
        totalTime += t1 - t0

        # test the model
        y_pred = clf.predict(x_test)

        # save the best parameters for this fold
        bestParams.append(clf.best_params_)

        # plot the grid search results, save for later
        if (plot and kernel != 'poly') or onePlot:
            plots.append(
                plotGridSearch(clf.cv_results_, params[c], params[gamma], "C",
                               "Gamma", foldNumber, kernel, classification))
        elif plot and kernel == 'poly':
            plots.append(
                plotGS3D(clf.cv_results_, params[c], params[gamma],
                         params[degree], foldNumber, classification))

        # append confusion matrix and accuracy to respective list
        accuracies.append(accuracy_score(y_test, y_pred))
        confusions.append(confusion_matrix(y_test, y_pred))

    # get mean accuracy
    meanAccuracy = np.mean(accuracies) * 100

    # add weighted to kernel name if applicable
    if weighted:
        kernel += "_Weighted"

    # save all the plots as a pdf
    if plot:

        # if we are on the polynomial kernel, flatten the list of lists
        if kernel == 'poly' or kernel == 'poly_Weighted':
            plots = [plot for subplot in plots for plot in subplot]

        file = pdf.PdfPages(kernel.title() + "_Kernel_" +
                            classification.upper() + "_Classification.pdf")
        for fig in plots:
            file.savefig(fig, bbox_inches='tight')
            plt.close(fig)

        file.close()

        # get averaged confusion matrix
        confusions = [
            pd.DataFrame(data=c, columns=labels, index=labels)
            for c in confusions
        ]
        concatCM = pd.concat(confusions)
        cm_total = concatCM.groupby(concatCM.index)
        cm_average = cm_total.mean()

        # plot average confusion matrix
        plotConfusionMatrix(cm_average, kernel, classification)
    elif onePlot:
        plots[1].show()

    # print some useful information
    print("-" * 300)
    print("Classification: ", classification)
    print("Kernel: ", kernel)
    print("Mean Accuracy: ", meanAccuracy)
    print("Time it took to train: ", totalTime)
    print("Time per Fold", timePerFold)
    print("Best Parameters per Fold: ", bestParams)
    print()
    return totalTime, meanAccuracy
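Given the labels list ["1", "2", "3", "5", "6", "7"] and the [:, 0:9] / [:, 9:] slicing, the function expects a DataFrame whose first nine columns are features and whose tenth is the class label, which matches the UCI glass-identification layout. A hedged call sketch with plotting disabled, so the plot helpers not shown in this excerpt are never reached; the CSV path is a placeholder:

# Hypothetical driver; 'glass.csv' is a placeholder path.
import pandas as pd

data = pd.read_csv('glass.csv')
total_time, mean_accuracy = svm(data, kernel='rbf', classification='ovo')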
Example #23
    ovr = multiclass.OneVsRestClassifier(gbc)
    y_pred = ovr.fit(X_train, y_train).predict(X_test)
    for i in range(0, len(y_pred)):
        if y_pred[i] == y_test[i]:
            n = n + 1
        box[y_test[i] - 1,
            y_pred[i] - 1] = box[y_test[i] - 1, y_pred[i] - 1] + 1
    print "One vs. Rest: ", n / 0.72
    for i in range(0, 6):
        for j in range(0, 6):
            print '{:5.0f} '.format(box[i, j]),
        print

    n = 0
    box = np.zeros([6, 6])
    ovo = multiclass.OneVsOneClassifier(gbc)
    y_pred = ovo.fit(X_train, y_train).predict(X_test)
    for i in range(0, len(y_pred)):
        if y_pred[i] == y_test[i]:
            n = n + 1
        box[y_test[i] - 1,
            y_pred[i] - 1] = box[y_test[i] - 1, y_pred[i] - 1] + 1
    print "One vs. One: ", n / 0.72
    for i in range(0, 6):
        for j in range(0, 6):
            print '{:5.0f} '.format(box[i, j]),
        print

    for k in range(30, 36, 6):
        box = np.zeros([6, 6])
        accuracy = np.zeros(100)
Example #24
def oneVOne(x, y, x_test):
    result = mcl.OneVsOneClassifier(LinearSVC()).fit(x, y)
    preds = result.predict(x_test)
    return preds
Example #25
    ovr = multiclass.OneVsRestClassifier(gnb)
    y_pred = ovr.fit(X_train, y_train).predict(X_test)
    for i in range(0, len(y_pred)):
        if y_pred[i] == y_test[i]:
            n = n + 1
        box[y_test[i] - 1,
            y_pred[i] - 1] = box[y_test[i] - 1, y_pred[i] - 1] + 1
    print "One vs. Rest: ", n / 0.72
    for i in range(0, 6):
        for j in range(0, 6):
            print '{:5.0f} '.format(box[i, j]),
        print

    n = 0
    box = np.zeros([6, 6])
    ovo = multiclass.OneVsOneClassifier(gnb)
    y_pred = ovo.fit(X_train, y_train).predict(X_test)
    for i in range(0, len(y_pred)):
        if y_pred[i] == y_test[i]:
            n = n + 1
        box[y_test[i] - 1,
            y_pred[i] - 1] = box[y_test[i] - 1, y_pred[i] - 1] + 1
    print "One vs. One: ", n / 0.72
    for i in range(0, 6):
        for j in range(0, 6):
            print '{:5.0f} '.format(box[i, j]),
        print

    for k in range(60, 66, 6):
        box = np.zeros([6, 6])
        accuracy = np.zeros(100)
Example #26
                        class_weight=None,
                        epsilon=0.1,
                        eta0=0.0,
                        fit_intercept=True,
                        l1_ratio=0.15,
                        learning_rate='optimal',
                        loss='squared_hinge',
                        n_iter=1000,
                        n_jobs=1,
                        penalty='l1',
                        power_t=0.5,
                        random_state=None,
                        shuffle=True,
                        verbose=0,
                        warm_start=False)
multiOvO = multiclass.OneVsOneClassifier(sgdBase)

# sgd + adaboost
numBoost = 1000
# multiOvO.fit(trainXInput, trainYInput)
boostEps, boostAlp, boostModel = adaBoost(multiOvO, numBoost, trainXInput,
                                          trainYInput)

# prediction
nClasses = trainingTargets.shape[1]
boundaryVal = 0.33

# defined output
baseFileName = '../multiOvOSgd'
outBoost = range(100, numBoost + 1, 100)
for i in range(len(outBoost)):