コード例 #1
0
# z = np.zeros( (n_samples, n_categories) , dtype=float)
# NOTE(review): the initialisation above is commented out, yet the loop below
# appends to z, so z must already be bound before this point -- confirm where.

# Run one evaluation round per (train_index, test_index) pair yielded by kf
# (10 rounds of 10-fold cross validation, per the original comment; kf is
# defined outside this excerpt).
for i, (train_index, test_index) in enumerate(kf):

    # Parenthesised single-argument form: prints the same text on Python 2
    # and is valid syntax on Python 3, where the bare print statement is a
    # SyntaxError.
    print("run %d" % (i+1))

    # Split features and labels for this fold.
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Same split applied to X_den. Neither name is read in the visible part
    # of the loop; kept because code outside this excerpt may use them.
    X_den_train, X_den_test = X_den[train_index], X_den[test_index]

    # Fit every model on this fold's training split.
    clf_mNB.fit(X_train, y_train)
    clf_ridge.fit(X_train, y_train)
    clf_SGD.fit(X_train, y_train)
    clf_lSVC.fit(X_train, y_train)
    clf_SVC.fit(X_train, y_train)

    # Score this fold's test split with each model.
    # NOTE(review): two models contribute predict_proba output and three
    # contribute decision_function output, so the sum below mixes scores on
    # different scales -- confirm this is intended.
    prob_mNB    = clf_mNB.predict_proba(X_test)
    prob_ridge  = clf_ridge.decision_function(X_test)
    prob_SGD    = clf_SGD.decision_function(X_test)
    prob_lSVC   = clf_lSVC.decision_function(X_test)
    prob_SVC    = clf_SVC.predict_proba(X_test)

    # Sum the five score arrays element-wise and append the result to z
    # along axis 0.
    # Assumes all five arrays share one shape -- TODO confirm.
    z_temp = (prob_mNB + prob_ridge + prob_SGD + prob_lSVC + prob_SVC)
    z = np.append(z, z_temp, axis=0)

コード例 #2
0
ファイル: 05_multilabel.py プロジェクト: YuanhaoSun/PPLearn
clf_sgd = SGDClassifier(alpha=.0001, n_iter=50, penalty="l2")

# Candidates not used here, with the original author's reasons:
# - Logistic regression requires OneVsRestClassifier, which hides its
#   methods such as decision_function; using it as a candidate for
#   multilabel classification would take extra implementation effort.
# clf_lgr = OneVsRestClassifier(LogisticRegression(C=1000,penalty='l1'))
# - kNN does not have a decision function due to its nature.
# clf_knn = KNeighborsClassifier(n_neighbors=13)

# Fit each classifier on X, y (all but clf_sgd are created outside this
# excerpt).
clf_nb.fit(X, y)
clf_lsvc.fit(X, y)
clf_rdg.fit(X, y)
clf_svc.fit(X, y)
clf_sgd.fit(X, y)

# Report the time elapsed since t0 (set outside this excerpt), followed by a
# blank line. The parenthesised single-argument form prints the same text on
# Python 2 and is valid on Python 3; print("") is used for the blank line
# because print() would emit "()" on Python 2.
print("Train time: %0.3fs" % (time() - t0))
print("")


# # predict by simply applying the classifier
# # this will not use the multi-label threshold
# predicted = clf_rdg.predict(X_new)
# for doc, category in zip(docs_new, predicted):
#     print '%r => %s' % (doc, data_train.target_names[int(category)])
#     print


####################################
# Multi-label prediction using Ridge