# z = np.zeros((n_samples, n_categories), dtype=float)

# Test for 10 rounds using the results from 10 fold cross validations.
# For every fold: fit each classifier on the training split, score the
# held-out split, and stack the summed per-category scores onto z.
# NOTE(review): the initialisation of z above is commented out, so z must
# already be bound before this loop runs -- confirm it is created earlier.
for i, (train_index, test_index) in enumerate(kf):
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print("run %d" % (i + 1))

    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    X_den_train, X_den_test = X_den[train_index], X_den[test_index]

    # feed models
    clf_mNB.fit(X_train, y_train)
    clf_ridge.fit(X_train, y_train)
    clf_SGD.fit(X_train, y_train)
    clf_lSVC.fit(X_train, y_train)
    clf_SVC.fit(X_train, y_train)

    # get prediction for this fold run
    prob_mNB = clf_mNB.predict_proba(X_test)
    prob_ridge = clf_ridge.decision_function(X_test)
    prob_SGD = clf_SGD.decision_function(X_test)
    prob_lSVC = clf_lSVC.decision_function(X_test)
    prob_SVC = clf_SVC.predict_proba(X_test)

    # add prob functions into the z 2d-array
    # NOTE(review): this sums predict_proba outputs with raw
    # decision_function scores, which are presumably on different scales --
    # confirm the unweighted sum is intended.
    z_temp = (prob_mNB + prob_ridge + prob_SGD + prob_lSVC + prob_SVC)
    # Rows are appended in fold order, one row per held-out sample.
    z = np.append(z, z_temp, axis=0)
# NOTE(review): newer scikit-learn releases replaced `n_iter` with
# `max_iter` -- confirm against the installed version before upgrading.
clf_sgd = SGDClassifier(alpha=.0001, n_iter=50, penalty="l2")

# Logistic regression requires OneVsRestClassifier, which hides
# its methods such as decision_function.
# It will require extra implementation effort to use it as a candidate
# for multilabel classification.
# clf_lgr = OneVsRestClassifier(LogisticRegression(C=1000,penalty='l1'))

# kNN does not have a decision function due to its nature.
# clf_knn = KNeighborsClassifier(n_neighbors=13)

# train: fit every candidate classifier on the full training set
clf_nb.fit(X, y)
clf_lsvc.fit(X, y)
clf_rdg.fit(X, y)
clf_svc.fit(X, y)
clf_sgd.fit(X, y)

# Report the time elapsed since t0. The parenthesised single-argument
# print behaves the same on Python 2 and 3; print("") emits the blank
# line that a bare `print` statement produced on Python 2.
print("Train time: %0.3fs" % (time() - t0))
print("")

# # predict by simply applying the classifier
# # this will not use the multi-label threshold
# predicted = clf_rdg.predict(X_new)
# for doc, category in zip(docs_new, predicted):
#     print '%r => %s' % (doc, data_train.target_names[int(category)])
# print

####################################
# Multi-label prediction using Ridge