def multiclass(train_feature, train_label, test_feature, clftype, method_name, paras): """ The multi classifier method clftype: 'multiclass', 'onevsrest', 'onevsone' method_name: the classifier name paras: list form of parameters """ Classifier, kwargs = get_classifier_by_name(method_name, paras) print 'Method: ', method_name from sklearn.cross_validation import KFold kf = KFold(len(train_label), n_folds, indices=True) index = 0 avg_f1_score_list = [0] * n_folds for train_index, test_index in kf: print 'Prepare cv dataset: %d' % index model_train_feature = train_feature[train_index, :] model_test_feature = train_feature[test_index, :] model_train_label = train_label[train_index] model_test_label = train_label[test_index] #print 'Over sampling...' #model_train_feature, model_train_label = over_sampling(model_train_feature, model_train_label) #ipdb.set_trace() print 'SMOTE over sampling...' model_train_feature, model_train_label = smote_sampling( model_train_feature, model_train_label) clf = get_classifier_by_type(clftype, model_train_feature, model_train_label, Classifier, kwargs) model_test_pred = clf.predict(model_test_feature) print 'Model testing acc:' print classification_report(model_test_label, model_test_pred) #f1_score_list = f1_score(model_test_label, model_test_pred, average=None) #avg_f1_score_list[index] = sum(f1_score_list) / len(f1_score_list) #print 'F1 score:', f1_score_list, 'Avg:', avg_f1_score_list[index] avg_f1_score_list[index] = f1_score_dict( predition2dict(model_test_pred), predition2dict(model_test_label)) print 'Avg: ', avg_f1_score_list[index] index += 1 print 'Method:', method_name avg_avg_f1_score = sum(avg_f1_score_list) / len(avg_f1_score_list) print 'Avg avg_f1_score:', avg_avg_f1_score, '\n' #print 'Oversampling...' #train_feature, train_label = over_sampling(train_feature, train_label) print 'SMOTE over sampling...' train_feature, train_label = smote_sampling(train_feature, train_label) print 'Train the whole multi-class classifiers...' clf = get_classifier_by_type(clftype, train_feature, train_label, Classifier, kwargs) train_pred = clf.predict(train_feature) test_pred = clf.predict(test_feature) print 'Model train acc:' print classification_report(train_label, train_pred) #f1_score_list = f1_score(train_label, train_pred, average=None) #avg_f1_score = sum(f1_score_list) / len(f1_score_list) #print 'F1 score:', f1_score_list, 'Avg:', avg_f1_score # training F1 score print 'Training avg F1 score:', f1_score_dict(predition2dict(train_pred), predition2dict(train_label)) return method_name, test_pred, avg_avg_f1_score
def main(n_components, n_folds, method_name): print 'Load dataset...' import pickle f = open('task2-dataset/task2-dataset.pickle', 'r') train_feature, train_label, test_feature = pickle.load(f) f.close() train_feature, test_feature = PCA_transform( train_feature, test_feature, 'task2-dataset/task2-PCA-decomp.mat') train_feature = train_feature[:, :n_components] test_feature = test_feature[:, :n_components] kwargs = {} #from sklearn.naive_bayes import GaussianNB as Classifier #method_name = 'Twostep+NB' #from sklearn.svm import LinearSVC as Classifier #method_name = 'Twostep+SVC' #kwargs = {'random_state':0, 'C':10} from QDF import QDF as Classifier method_name = 'Twostep+QDF' #from LDF import LDF as Classifier #method_name = 'Twostep+LDF' print 'Method: ', method_name from sklearn.cross_validation import KFold kf = KFold(len(train_label), n_folds, indices=True) index = 0 avg_f1_score_list = [0] * n_folds for train_index, test_index in kf: print 'Prepare cv dataset: %d' % index model_train_feature = train_feature[train_index, :] model_test_feature = train_feature[test_index, :] model_train_label = train_label[train_index] model_test_label = train_label[test_index] model_train_pred, model_test_pred = twostep(model_train_feature, model_train_label, model_test_feature, Classifier, kwargs) #print 'Model testing acc:' #print classification_report(model_test_label, model_test_pred) #f1_score_list = f1_score(model_test_label, model_test_pred, average=None) #avg_f1_score_list[index] = sum(f1_score_list) / len(f1_score_list) #print 'F1 score:', f1_score_list, 'Avg:', avg_f1_score_list[index] avg_f1_score_list[index] = f1_score_dict( predition2dict(model_test_pred), predition2dict(model_test_label)) print 'Avg: ', avg_f1_score_list[index] index += 1 print 'Method:', method_name avg_avg_f1_score = sum(avg_f1_score_list) / len(avg_f1_score_list) print 'Avg avg_f1_score:', avg_avg_f1_score, '\n' print 'Train the whole multi-class classifiers...' train_pred, test_pred = twostep(train_feature, train_label, test_feature, Classifier, kwargs) # training F1 score print 'Training avg F1 score:', f1_score_dict(predition2dict(train_pred), predition2dict(train_label)) # save the final prediction index = 0 f = open('twostep_output.csv', 'w') for y in test_pred: f.write('%d,%d\n' % (index + 1, test_pred[index])) index += 1 f.close()
def main(n_components, n_folds, method_name): print 'Load dataset...' import pickle f = open('task2-dataset/task2-dataset.pickle', 'r') train_feature, train_label, test_feature = pickle.load(f) f.close() train_feature, test_feature = PCA_transform(train_feature, test_feature, 'task2-dataset/task2-PCA-decomp.mat') train_feature = train_feature[:, :n_components] test_feature = test_feature[:, :n_components] kwargs = {} #from sklearn.naive_bayes import GaussianNB as Classifier #method_name = 'Twostep+NB' #from sklearn.svm import LinearSVC as Classifier #method_name = 'Twostep+SVC' #kwargs = {'random_state':0, 'C':10} from QDF import QDF as Classifier method_name = 'Twostep+QDF' #from LDF import LDF as Classifier #method_name = 'Twostep+LDF' print 'Method: ', method_name from sklearn.cross_validation import KFold kf = KFold(len(train_label), n_folds, indices=True) index = 0 avg_f1_score_list = [0] * n_folds for train_index, test_index in kf: print 'Prepare cv dataset: %d' % index model_train_feature = train_feature[train_index, :] model_test_feature = train_feature[test_index, :] model_train_label = train_label[train_index] model_test_label = train_label[test_index] model_train_pred, model_test_pred = twostep(model_train_feature, model_train_label, model_test_feature, Classifier, kwargs) #print 'Model testing acc:' #print classification_report(model_test_label, model_test_pred) #f1_score_list = f1_score(model_test_label, model_test_pred, average=None) #avg_f1_score_list[index] = sum(f1_score_list) / len(f1_score_list) #print 'F1 score:', f1_score_list, 'Avg:', avg_f1_score_list[index] avg_f1_score_list[index] = f1_score_dict(predition2dict(model_test_pred), predition2dict(model_test_label)) print 'Avg: ', avg_f1_score_list[index] index += 1 print 'Method:', method_name avg_avg_f1_score = sum(avg_f1_score_list) / len(avg_f1_score_list) print 'Avg avg_f1_score:', avg_avg_f1_score, '\n' print 'Train the whole multi-class classifiers...' train_pred, test_pred = twostep(train_feature, train_label, test_feature, Classifier, kwargs) # training F1 score print 'Training avg F1 score:', f1_score_dict(predition2dict(train_pred), predition2dict(train_label)) # save the final prediction index = 0 f = open('twostep_output.csv', 'w') for y in test_pred: f.write('%d,%d\n' % (index+1, test_pred[index])) index += 1 f.close()
def multiclass(train_feature, train_label, test_feature, clftype, method_name, paras): """ The multi classifier method clftype: 'multiclass', 'onevsrest', 'onevsone' method_name: the classifier name paras: list form of parameters """ Classifier, kwargs = get_classifier_by_name(method_name, paras) print 'Method: ', method_name from sklearn.cross_validation import KFold kf = KFold(len(train_label), n_folds, indices=True) index = 0 avg_f1_score_list = [0] * n_folds for train_index, test_index in kf: print 'Prepare cv dataset: %d' % index model_train_feature = train_feature[train_index, :] model_test_feature = train_feature[test_index, :] model_train_label = train_label[train_index] model_test_label = train_label[test_index] #print 'Over sampling...' #model_train_feature, model_train_label = over_sampling(model_train_feature, model_train_label) #ipdb.set_trace() print 'SMOTE over sampling...' model_train_feature, model_train_label = smote_sampling(model_train_feature, model_train_label) clf = get_classifier_by_type(clftype, model_train_feature, model_train_label, Classifier, kwargs) model_test_pred = clf.predict(model_test_feature) print 'Model testing acc:' print classification_report(model_test_label, model_test_pred) #f1_score_list = f1_score(model_test_label, model_test_pred, average=None) #avg_f1_score_list[index] = sum(f1_score_list) / len(f1_score_list) #print 'F1 score:', f1_score_list, 'Avg:', avg_f1_score_list[index] avg_f1_score_list[index] = f1_score_dict(predition2dict(model_test_pred), predition2dict(model_test_label)) print 'Avg: ', avg_f1_score_list[index] index += 1 print 'Method:', method_name avg_avg_f1_score = sum(avg_f1_score_list) / len(avg_f1_score_list) print 'Avg avg_f1_score:', avg_avg_f1_score, '\n' #print 'Oversampling...' #train_feature, train_label = over_sampling(train_feature, train_label) print 'SMOTE over sampling...' train_feature, train_label = smote_sampling(train_feature, train_label) print 'Train the whole multi-class classifiers...' clf = get_classifier_by_type(clftype, train_feature, train_label, Classifier, kwargs) train_pred = clf.predict(train_feature) test_pred = clf.predict(test_feature) print 'Model train acc:' print classification_report(train_label, train_pred) #f1_score_list = f1_score(train_label, train_pred, average=None) #avg_f1_score = sum(f1_score_list) / len(f1_score_list) #print 'F1 score:', f1_score_list, 'Avg:', avg_f1_score # training F1 score print 'Training avg F1 score:', f1_score_dict(predition2dict(train_pred), predition2dict(train_label)) return method_name, test_pred, avg_avg_f1_score