def run_kfold_svm(method, fv, fold=1):
    """Train, select and test an SVM for one cross-validation fold.

    A linear-kernel classifier is trained first as the baseline, then an RBF
    grid over ``C`` and ``gamma`` is searched. The candidate with the highest
    validation macro F1 (as returned by ``run_classifier``) is evaluated on
    the test split, and the test report is written to
    ``result/<method>_<fv>/fold<fold>.txt``.

    Args:
        method: Identifier forwarded to ``load_kfold_fv`` and used in the
            output directory name.
        fv: Feature-vector identifier forwarded to ``load_kfold_fv`` and used
            in the output directory name.
        fold: Fold number forwarded to ``load_kfold_fv`` and used in the
            output file names.
    """
    print("---run svm for method:%s of fv:%s fold:%d---" % (method, fv, fold))

    train_items, val_items, test_items = load_kfold_fv(method, fv, fold)
    # Every item unpacks into three fields; the first one is not needed here.
    _, train_fv, train_label = zip(*train_items)
    _, val_fv, val_label = zip(*val_items)
    _, test_fv, test_label = zip(*test_items)

    # Fit the scaler on the training split only, then apply it to all splits.
    scaler = StandardScaler()
    scaler.fit(train_fv)
    train_fv = np.stack(scaler.transform(train_fv))
    val_fv = np.stack(scaler.transform(val_fv))
    test_fv = np.stack(scaler.transform(test_fv))

    train_label = np.stack(train_label)
    val_label = np.stack(val_label)
    test_label = np.stack(test_label)

    # Linear baseline.
    best_classifier, best_f1 = run_classifier(
        train_fv, train_label, val_fv, val_label, 'linear')

    tdir = "result/%s_%s" % (method, fv)
    # makedirs also creates a missing parent "result" directory and does not
    # fail when the directory already exists (os.mkdir did neither safely).
    os.makedirs(tdir, exist_ok=True)
    joblib.dump(best_classifier, "%s/fold%d_linear.joblib" % (tdir, fold))

    C_list = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
    gamma_list = ['auto', 0.0001, 0.001, 0.01, 0.1, 1]

    # best_C / best_gamma stay None when the linear baseline is never beaten.
    best_k = 'linear'
    best_C = None
    best_gamma = None

    for C in C_list:
        for gamma in gamma_list:
            classifier, val_f1 = run_classifier(
                train_fv, train_label, val_fv, val_label, 'rbf', C, gamma)
            if val_f1 > best_f1:
                best_f1 = val_f1
                best_classifier = classifier
                best_k = 'rbf'
                best_C = C
                best_gamma = gamma
                # NOTE(review): only candidates that improve on the best
                # validation score so far are persisted - confirm intended.
                name = 'fold%d_rbf_C%s_g%s' % (fold, C, gamma)
                pth = os.path.join(tdir, "%s.joblib" % name)
                joblib.dump(classifier, pth)

    test_pd = best_classifier.predict(test_fv)

    path = os.path.join(tdir, "fold%d.txt" % fold)
    from contextlib import redirect_stdout
    # Redirect stdout so that the metrics printed by npmetrics land in the
    # per-fold result file.
    with open(path, 'w') as f, redirect_stdout(f):
        print("\n---test res---", "K:", best_k, "C:", best_C,
              'gamma', best_gamma)
        npmetrics.print_metrics(test_label, test_pd)
def run_classifier(train_fv, train_label, val_fv, val_label, k, C=1.0, gamma='auto'):
    """Fit a one-vs-rest SVC and score it on the validation split.

    Args:
        train_fv: Training features passed to ``fit``.
        train_label: Training labels passed to ``fit``.
        val_fv: Validation features passed to ``predict``.
        val_label: Validation labels the predictions are scored against.
        k: Kernel name handed to ``SVC``.
        C: ``C`` value handed to ``SVC``.
        gamma: ``gamma`` value handed to ``SVC``.

    Returns:
        A ``(classifier, val_f1)`` tuple: the fitted ``OneVsRestClassifier``
        and the value of ``npmetrics.label_f1_macro`` on the validation split.
    """
    # n_jobs=-1 is passed through to OneVsRestClassifier unchanged.
    ovr_model = OneVsRestClassifier(
        SVC(kernel=k, C=C, gamma=gamma),
        n_jobs=-1,
    )
    ovr_model.fit(train_fv, train_label)

    val_pred = ovr_model.predict(val_fv)
    macro_f1 = npmetrics.label_f1_macro(val_label, val_pred)

    # Report the headline score first, then the full metric breakdown.
    print("\n---val---", "k:", k, 'C:', C, 'gamma', gamma, 'f1:', macro_f1)
    npmetrics.print_metrics(val_label, val_pred)

    return ovr_model, macro_f1
def svm(fv='fv0'):
    """Search SVC kernels and class weights on one feature set and test the best.

    Every combination of kernel and ``class_weight`` is trained as a
    one-vs-rest SVC on the training split and scored with
    ``npmetrics.label_f1_macro`` on the validation split. The best-scoring
    classifier is then evaluated on the test split and its metrics printed.

    Args:
        fv: Feature-vector identifier forwarded to ``load_fv``.
    """
    # NOTE(review): run_svm in this file calls load_fv(method, fv) with two
    # arguments while this call passes one - confirm load_fv's signature.
    train_items, val_items, test_items = load_fv(fv)
    print("train items", len(train_items))
    print("val items", len(val_items))
    print("test items", len(test_items))

    # Every item unpacks into three fields; the first one is not needed here.
    _, train_fv, train_label = zip(*train_items)
    _, val_fv, val_label = zip(*val_items)
    _, test_fv, test_label = zip(*test_items)

    print("-------run svm for bov--------", fv)

    # Fit the scaler on the training split only, then apply it to all splits.
    scaler = StandardScaler()
    scaler.fit(train_fv)
    train_fv = np.stack(scaler.transform(train_fv))
    val_fv = np.stack(scaler.transform(val_fv))
    test_fv = np.stack(scaler.transform(test_fv))

    train_label = np.stack(train_label)
    val_label = np.stack(val_label)
    test_label = np.stack(test_label)

    kernels = ['linear', 'rbf', 'poly', 'sigmoid']
    class_weights = ['balanced', None]

    best_f1 = 0.0
    best_classifier = None
    best_k = None
    best_b = None

    for k in kernels:
        for b in class_weights:
            estimator = SVC(kernel=k, class_weight=b)
            classifier = OneVsRestClassifier(estimator, n_jobs=-1)
            classifier.fit(train_fv, train_label)

            val_pd = classifier.predict(val_fv)
            val_f1 = npmetrics.label_f1_macro(val_label, val_pd)
            print("\n---svm for bov---", "k:", k, 'b:', b, 'f1:', val_f1)
            npmetrics.print_metrics(val_label, val_pd)

            # Always accept the first candidate so best_classifier is never
            # None at prediction time; previously a run in which no candidate
            # scored above 0.0 crashed with AttributeError on None.predict.
            if best_classifier is None or val_f1 > best_f1:
                best_f1 = val_f1
                best_classifier = classifier
                best_k = k
                best_b = b

    test_pd = best_classifier.predict(test_fv)
    print("\n---svm for bov test result---", "k:", best_k, "b:", best_b)
    npmetrics.print_metrics(test_label, test_pd)
def run_svm(method, fv):
    """Train, select and test an SVM for one method / feature-set pair.

    A linear-kernel classifier is trained first as the baseline, then an RBF
    grid over ``C`` and ``gamma`` is searched. Every trained classifier is
    saved under ``../result/``; the one with the highest validation macro F1
    (as returned by ``run_classifier``) is evaluated on the test split and its
    metrics printed.

    Args:
        method: Identifier forwarded to ``load_fv`` and used in the saved
            model file names.
        fv: Feature-vector identifier forwarded to ``load_fv`` and used in
            the saved model file names.
    """
    print("---run svm for method:%s of fv:%s---" % (method, fv))

    train_items, val_items, test_items = load_fv(method, fv)
    # Every item unpacks into three fields; the first one is not needed here.
    _, train_fv, train_label = zip(*train_items)
    _, val_fv, val_label = zip(*val_items)
    _, test_fv, test_label = zip(*test_items)

    # Fit the scaler on the training split only, then apply it to all splits.
    scaler = StandardScaler()
    scaler.fit(train_fv)
    train_fv = np.stack(scaler.transform(train_fv))
    val_fv = np.stack(scaler.transform(val_fv))
    test_fv = np.stack(scaler.transform(test_fv))

    train_label = np.stack(train_label)
    val_label = np.stack(val_label)
    test_label = np.stack(test_label)

    # Make sure the output directory exists before any training happens, so a
    # missing directory does not surface only at the first joblib.dump.
    os.makedirs("../result", exist_ok=True)

    # Linear baseline.
    best_classifier, best_f1 = run_classifier(
        train_fv, train_label, val_fv, val_label, 'linear')
    joblib.dump(best_classifier, "../result/%s_%s_linear.joblib" % (method, fv))

    C_list = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
    gamma_list = ['auto', 0.0001, 0.001, 0.01, 0.1, 1]

    # best_C / best_gamma stay None when the linear baseline is never beaten.
    best_k = 'linear'
    best_C = None
    best_gamma = None

    for C in C_list:
        for gamma in gamma_list:
            classifier, val_f1 = run_classifier(
                train_fv, train_label, val_fv, val_label, 'rbf', C, gamma)

            # Every grid point is persisted, whether or not it improves.
            name = '%s_%s_rbf_C%s_g%s' % (method, fv, C, gamma)
            pth = "../result/%s.joblib" % name
            joblib.dump(classifier, pth)

            if val_f1 > best_f1:
                best_f1 = val_f1
                best_classifier = classifier
                best_k = 'rbf'
                best_C = C
                best_gamma = gamma

    test_pd = best_classifier.predict(test_fv)
    print("\n---test res---", "K:", best_k, "C:", best_C, 'gamma', best_gamma)
    npmetrics.print_metrics(test_label, test_pd)