Example #1
0
def run_kfold_svm(method, fv, fold=1):
    """Select an SVM on one cross-validation fold and write its test metrics.

    A linear-kernel model is trained first and used as the baseline. An RBF
    grid over ``C`` and ``gamma`` is then searched; a candidate replaces the
    current best only when its validation F1 is strictly higher.

    Side effects:
        * Creates ``result/<method>_<fv>/`` (including parents) if missing.
        * Always dumps the linear model to ``fold<fold>_linear.joblib``.
        * Dumps an RBF model each time it becomes the new best.
        * Writes the test report of the final best model to
          ``fold<fold>.txt`` in the same directory.

    Args:
        method: Identifier forwarded to ``load_kfold_fv`` and used in the
            output directory name.
        fv: Feature-vector identifier, used the same way as ``method``.
        fold: Fold number forwarded to ``load_kfold_fv`` and used in the
            output file names.
    """
    print("---run svm for method:%s of fv:%s fold:%d---" % (method, fv, fold))
    train_items, val_items, test_items = load_kfold_fv(method, fv, fold)

    # Each item unpacks into three fields; only the 2nd and 3rd are used.
    # NOTE(review): presumably (gene, feature vector, label) - confirm
    # against load_kfold_fv.
    _, train_fv, train_label = zip(*train_items)
    _, val_fv, val_label = zip(*val_items)
    _, test_fv, test_label = zip(*test_items)

    # Fit the scaler on the training split only, then apply it to all splits.
    scaler = StandardScaler()
    scaler.fit(train_fv)
    train_fv = np.stack(scaler.transform(train_fv))
    val_fv = np.stack(scaler.transform(val_fv))
    test_fv = np.stack(scaler.transform(test_fv))

    train_label = np.stack(train_label)
    val_label = np.stack(val_label)
    test_label = np.stack(test_label)

    best_classifier, best_f1 = run_classifier(train_fv, train_label, val_fv,
                                              val_label, 'linear')

    tdir = "result/%s_%s" % (method, fv)
    # makedirs(exist_ok=True) also creates the parent "result" directory and
    # tolerates the directory already existing (no check-then-create race).
    os.makedirs(tdir, exist_ok=True)
    joblib.dump(best_classifier, "%s/fold%d_linear.joblib" % (tdir, fold))

    C_list = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
    gamma_list = ['auto', 0.0001, 0.001, 0.01, 0.1, 1]
    best_k = 'linear'
    best_C = None
    best_gamma = None
    for C in C_list:
        for gamma in gamma_list:
            classifier, val_f1 = run_classifier(train_fv, train_label, val_fv,
                                                val_label, 'rbf', C, gamma)

            # Only strictly better candidates are kept and persisted.
            if val_f1 > best_f1:
                best_f1 = val_f1
                best_classifier = classifier
                best_k = 'rbf'
                best_C = C
                best_gamma = gamma

                name = 'fold%d_rbf_C%s_g%s' % (fold, C, gamma)
                pth = os.path.join(tdir, "%s.joblib" % name)
                joblib.dump(classifier, pth)

    test_pd = best_classifier.predict(test_fv)
    path = os.path.join(tdir, "fold%d.txt" % fold)
    from contextlib import redirect_stdout
    # Redirect stdout so the printed report lands in the fold's result file.
    # NOTE(review): assumes npmetrics.print_metrics writes to stdout.
    with open(path, 'w') as f, redirect_stdout(f):
        print("\n---test res---", "K:", best_k, "C:", best_C, 'gamma',
              best_gamma)
        npmetrics.print_metrics(test_label, test_pd)
Example #2
0
def run_classifier(train_fv, train_label, val_fv, val_label, k,
                   C=1.0, gamma='auto'):
    """Fit a one-vs-rest SVM and score it on the validation split.

    Args:
        train_fv: Training features passed to ``fit``.
        train_label: Training labels passed to ``fit``.
        val_fv: Validation features passed to ``predict``.
        val_label: Validation labels the predictions are scored against.
        k: Kernel name forwarded to ``SVC(kernel=...)``.
        C: Value forwarded to ``SVC(C=...)``.
        gamma: Value forwarded to ``SVC(gamma=...)``.

    Returns:
        A ``(classifier, val_f1)`` tuple: the fitted ``OneVsRestClassifier``
        and the value of ``npmetrics.label_f1_macro`` on the validation split.
    """
    model = OneVsRestClassifier(SVC(kernel=k, C=C, gamma=gamma), n_jobs=-1)
    model.fit(train_fv, train_label)

    predictions = model.predict(val_fv)
    score = npmetrics.label_f1_macro(val_label, predictions)

    # Log the configuration with its score, then the full metric report.
    print("\n---val---", "k:", k, 'C:', C, 'gamma', gamma, 'f1:', score)
    npmetrics.print_metrics(val_label, predictions)
    return model, score
Example #3
0
def svm(fv='fv0'):
    """Grid-search SVM kernels and class weights, then print test metrics.

    Every combination of kernel (``linear``, ``rbf``, ``poly``, ``sigmoid``)
    and class weight (``'balanced'``, ``None``) is trained as a one-vs-rest
    SVM. The combination with the highest validation F1 is evaluated on the
    test split; ties keep the earliest candidate.

    Args:
        fv: Feature-vector identifier forwarded to ``load_fv``.
    """
    train_items, val_items, test_items = load_fv(fv)
    print("train items", len(train_items))
    print("val items", len(val_items))
    print("test items", len(test_items))

    # Each item unpacks into three fields; only the 2nd and 3rd are used.
    # NOTE(review): presumably (gene, feature vector, label) - confirm
    # against load_fv.
    _, train_fv, train_label = zip(*train_items)
    _, val_fv, val_label = zip(*val_items)
    _, test_fv, test_label = zip(*test_items)

    print("-------run svm for bov--------", fv)
    # Fit the scaler on the training split only, then apply it to all splits.
    scaler = StandardScaler()
    scaler.fit(train_fv)
    train_fv = np.stack(scaler.transform(train_fv))
    val_fv = np.stack(scaler.transform(val_fv))
    test_fv = np.stack(scaler.transform(test_fv))

    train_label = np.stack(train_label)
    val_label = np.stack(val_label)
    test_label = np.stack(test_label)

    kernels = ['linear', 'rbf', 'poly', 'sigmoid']
    class_weights = ['balanced', None]
    best_f1 = 0.0
    best_classifier = None
    best_k = None
    best_b = None
    for k in kernels:
        for b in class_weights:
            estimator = SVC(kernel=k, class_weight=b)
            classifier = OneVsRestClassifier(estimator, n_jobs=-1)
            classifier.fit(train_fv, train_label)

            val_pd = classifier.predict(val_fv)
            val_f1 = npmetrics.label_f1_macro(val_label, val_pd)
            print("\n---svm for bov---", "k:", k, 'b:', b, 'f1:', val_f1)
            npmetrics.print_metrics(val_label, val_pd)
            # Always accept the first candidate so that a run where no model
            # scores above 0.0 still has a classifier to evaluate, instead of
            # calling .predict on None below.
            if best_classifier is None or val_f1 > best_f1:
                best_f1 = val_f1
                best_classifier = classifier
                best_k = k
                best_b = b

    test_pd = best_classifier.predict(test_fv)
    print("\n---svm for bov test result---", "k:", best_k, "b:", best_b)
    npmetrics.print_metrics(test_label, test_pd)
Example #4
0
def run_svm(method, fv):
    """Select an SVM on the validation split and print its test metrics.

    A linear-kernel model is trained first and used as the baseline. An RBF
    grid over ``C`` and ``gamma`` is then searched; a candidate replaces the
    current best only when its validation F1 is strictly higher.

    Side effects:
        * Creates ``../result/`` if it does not exist.
        * Dumps the linear model to ``../result/<method>_<fv>_linear.joblib``.
        * Dumps every RBF model (not only improving ones) to
          ``../result/<method>_<fv>_rbf_C<C>_g<gamma>.joblib``.

    Args:
        method: Identifier forwarded to ``load_fv`` and used in file names.
        fv: Feature-vector identifier, used the same way as ``method``.
    """
    print("---run svm for method:%s of fv:%s---" % (method, fv))
    train_items, val_items, test_items = load_fv(method, fv)

    # Each item unpacks into three fields; only the 2nd and 3rd are used.
    # NOTE(review): presumably (gene, feature vector, label) - confirm
    # against load_fv.
    _, train_fv, train_label = zip(*train_items)
    _, val_fv, val_label = zip(*val_items)
    _, test_fv, test_label = zip(*test_items)

    # Fit the scaler on the training split only, then apply it to all splits.
    scaler = StandardScaler()
    scaler.fit(train_fv)
    train_fv = np.stack(scaler.transform(train_fv))
    val_fv = np.stack(scaler.transform(val_fv))
    test_fv = np.stack(scaler.transform(test_fv))

    train_label = np.stack(train_label)
    val_label = np.stack(val_label)
    test_label = np.stack(test_label)

    # Make sure the output directory exists before any training, so a
    # missing directory cannot fail the run after a model has been fitted.
    os.makedirs("../result", exist_ok=True)

    best_classifier, best_f1 = run_classifier(train_fv, train_label, val_fv,
                                              val_label, 'linear')
    joblib.dump(best_classifier,
                "../result/%s_%s_linear.joblib" % (method, fv))

    C_list = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
    gamma_list = ['auto', 0.0001, 0.001, 0.01, 0.1, 1]
    best_k = 'linear'
    best_C = None
    best_gamma = None
    for C in C_list:
        for gamma in gamma_list:
            classifier, val_f1 = run_classifier(train_fv, train_label, val_fv,
                                                val_label, 'rbf', C, gamma)
            # Every grid point is persisted, whether or not it improves.
            name = '%s_%s_rbf_C%s_g%s' % (method, fv, C, gamma)
            pth = "../result/%s.joblib" % name
            joblib.dump(classifier, pth)

            if val_f1 > best_f1:
                best_f1 = val_f1
                best_classifier = classifier
                best_k = 'rbf'
                best_C = C
                best_gamma = gamma

    test_pd = best_classifier.predict(test_fv)
    print("\n---test res---", "K:", best_k, "C:", best_C, 'gamma', best_gamma)
    npmetrics.print_metrics(test_label, test_pd)