Example no. 1
0
def main(train_ratio=0.9):
    """Train a linear-kernel SVM on a shuffled split and print test metrics.

    train_ratio: fraction of the shuffled data used for training; the
    remainder is held out for computing precision, recall and F1.
    """
    options['show_progress'] = False
    raw = retrieve_data(as_list=True, negative_label=-1.0, positive_label=1.0, insert_bias=False)
    xs, ys = shuffle_args(*raw)
    xs_train, xs_test = split_with_ratio(xs, train_ratio)
    ys_train, ys_test = split_with_ratio(ys, train_ratio)

    best_c = optimal_regularizer(xs_train, ys_train, linear_kernel)
    print('best C=%4.1e' % best_c)
    w, b = train_svm(xs_train, ys_train, best_c, linear_kernel)

    # Confusion-matrix counts accumulated over the held-out samples.
    counts = {'tp': 0, 'tn': 0, 'fp': 0, 'fn': 0}
    for sample, label in zip(xs_test, ys_test):
        predicted = classify(sample, w, b)
        if int(label) == -1:
            counts['tn' if predicted == -1 else 'fp'] += 1
        elif int(label) == 1:
            # elif is safe: the label cannot be both -1 and 1.
            counts['tp' if predicted == 1 else 'fn'] += 1

    precision = safe_division(counts['tp'], counts['tp'] + counts['fp'])
    recall = safe_division(counts['tp'], counts['tp'] + counts['fn'])
    f1 = safe_division(2 * precision * recall, precision + recall)

    print('precision  = %6.2f%%' % (100 * precision))
    print('recall     = %6.2f%%' % (100 * recall))
    print('F1 measure = %6.2f' % f1)
Example no. 2
0
def main(train_ratio=0.9):
    """Fit a linear SVM with an auto-tuned regularizer and report
    precision, recall and F1 on the held-out fraction of the data."""
    options['show_progress'] = False
    xs, ys = shuffle_args(*retrieve_data(as_list=True,
                                         negative_label=-1.0,
                                         positive_label=1.0,
                                         insert_bias=False))
    xs_train, xs_test = split_with_ratio(xs, train_ratio)
    ys_train, ys_test = split_with_ratio(ys, train_ratio)

    regularizer = optimal_regularizer(xs_train, ys_train, linear_kernel)
    print('best C=%4.1e' % regularizer)
    w, b = train_svm(xs_train, ys_train, regularizer, linear_kernel)

    # Tally the four confusion-matrix outcomes on the test portion.
    outcome = {'tp': 0, 'tn': 0, 'fp': 0, 'fn': 0}
    for features, truth in zip(xs_test, ys_test):
        guess = classify(features, w, b)
        truth = int(truth)
        if truth == -1:
            outcome['tn' if guess == -1 else 'fp'] += 1
        if truth == 1:
            outcome['tp' if guess == 1 else 'fn'] += 1

    tp, fp, fn = outcome['tp'], outcome['fp'], outcome['fn']
    precision = safe_division(tp, tp + fp)
    recall = safe_division(tp, tp + fn)
    f1 = safe_division(2 * precision * recall, precision + recall)

    print('precision  = %6.2f%%' % (100 * precision))
    print('recall     = %6.2f%%' % (100 * recall))
    print('F1 measure = %6.2f' % f1)
Example no. 3
0
def main(test_fraction):
    """Train a perceptron on the head of the data set and print precision,
    recall and F1 measured on the tail.

    test_fraction: fraction of the data reserved for testing, in (0, 1].
    Raises ValueError when the fraction rounds down to an empty test set:
    with test_size == 0 the slice xs[:-test_size] is xs[:0] == [], which
    would silently train the perceptron on NOTHING.
    """
    xs, ys = retrieve_data()
    test_size = int(len(xs) * test_fraction)
    if test_size <= 0:
        raise ValueError('test_fraction yields an empty test set')

    w = train_perceptron(xs[:-test_size], ys[:-test_size], iters=10000)

    # 'tp' — true positive
    # 'tn' — true negative
    # 'fp' — false positive
    # 'fn' — false negative
    stats = {'tp': 0, 'tn': 0, 'fp': 0, 'fn': 0}
    for x, y in zip(xs[-test_size:], ys[-test_size:]):
        yc = classify(w, x)
        if y == -1:
            key = 'tn' if yc == -1 else 'fp'
            stats[key] += 1
        elif y == 1:
            key = 'tp' if yc == 1 else 'fn'
            stats[key] += 1

    precision = safe_division(stats['tp'], stats['tp'] + stats['fp'])
    recall = safe_division(stats['tp'], stats['tp'] + stats['fn'])
    f1 = safe_division(2 * precision * recall, precision + recall)

    print('precision  = %6.2f%%' % (100 * precision))
    print('recall     = %6.2f%%' % (100 * recall))
    print('F1 measure = %6.2f' % f1)
Example no. 4
0
def main(test_fraction):
    """Evaluate a perceptron with a simple head/tail train-test split.

    test_fraction: share of samples held out for testing, in (0, 1].
    Raises ValueError if int(len(xs) * test_fraction) is zero — otherwise
    xs[:-0] evaluates to the EMPTY list and training would silently
    receive no samples at all.
    """
    xs, ys = retrieve_data()
    test_size = int(len(xs) * test_fraction)
    if test_size == 0:
        raise ValueError('test_fraction too small: no test samples')

    w = train_perceptron(xs[:-test_size], ys[:-test_size], iters=10000)

    # 'tp' — true positive
    # 'tn' — true negative
    # 'fp' — false positive
    # 'fn' — false negative
    stats = {'tp': 0, 'tn': 0, 'fp': 0, 'fn': 0}
    for x, y in zip(xs[-test_size:], ys[-test_size:]):
        yc = classify(w, x)
        if y == -1:
            stats['tn' if yc == -1 else 'fp'] += 1
        elif y == 1:
            stats['tp' if yc == 1 else 'fn'] += 1

    precision = safe_division(stats['tp'], stats['tp'] + stats['fp'])
    recall = safe_division(stats['tp'], stats['tp'] + stats['fn'])
    f1 = safe_division(2 * precision * recall, precision + recall)

    print('precision  = %6.2f%%' % (100 * precision))
    print('recall     = %6.2f%%' % (100 * recall))
    print('F1 measure = %6.2f' % f1)
Example no. 5
0
def main():
    """Benchmark the linear, polynomial (degree 2-4) and gaussian kernels
    on a single 90/10 train-test split with a fixed regularizer."""
    xs, ys = retrieve_data(as_list=True)
    print('debug: data retrieved')
    split_ratio = 0.9
    xs_train, xs_test = split_with_ratio(xs, split_ratio)
    ys_train, ys_test = split_with_ratio(ys, split_ratio)

    train_set = (xs_train, ys_train)
    test_set = (xs_test, ys_test)
    regularizer = 1e2

    check_kernel(train_set, test_set, regularizer, linear_kernel, 'linear')
    for degree in range(2, 5):
        check_kernel(train_set, test_set, regularizer,
                     poly_kernel_wrapper(degree), 'polynomial %d' % degree)
    check_kernel(train_set, test_set, regularizer,
                 gaussian_kernel_wrapper(1.0), 'gaussian')
Example no. 6
0
def main(test_fraction=0.1):
    """Fit sklearn's LinearSVC with an auto-selected regularizer and print
    the test-set error rate and the chosen constant.

    test_fraction: fraction of samples held out for testing, in (0, 1].
    Raises ValueError when it rounds down to an empty test set — otherwise
    xs[:-0] == [] would silently train on NOTHING, and the error-rate
    print below would divide by zero.
    """
    xs, ys = retrieve_data(as_list=True, negative_label=0)

    test_size = int(len(xs) * test_fraction)
    if test_size <= 0:
        raise ValueError('test_fraction yields an empty test set')

    train_xs = xs[:-test_size]
    train_ys = ys[:-test_size]
    C = best_regularization(train_xs, train_ys)
    model = svm.LinearSVC(C=C)
    model.fit(train_xs, train_ys)

    # Count misclassifications; the loop pairs true labels with predictions
    # (original names x/x_pred were misleading — these are labels).
    errors = 0
    for y_true, y_pred in zip(ys[-test_size:], model.predict(xs[-test_size:])):
        if y_true != y_pred:
            errors += 1
    print("error on test set: %6.2f%%" % (100 * errors / test_size))
    # log(C, 0.1) == -log10(C); assumes C is a negative power of ten — TODO confirm.
    print("regularization constant is 1e-%d" % round(log(C, 0.1)))
Example no. 7
0
def main(test_fraction=0.1):
    """Train a LinearSVC on the head of the data and report the error rate
    on the tail, plus the regularization constant that was selected.

    test_fraction: held-out share of the samples, in (0, 1].
    Raises ValueError for a zero-sized test set: xs[:-0] is the EMPTY list,
    so the model would be fit on no data and 'errors / test_size' would
    raise ZeroDivisionError anyway.
    """
    xs, ys = retrieve_data(as_list=True, negative_label=0)

    test_size = int(len(xs) * test_fraction)
    if test_size == 0:
        raise ValueError('test_fraction too small: no test samples')

    train_xs = xs[:-test_size]
    train_ys = ys[:-test_size]
    C = best_regularization(train_xs, train_ys)
    model = svm.LinearSVC(C=C)
    model.fit(train_xs, train_ys)

    # Misclassification count over the held-out tail; renamed the loop
    # variables — they are true/predicted labels, not feature vectors.
    predictions = model.predict(xs[-test_size:])
    errors = sum(1 for y_true, y_pred in zip(ys[-test_size:], predictions)
                 if y_true != y_pred)
    print("error on test set: %6.2f%%" % (100 * errors / test_size))
    # log(C, 0.1) == -log10(C); assumes C is a negative power of ten — TODO confirm.
    print("regularization constant is 1e-%d" % round(log(C, 0.1)))