def main(train_ratio=0.9):
    """Train a linear-kernel SVM and report precision/recall/F1 on a held-out split.

    train_ratio -- fraction of the (shuffled) data used for training.
    """
    # Silence the solver's iteration log (presumably CVXOPT's global options).
    options['show_progress'] = False

    data = retrieve_data(as_list=True, negative_label=-1.0,
                         positive_label=1.0, insert_bias=False)
    xs, ys = shuffle_args(*data)
    xs_train, xs_test = split_with_ratio(xs, train_ratio)
    ys_train, ys_test = split_with_ratio(ys, train_ratio)

    # Pick the regularization constant on the training split only.
    C = optimal_regularizer(xs_train, ys_train, linear_kernel)
    print('best C=%4.1e' % C)
    w, b = train_svm(xs_train, ys_train, C, linear_kernel)

    # Confusion-matrix counts over the test split:
    # tp/tn = true positive/negative, fp/fn = false positive/negative.
    stats = {'tp': 0, 'tn': 0, 'fp': 0, 'fn': 0}
    for sample, label in zip(xs_test, ys_test):
        predicted = classify(sample, w, b)
        if int(label) == -1:
            stats['tn' if predicted == -1 else 'fp'] += 1
        if int(label) == 1:
            stats['tp' if predicted == 1 else 'fn'] += 1

    precision = safe_division(stats['tp'], stats['tp'] + stats['fp'])
    recall = safe_division(stats['tp'], stats['tp'] + stats['fn'])
    f1 = safe_division(2 * precision * recall, precision + recall)
    print('precision = %6.2f%%' % (100 * precision))
    print('recall = %6.2f%%' % (100 * recall))
    print('F1 measure = %6.2f' % f1)
def main(test_fraction):
    """Train a perceptron on the head of the data set and report
    precision/recall/F1 on the held-out tail.

    test_fraction -- fraction of samples (taken from the end) used for testing.
    """
    xs, ys = retrieve_data()
    test_size = int(len(xs) * test_fraction)
    # BUG FIX: xs[:-test_size] is the EMPTY list when test_size == 0
    # (xs[:-0] == xs[:0]), so a zero test fraction used to train on no data.
    # Slice with an explicit split index instead.
    split = len(xs) - test_size
    w = train_perceptron(xs[:split], ys[:split], iters=10000)

    # Confusion-matrix counts:
    # tp/tn = true positive/negative, fp/fn = false positive/negative.
    stats = {'tp': 0, 'tn': 0, 'fp': 0, 'fn': 0}
    for x, y in zip(xs[split:], ys[split:]):
        yc = classify(w, x)
        # Labels are mutually exclusive, so elif (was two independent ifs).
        if y == -1:
            stats['tn' if yc == -1 else 'fp'] += 1
        elif y == 1:
            stats['tp' if yc == 1 else 'fn'] += 1

    precision = safe_division(stats['tp'], stats['tp'] + stats['fp'])
    recall = safe_division(stats['tp'], stats['tp'] + stats['fn'])
    f1 = safe_division(2 * precision * recall, precision + recall)
    print('precision = %6.2f%%' % (100 * precision))
    print('recall = %6.2f%%' % (100 * recall))
    print('F1 measure = %6.2f' % f1)
def main():
    """Compare linear, polynomial (degrees 2-4) and gaussian kernels on a 90/10 split."""
    xs, ys = retrieve_data(as_list=True)
    print('debug: data retrieved')

    ratio = 0.9
    xs_train, xs_test = split_with_ratio(xs, ratio)
    ys_train, ys_test = split_with_ratio(ys, ratio)
    train = (xs_train, ys_train)
    test = (xs_test, ys_test)

    # Same regularization constant for every kernel under comparison.
    C = 1e2
    check_kernel(train, test, C, linear_kernel, 'linear')
    for degree in range(2, 5):
        check_kernel(train, test, C, poly_kernel_wrapper(degree),
                     'polynomial %d' % degree)
    check_kernel(train, test, C, gaussian_kernel_wrapper(1.0), 'gaussian')
def main(test_fraction=0.1):
    """Fit a LinearSVC with a tuned regularization constant and report test error.

    test_fraction -- fraction of samples (taken from the end) used for testing.
    """
    xs, ys = retrieve_data(as_list=True, negative_label=0)
    test_size = int(len(xs) * test_fraction)
    # BUG FIX: xs[:-test_size] is the EMPTY list when test_size == 0
    # (xs[:-0] == xs[:0]); slice with an explicit split index instead.
    split = len(xs) - test_size
    train_xs = xs[:split]
    train_ys = ys[:split]

    C = best_regularization(train_xs, train_ys)
    model = svm.LinearSVC(C=C)
    model.fit(train_xs, train_ys)

    # Count misclassified test samples.  The redundant list() copies around
    # the iterables are gone, and the loop names say "label" rather than "x".
    errors = sum(1 for y, y_pred in zip(ys[split:], model.predict(xs[split:]))
                 if y != y_pred)
    print("error on test set: %6.2f%%" % (100 * errors / test_size))
    # log(C, 0.1) == -log10(C), so e.g. C == 1e-3 prints "1e-3".
    # NOTE(review): assumes C < 1 so the exponent is positive — confirm against
    # best_regularization's search range.
    print("regularization constant is 1e-%d" % round(log(C, 0.1)))