def main():
    """Sweep the regularization strength C and plot train/dev/test accuracy curves.

    Command line: <train features> <dev features> <test features>.
    """
    X_train, y_train = load_features(sys.argv[1])
    X_dev, y_dev = load_features(sys.argv[2])
    X_test, y_test = load_features(sys.argv[3])

    C_list = [0.001, 0.01, 0.1, 1.0, 10., 100.]

    # One (train, dev, test) accuracy triple per value of C.
    scores = []
    for C in C_list:
        model = train(X_train, y_train, C)
        scores.append((
            model.score(X_train, y_train),
            model.score(X_dev, y_dev),
            model.score(X_test, y_test),
        ))
    train_accuracies, dev_accuracies, test_accuracies = (
        list(column) for column in zip(*scores)
    )

    plt.ylim(.7, 1.)
    plt.xscale('log')  # C spans several orders of magnitude
    plt.grid(True)
    for accuracies, label in ((train_accuracies, 'train'),
                              (dev_accuracies, 'dev'),
                              (test_accuracies, 'test')):
        plt.plot(C_list, accuracies, label=label)
    plt.legend()
    plt.show()
def main():
    """Train on the first feature file and print a classification report on the second.

    Command line: <train features> <test features>.
    """
    X_train, y_train = load_features(sys.argv[1])
    X_test, y_test = load_features(sys.argv[2])
    model = train(X_train, y_train)
    predictions = model.predict(X_test)
    print(classification_report(y_test, predictions))
def main():
    """Train a model and report its mean accuracy on the train and test splits.

    Command line: <train features> <test features>.
    """
    X_train, y_train = load_features(sys.argv[1])
    X_test, y_test = load_features(sys.argv[2])
    model = train(X_train, y_train)
    train_score = model.score(X_train, y_train)
    test_score = model.score(X_test, y_test)
    # f-string default formatting matches str(), so the printed output is
    # byte-identical to the old 'literal + str(x)' concatenation.
    print(f'train score = {train_score}')
    print(f'test score = {test_score}')
def main():
    """Print the first five rows of predicted class probabilities for each split.

    Command line: <train features> <test features>.
    """
    X_train, y_train = load_features(sys.argv[1])
    X_test, y_test = load_features(sys.argv[2])
    model = train(X_train, y_train)
    # Same output as printing train then test probabilities separately.
    for split_name, X in (('train', X_train), ('test', X_test)):
        probabilities = model.predict_proba(X)
        print(split_name + '[:5] =')
        print(probabilities[:5])
def main():
    """Show, per class, the 10 strongest and 10 weakest features by learned coefficient.

    Command line: <train features> <test features>.  The test split is loaded
    for interface parity with the sibling scripts but is not otherwise used here.
    """
    X_train, y_train = load_features(sys.argv[1])
    X_test, y_test = load_features(sys.argv[2])
    model = train(X_train, y_train)
    coef = model.coef_.tolist()
    for i, c in enumerate(coef):
        print('class ' + class_names[i])
        # Feature indices ordered by descending coefficient weight, so the
        # strongest positive evidence for the class comes first.
        order = sorted(range(len(c)), key=lambda j: c[j], reverse=True)
        # BUG FIX: the original printed order[:10][::-1] as the "worst 10",
        # i.e. the very same ten features as "best 10" merely reversed, and its
        # ascending sort listed the LOWEST-weight features under "best".  The
        # worst features live at the opposite end of the ranking.
        print('best 10 = {}'.format(order[:10]))
        print('worst 10 = {}'.format(order[-10:][::-1]))
def main():
    """Grid-search several classifier families on dev accuracy; report the winner on test.

    Command line: <train features> <dev features> <test features>.
    Each call to train() is expected to return a (name, args, model,
    dev_accuracy) tuple — assumed from the unpacking below; confirm against
    the train() definition.
    """
    X_train, y_train = load_features(sys.argv[1])
    X_dev, y_dev = load_features(sys.argv[2])
    X_test, y_test = load_features(sys.argv[3])

    params = []

    # LogisticRegression: in this grid, 'l1' is only paired with liblinear,
    # while 'l2' tries both newton-cg and liblinear.
    for penalty in ('l1', 'l2'):
        solvers = ('newton-cg', 'liblinear') if penalty == 'l2' else ('liblinear',)
        for C in (0.01, 0.1, 1., 10.):
            for solver in solvers:
                args = {
                    'penalty': penalty,
                    'C': C,
                    'solver': solver,
                    'multi_class': 'auto',
                    'random_state': 0,
                }
                params.append(train(X_train, y_train, X_dev, y_dev,
                                    'logistic regression', LogisticRegression,
                                    args))

    # KNeighborsClassifier
    for n_neighbors in (2, 3, 5, 10):
        params.append(train(X_train, y_train, X_dev, y_dev, 'k neighbors',
                            KNeighborsClassifier,
                            {'n_neighbors': n_neighbors}))

    # DecisionTreeClassifier
    for max_depth in (1, 3, 5, 10, None):
        params.append(train(X_train, y_train, X_dev, y_dev, 'decision tree',
                            DecisionTreeClassifier,
                            {'max_depth': max_depth, 'random_state': 0}))

    # LinearSVC
    for C in (0.01, 0.1, 1., 10.):
        params.append(train(X_train, y_train, X_dev, y_dev, 'SVM', LinearSVC,
                            {'C': C, 'random_state': 0}))

    # max() is O(n) versus the original O(n log n) full sort; because
    # Python's sort is stable, sorted(..., reverse=True)[0] and max() both
    # pick the FIRST entry with the highest dev accuracy — same winner.
    name, args, model, dev_accuracy = max(params, key=lambda p: p[-1])
    accuracy = model.score(X_test, y_test)
    print('name = {}'.format(name))
    print('args = {}'.format(args))
    print('dev accuracy = {}'.format(dev_accuracy))
    print('test accuracy = {}'.format(accuracy))
def main():
    """Print confusion matrices for the training and test splits.

    Command line: <train features> <test features>.
    """
    X_train, y_train = load_features(sys.argv[1])
    X_test, y_test = load_features(sys.argv[2])
    model = train(X_train, y_train)
    # Same output as computing and printing the two matrices separately:
    # 'train =' block first, then 'test ='.
    for split_name, X, y in (('train', X_train, y_train),
                             ('test', X_test, y_test)):
        predictions = model.predict(X)
        print(split_name + ' =')
        print(confusion_matrix(y, predictions))