import numpy as np
from scipy.stats import t as t_table
from sklearn.linear_model import LinearRegression


def main(feature_set):
    coef_list = []
    for iteration in range(MAX_ITERATIONS):
        print('iteration: %d\r' % (iteration + 1), end='')
        x_train, x_test, y_train, y_test = get_regression_dataset(
            0.6, feature_set=feature_set)
        # x_train, x_test = x_train[feature_set], x_test[feature_set]
        lr = LinearRegression()
        lr.fit(x_train, y_train)
        coef_list.append(lr.coef_)

    # Estimate coefficient significance from the spread across repeated fits.
    coef_list = np.array(coef_list)
    se = np.std(coef_list, 0) / np.sqrt(MAX_ITERATIONS)  # standard error of the mean
    t = np.mean(coef_list, 0) / se
    # Two-sided p-value; df is one fewer than the number of repeated fits
    # (the original used len(t) - 1, i.e. the number of features, which
    # mismatches the sample size the standard error is computed from).
    pvalue = t_table.sf(np.fabs(t), MAX_ITERATIONS - 1) * 2
    coef_list = np.mean(coef_list, 0)

    print('\n\n{:25s} {:s} {:s} {:s} {:s}'.format(
        'Field', 'COEF', 'Standard Error', 't-Statistics', 'P-value'))
    print('=' * 80)
    for values in zip(feature_set, coef_list, se, t, pvalue):
        print('{:25s} {:3.4f} \t {:3.4f} \t {:3.4f} \t {:3.6f}'.format(*values))
    print('\n')
    print_errors(lr, x_train, y_train.values, x_test, y_test.values,
                 msg='Full Features')
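# The scripts in this section rely on a get_regression_dataset helper that
# is not shown here. A minimal sketch of what it might look like, assuming
# it wraps sklearn's train_test_split over a pandas DataFrame; the file
# path, target column name, and exact signature are assumptions:
import pandas as pd
from sklearn.model_selection import train_test_split


def get_regression_dataset(train_size=0.7, feature_set=None, random_state=None):
    df = pd.read_csv('data/regression.csv')  # hypothetical data location
    y = df.pop('target')                     # hypothetical target column
    x = df[list(feature_set)] if feature_set is not None else df
    return train_test_split(x, y, train_size=train_size,
                            random_state=random_state)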
from sklearn.linear_model import ElasticNet, Lasso, LinearRegression, Ridge


def run_bench_mark():
    x_train, x_test, y_train, y_test = get_regression_dataset(random_state=0)

    print('Linear Regression ===========================')
    linreg = LinearRegression()  # renamed from "logreg": this is linear, not logistic
    linreg.fit(x_train, y_train)
    print_errors(linreg, x_train, y_train, x_test, y_test)
    # save_coefs(linreg, path.join('regression', 'results', 'linear.coef'))

    print('Ridge Regression ============================')
    clf = Ridge(alpha=1.0)
    clf.fit(x_train, y_train)
    print_errors(clf, x_train, y_train, x_test, y_test)
    # save_coefs(clf, path.join('regression', 'results', 'ridge.coef'))

    print('Lasso Regression ============================')
    lso = Lasso(alpha=0.1)
    lso.fit(x_train, y_train)
    print_errors(lso, x_train, y_train, x_test, y_test)
    # save_coefs(lso, path.join('regression', 'results', 'lasso.coef'))

    print('ElasticNet Regression =======================')
    eln = ElasticNet(random_state=0)
    eln.fit(x_train, y_train)
    print_errors(eln, x_train, y_train, x_test, y_test)
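# print_errors is another helper defined elsewhere. A plausible minimal
# version for the regression scripts, assuming it reports train/test MSE
# and R^2 for a fitted estimator; the metrics and output format here are
# assumptions, not the author's implementation:
from sklearn.metrics import mean_squared_error, r2_score


def print_errors(model, x_train, y_train, x_test, y_test, msg=''):
    if msg:
        print(msg)
    for name, x, y in (('train', x_train, y_train), ('test', x_test, y_test)):
        pred = model.predict(x)
        print('  {:5s} MSE={:.4f}  R2={:.4f}'.format(
            name, mean_squared_error(y, pred), r2_score(y, pred)))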
from sklearn.tree import DecisionTreeRegressor


def engine():
    best_r2 = None
    best_depth = None
    for depth in range(2, 20):
        r2 = main(depth)
        # R^2 is a goodness-of-fit score, so keep the depth that maximizes it
        # (the original compared with <, which would select the worst depth).
        if best_r2 is None or r2 > best_r2:
            best_r2 = r2
            best_depth = depth

    x_train, x_test, y_train, y_test = get_regression_dataset()
    regr = DecisionTreeRegressor(max_depth=best_depth)
    regr.fit(x_train, y_train)
    print_errors(regr, x_train, y_train, x_test, y_test,
                 msg='Tree-Based Regression')
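# engine() sweeps main(i) over candidate tree depths, but that main() is
# not shown. A sketch of what it presumably does, assuming it fits a tree
# at the given depth and returns its held-out R^2:
def main(depth):
    x_train, x_test, y_train, y_test = get_regression_dataset()
    regr = DecisionTreeRegressor(max_depth=depth)
    regr.fit(x_train, y_train)
    return regr.score(x_test, y_test)  # R^2 on the test split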
import numpy as np
import matplotlib.pyplot as plt


def main(k):
    x_train, x_test, y_train, y_test = get_classification_dataset(0.3)
    # Reshape targets to column vectors (the custom models appear to
    # expect 2-D targets).
    y_train = np.reshape(y_train, (len(y_train), 1))
    y_test = np.reshape(y_test, (len(y_test), 1))

    # model_1 .. model_5 are custom model classes defined elsewhere.
    models = [model_1, model_2, model_3, model_4, model_5]
    for m in models:
        mdl = m()
        k_fold(mdl, x_train, y_train, k)
        print_errors(mdl, x_train, y_train, x_test, y_test,
                     msg=mdl.name, prf=True)
    plt.show()
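# k_fold is shared by the classification scripts but not shown. A minimal
# sketch, assuming it reports k-fold cross-validation accuracy and returns
# the estimator refitted on the full training set; this only works if the
# models follow the sklearn estimator API, which is an assumption here:
import numpy as np
from sklearn.model_selection import cross_val_score


def k_fold(model, x_train, y_train, k=5):
    scores = cross_val_score(model, x_train, np.ravel(y_train), cv=k)
    print('{}-fold CV accuracy: {:.4f} +/- {:.4f}'.format(
        k, scores.mean(), scores.std()))
    model.fit(x_train, np.ravel(y_train))
    return model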
import matplotlib.pyplot as plt
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as QDA
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.naive_bayes import GaussianNB as NB


def main(k):
    x_train, x_test, y_train, y_test = get_classification_dataset(0.3)

    lr = k_fold(LogisticRegression(), x_train, y_train, k)
    print_errors(lr, x_train, y_train, x_test, y_test,
                 msg='Logistic Regression', prf=True)

    lda = k_fold(LDA(), x_train, y_train, k)
    print_errors(lda, x_train, y_train, x_test, y_test,
                 msg='Linear Discriminant Analysis', prf=True)

    qda = k_fold(QDA(), x_train, y_train, k)
    print_errors(qda, x_train, y_train, x_test, y_test,
                 msg='Quadratic Discriminant Analysis', prf=True)

    gnb = k_fold(NB(), x_train, y_train, k)
    print_errors(gnb, x_train, y_train, x_test, y_test,
                 msg='Gaussian Naive Bayes', prf=True)

    # Linear regression as a baseline, fit directly without cross-validation.
    lreg = LinearRegression()
    lreg.fit(x_train, y_train)
    print_errors(lreg, x_train, y_train, x_test, y_test,
                 msg='Linear Regression', prf=True)

    plt.show()
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier


def run_tree_based_classification():
    x_train, x_test, y_train, y_test = get_classification_dataset(0.3)

    # Renamed the loop variable from "reg" to "clf": these are classifiers.
    clf = RandomForestClassifier(n_estimators=100, max_depth=2, random_state=0)
    clf = k_fold(clf, x_train, y_train)
    print_errors(clf, x_train, y_train, x_test, y_test,
                 msg='Random Forest Classifier', prf=True)

    clf = DecisionTreeClassifier()
    clf = k_fold(clf, x_train, y_train)
    print_errors(clf, x_train, y_train, x_test, y_test,
                 msg='Decision Tree Classifier', prf=True)

    clf = SVC(gamma='scale', probability=True)
    clf = k_fold(clf, x_train, y_train)
    print_errors(clf, x_train, y_train, x_test, y_test,
                 msg='Support Vector Classifier (SVC)', prf=True)

    plt.show()
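# The classification scripts call print_errors with prf=True, so they use a
# different variant than the regression one sketched earlier. A guess at
# that variant, assuming prf adds precision/recall/F1 on top of accuracy
# (the real helper also appears to draw figures, hence the plt.show() calls):
from sklearn.metrics import accuracy_score, precision_recall_fscore_support


def print_errors(model, x_train, y_train, x_test, y_test, msg='', prf=False):
    if msg:
        print(msg)
    pred = model.predict(x_test)
    print('  test accuracy={:.4f}'.format(accuracy_score(y_test, pred)))
    if prf:
        p, r, f, _ = precision_recall_fscore_support(
            y_test, pred, average='weighted')
        print('  precision={:.4f}  recall={:.4f}  F1={:.4f}'.format(p, r, f))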
def test(self, test_set):
    """Tests the accuracy of the classifier on a new set of documents.

    The classifier must not have been previously trained on the test set.
    """
    tested = 0
    correct = 0
    errors = []
    level = 0  # 0=class, 1=subclass (unused here)
    print('Testing with %d documents...' % len(test_set))
    for doc in test_set:
        actual = doc.get_labels()  # [class_label, subclass_label]
        p_class = self.classify(doc)
        # Use the correct subclassifier for now.
        p_subclass = self.subclassify(doc, actual[0])
        predicted = [p_class, p_subclass]
        if actual[0] == predicted[0]:
            correct += 1
        else:
            errors.append((actual, predicted, doc))
        tested += 1
    # True division: the original float(correct/tested) truncates to 0 or 1
    # under Python 2 integer division before the float conversion applies.
    accuracy = correct / float(tested)
    print('Accuracy=%f, tested=%d, correct=%d, errors=%d'
          % (accuracy, tested, correct, len(errors)))
    utils.print_errors(errors)
    return accuracy
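# Hypothetical usage of the tester above, assuming the surrounding class
# exposes a train() method and that documents carry [class, subclass]
# labels; the class and method names here are illustrative only:
classifier = DocumentClassifier()
classifier.train(train_set)
accuracy = classifier.test(test_set)  # prints per-document errors via utils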
import time

import torch
import torch.nn.functional as F

from utils import print_errors

losses = []
epochs = 25000
for epoch in range(epochs):
    y_pred = model(X_train)
    loss = F.smooth_l1_loss(y_pred, Y_train)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    print('Epoch ' + str(epoch) + '. Loss: ' + str(loss.item()))
    losses.append(loss.item())

# Save the trained model once training is complete.
torch.save(model, '../cnn_v2_weights.pt')

# Time a full forward pass over the held-out set.
t = time.time()
y_pred = model(X_test)
print('Time for full set', time.time() - t)

test_loss = F.smooth_l1_loss(y_pred, Y_test)
print('Final Loss')
print(test_loss.item())
print('Trained on ' + str(len(X_train)) + ', Tested on '
      + str(len(X_test)) + ' samples')

print_errors(Y_test.detach().numpy(), y_pred.detach().numpy(), 'TEST Errors:')
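# Here utils.print_errors takes raw arrays rather than a fitted estimator,
# so it is a third variant of the helper. A minimal sketch, assuming it
# reports MAE and RMSE over the prediction errors (the metric choice is
# an assumption):
import numpy as np


def print_errors(y_true, y_pred, msg=''):
    if msg:
        print(msg)
    err = y_pred - y_true
    print('  MAE ={:.4f}'.format(np.abs(err).mean()))
    print('  RMSE={:.4f}'.format(np.sqrt((err ** 2).mean())))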
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.15, random_state=0)

# Feature scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Save the fitted scaler alongside the model. Note: sklearn.externals.joblib
# has been removed from recent scikit-learn; use the standalone joblib package.
import joblib

train_polyreg = True
if train_polyreg:
    # Train regression
    regression_order = 1
    y_pred = train_regression_and_predict(
        regression_order, X_train, y_train, X_test)
    joblib.dump(sc, '../models/regression_scale.pkl')
else:
    # Train ANN
    y_pred = train_ann_and_predict(X_train, y_train, X_test)
    joblib.dump(sc, '../models/ann_scale.pkl')

# Test
from utils import print_errors
print_errors(y_test, y_pred, 'TEST Errors:')
print('Trained on ' + str(len(X_train)) + ' samples')
print('Tested on ' + str(len(y_test)) + ' samples')
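# train_regression_and_predict is defined elsewhere; given the
# regression_order argument, it presumably fits a polynomial regression of
# that order and returns predictions on the test features. A minimal
# sketch under that assumption:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures


def train_regression_and_predict(order, X_train, y_train, X_test):
    poly = PolynomialFeatures(degree=order)
    model = LinearRegression()
    model.fit(poly.fit_transform(X_train), y_train)
    return model.predict(poly.transform(X_test))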