def main(feature_set):
    """Report coefficient significance of a linear model over repeated fits.

    Fits a fresh ``LinearRegression`` on ``MAX_ITERATIONS`` datasets obtained
    from ``get_regression_dataset(0.6, feature_set=feature_set)`` and collects
    the coefficients of every fit.  It then prints, per feature, the mean
    coefficient, its standard error, the one-sample t-statistic (against
    zero) and the two-sided p-value, and finally calls ``print_errors`` for
    the model and data of the last iteration.

    Args:
        feature_set: Sequence of feature names (strings).  It is forwarded to
            ``get_regression_dataset`` and used to label the printed rows.
    """
    import sys  # local import: only needed for the in-place progress line

    fitted_coefs = []
    for iteration in range(MAX_ITERATIONS):
        # '\r' returns the cursor to column 0 so the counter overwrites itself.
        sys.stdout.write('iteration: %d\r' % (iteration + 1))
        x_train, x_test, y_train, y_test = get_regression_dataset(
            0.6, feature_set=feature_set)
        lr = LinearRegression()
        lr.fit(x_train, y_train)
        fitted_coefs.append(lr.coef_)

    fitted_coefs = np.array(fitted_coefs)
    n_fits = len(fitted_coefs)
    mean_coef = np.mean(fitted_coefs, 0)
    # The standard error of the mean is built from the *sample* standard
    # deviation, hence ddof=1.
    se = np.std(fitted_coefs, 0, ddof=1) / np.sqrt(n_fits)
    t = mean_coef / se
    # Degrees of freedom are determined by the number of fits that were
    # averaged, not by the number of coefficients in ``t``.
    pvalue = t_table.sf(np.fabs(t), n_fits - 1) * 2

    print('\n\n{:25s}   {:s}         {:s}  {:s}     {:s}'.format(
        'Field', 'COEF', 'Standard Error', 't-Statistics', 'P-value'))
    print('================================================================================')
    for values in zip(feature_set, mean_coef, se, t, pvalue):
        print('{:25s}   {:3.4f} \t    {:3.4f} \t    {:3.4f} \t  {:3.6f}'.format(
            *values))
    print('\n')
    # Error report for the model/data left over from the final iteration.
    print_errors(lr,
                 x_train,
                 y_train.values,
                 x_test,
                 y_test.values,
                 msg='Full Features')
# Example #2
# 0
def run_bench_mark():
    """Fit four linear models on one fixed split and report their errors.

    The split comes from ``get_regression_dataset(random_state=0)``.  For each
    model a banner is printed, the model is fitted on the training part and
    then passed to ``print_errors`` together with both parts of the split.
    """
    x_train, x_test, y_train, y_test = get_regression_dataset(random_state=0)

    # (banner, factory) pairs; each model is only constructed right before it
    # is fitted, in the order listed here.
    contenders = (
        ('Linear Regression ===========================',
         lambda: LinearRegression()),
        ('Ridge Regression ============================',
         lambda: Ridge(alpha=1.0)),
        ('Lasso Regression ============================',
         lambda: Lasso(alpha=0.1)),
        ('ElasticNet Regression =======================',
         lambda: ElasticNet(random_state=0)),
    )

    for banner, build in contenders:
        print(banner)
        estimator = build()
        estimator.fit(x_train, y_train)
        print_errors(estimator, x_train, y_train, x_test, y_test)
# Example #3
# 0
def engine():
    """Choose a tree depth via ``main`` and report a tree fitted at that depth.

    Calls ``main(depth)`` for every depth from 2 to 19 and remembers the depth
    whose returned score is the smallest seen (the first one wins on ties).
    A ``DecisionTreeRegressor`` with that ``max_depth`` is then fitted on the
    data from ``get_regression_dataset()`` and passed to ``print_errors``.
    """
    lowest_score, chosen_depth = None, None

    for depth in range(2, 20):
        score = main(depth)
        # NOTE(review): the smallest score is kept.  If ``main`` returns an
        # R^2 value (as the original variable name ``r2`` suggested), larger
        # would be better and this comparison is inverted -- confirm.
        if lowest_score is not None and not (score < lowest_score):
            continue
        lowest_score, chosen_depth = score, depth

    x_train, x_test, y_train, y_test = get_regression_dataset()
    tree = DecisionTreeRegressor(max_depth=chosen_depth)
    tree.fit(x_train, y_train)
    print_errors(tree, x_train, y_train, x_test, y_test,
                 msg='Tree-Based Regression')
def main(k):
    """Run ``k_fold`` and ``print_errors`` for each of the five model builders.

    Args:
        k: Forwarded unchanged to ``k_fold`` as its fourth argument.
    """
    x_train, x_test, y_train, y_test = get_classification_dataset(0.3)

    # Reshape both target vectors into single-column 2-D arrays.
    y_train = np.reshape(y_train, (len(y_train), 1))
    y_test = np.reshape(y_test, (len(y_test), 1))

    for build in (model_1, model_2, model_3, model_4, model_5):
        candidate = build()
        # The return value of k_fold is not used; the same object that was
        # passed in is the one handed to print_errors.
        k_fold(candidate, x_train, y_train, k)
        print_errors(candidate, x_train, y_train, x_test, y_test,
                     msg=candidate.name, prf=True)

    plt.show()
def main(k):
    """Evaluate several classifiers, plus a plain linear regression.

    Logistic regression, LDA, QDA and Gaussian naive Bayes each go through
    ``k_fold`` and the object it returns is passed to ``print_errors``.  The
    ``LinearRegression`` model is fitted directly on the training data
    instead of going through ``k_fold``.

    Args:
        k: Forwarded unchanged to ``k_fold`` as its fourth argument.
    """
    x_train, x_test, y_train, y_test = get_classification_dataset(0.3)

    cross_validated = (
        (LogisticRegression, 'Logistic Regression'),
        (LDA, 'Linear Discriminant Analysis'),
        (QDA, 'Quadratic Discriminant Analysis'),
        (NB, 'Gaussian Naive Bayes'),
    )
    for estimator_cls, label in cross_validated:
        fitted = k_fold(estimator_cls(), x_train, y_train, k)
        print_errors(fitted, x_train, y_train, x_test, y_test,
                     msg=label, prf=True)

    baseline = LinearRegression()
    baseline.fit(x_train, y_train)
    print_errors(baseline, x_train, y_train, x_test, y_test,
                 msg='Linear Regression', prf=True)

    plt.show()
def run_tree_based_classification():
    """Run ``k_fold`` and ``print_errors`` for three classifiers in turn.

    The classifiers are a random forest, a decision tree and an SVC.  Each is
    passed to ``k_fold`` with the training data, and whatever ``k_fold``
    returns is handed to ``print_errors`` with the matching label.
    """
    x_train, x_test, y_train, y_test = get_classification_dataset(0.3)

    # (factory, label) pairs; each classifier is built just before its run.
    line_up = (
        (lambda: RandomForestClassifier(n_estimators=100, max_depth=2,
                                        random_state=0),
         'Random Forest Classifier'),
        (lambda: DecisionTreeClassifier(),
         'Decision Tree Classifier'),
        (lambda: SVC(gamma='scale', probability=True),
         'Support Vector Classifier (SVC)'),
    )

    for build, label in line_up:
        fitted = k_fold(build(), x_train, y_train)
        print_errors(fitted, x_train, y_train, x_test, y_test,
                     msg=label, prf=True)

    plt.show()
# Example #7
# 0
 def test(self, test_set):
     """Measure class-level accuracy of the classifier on ``test_set``.

     Every document is classified with ``self.classify``; the prediction
     counts as correct when it equals the document's class label, i.e. the
     first entry of ``doc.get_labels()``.  The subclass prediction from
     ``self.subclassify`` is only recorded alongside misclassified documents
     and does not influence the accuracy.  Misclassifications are passed to
     ``utils.print_errors``.

     The classifier should not have been trained on the test set.

     Args:
         test_set: Sized iterable of documents exposing ``get_labels()``.

     Returns:
         The fraction of correctly classified documents as a float, or 0.0
         when ``test_set`` is empty.
     """
     tested = 0
     correct = 0
     errors = []
     print('Testing with %d documents...' % (len(test_set)))
     for doc in test_set:
         actual = doc.get_labels()   # [class_label, subclass_label]
         p_class = self.classify(doc)
         # The subclassifier is selected with the *true* class label for now.
         p_subclass = self.subclassify(doc, actual[0])
         predicted = [p_class, p_subclass]
         if actual[0] == predicted[0]:
             correct += 1
         else:
             errors.append((actual, predicted, doc))
         tested += 1
     # Convert before dividing: on Python 2 ``correct / tested`` is integer
     # division and would collapse the accuracy to 0 or 1.
     accuracy = float(correct) / tested if tested else 0.0
     print('Accuracy=%f, tested=%d, correct=%d, errors=%d' % (accuracy, tested, correct, len(errors)))
     utils.print_errors(errors)
     return accuracy
# Full-batch training loop followed by a timed evaluation on the test set.
# Relies on `model`, `optimizer`, `X_train`, `Y_train`, `X_test` and `Y_test`
# being defined earlier in the script.
losses = []

epochs = 25000
for epoch in range(epochs):
    # Forward pass and loss over the whole training set in one go.
    y_pred = model(X_train)
    loss = F.smooth_l1_loss(y_pred, Y_train)

    # Standard update step: clear old gradients, backpropagate, apply.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # .item() pulls the scalar out as a plain Python float, so the history
    # list does not hold on to tensors or arrays.
    loss_value = loss.item()
    print("Epoch " + str(epoch) + ". Loss: " + str(loss_value))
    losses.append(loss_value)

# Saves the entire model object (not only a state_dict) to this path.
torch.save(model, '../cnn_v2_weights.pt')

# Inference does not need gradients; disabling autograd avoids building a
# graph for the full test set and keeps the timing below about the forward
# pass only.
# NOTE(review): if the model contains dropout or batch-norm layers,
# model.eval() should be called before this point -- confirm.
t = time.time()
with torch.no_grad():
    y_pred = model(X_test)
    elapsed = time.time() - t
    test_loss = F.smooth_l1_loss(y_pred, Y_test)
print("Time for full set", elapsed)

print("Final Loss")
print(test_loss)

print("Trained on " + str(len(X_train)) + ", Tested on " + str(len(X_test)) + " samples")

from utils import print_errors
print_errors(Y_test.detach().numpy(), y_pred.detach().numpy(), "TEST Errors:")
# Split, scale, train one of two models, persist the scaler and report errors.
# Relies on `X`, `y`, `train_regression_and_predict` and
# `train_ann_and_predict` being defined earlier in the script.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.15, random_state = 0)

# Feature Scaling: the scaler is fitted on the training split only and then
# applied unchanged to the test split.
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# joblib is used below to persist the fitted scaler.  Recent scikit-learn
# releases no longer ship `sklearn.externals.joblib`, so prefer the
# standalone package and fall back to the bundled copy on old installs.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

train_polyreg = True

if train_polyreg:
    # Train Regression
    regression_order = 1
    y_pred = train_regression_and_predict(regression_order, X_train, y_train, X_test)
    joblib.dump(sc, "../models/regression_scale.pkl")
else:
    # Train ANN
    y_pred = train_ann_and_predict(X_train, y_train, X_test)
    joblib.dump(sc, "../models/ann_scale.pkl")

# TEST
from utils import print_errors
print_errors(y_test, y_pred, "TEST Errors:")
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print("Trained on " + str(len(X_train)) + " samples")
print("Tested on " + str(len(y_test)) + " samples")