Example No. 1
    def nldas_correlate(self):
        nldas_list = pickle.load(open("wind/nldas.p", "rb"))
        nldas = None
        for temp in nldas_list:
            if self.station_id == temp.station_id:
                nldas = temp
                break
        nldas_idx = np.where(np.logical_and(nldas.date >= np.min(self.date), nldas.date <= np.max(self.date)))
        nldas_wind_speed_anomaly = nldas.wind_speed_anomaly[nldas_idx]
        nldas_wind_dir_anomaly = nldas.wind_dir_anomaly[nldas_idx]

        fit_lr = lr()
        # mask1 = self.reject_outliers(self.wind_speed_anomaly)
        mask1 = ~np.isnan(self.wind_speed_anomaly)
        fit_lr.fit(nldas_wind_speed_anomaly[mask1].reshape((len(nldas_wind_speed_anomaly[mask1]), 1)), self.wind_speed_anomaly[mask1])
        result1 = fit_lr.predict(nldas_wind_speed_anomaly[mask1].reshape((len(nldas_wind_speed_anomaly[mask1]), 1)))
        std = np.sqrt(np.sum((self.wind_speed_anomaly[mask1] - result1) ** 2) / (len(result1) - 2))
        print("Standard deviation of the wind speed estimate is", std)

        fit_lr = lr()
        # mask2 = self.reject_outliers(self.wind_dir_anomaly)
        mask2 = ~np.isnan(self.wind_dir_anomaly)
        fit_lr.fit(nldas_wind_dir_anomaly[mask2].reshape((len(nldas_wind_dir_anomaly[mask2]), 1)), self.wind_dir_anomaly[mask2])
        result2 = fit_lr.predict(nldas_wind_dir_anomaly[mask2].reshape((len(nldas_wind_dir_anomaly[mask2]), 1)))
        std = np.sqrt(np.sum((self.wind_dir_anomaly[mask2] - result2) ** 2) / (len(result2) - 2))
        print("Standard deviation of the wind direction estimate is", std)

        fig = plt.figure()
        ax1 = fig.add_subplot(211)
        ax1.plot(nldas_wind_speed_anomaly[mask1], self.wind_speed_anomaly[mask1], '.b')
        ax1.plot(nldas_wind_speed_anomaly[mask1], result1, '-r')
        ax2 = fig.add_subplot(212)
        ax2.plot(nldas_wind_dir_anomaly[mask2], self.wind_dir_anomaly[mask2], '.g')
        ax2.plot(nldas_wind_dir_anomaly[mask2], result2, '-r')
        plt.show()
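Note: the repeated reshape((len(a), 1)) calls exist because scikit-learn expects a 2-D feature matrix. A minimal sketch of the same fit-and-residual pattern, assuming lr is sklearn.linear_model.LinearRegression and substituting synthetic data for the station/NLDAS series:

import numpy as np
from sklearn.linear_model import LinearRegression as lr

rng = np.random.default_rng(0)
x = rng.normal(size=200)                        # stand-in for the NLDAS anomaly series
y = 0.8 * x + rng.normal(scale=0.3, size=200)   # stand-in for the station anomalies
mask = ~np.isnan(y)

model = lr()
model.fit(x[mask].reshape(-1, 1), y[mask])      # reshape(-1, 1) is equivalent to reshape((len(a), 1))
est = model.predict(x[mask].reshape(-1, 1))
std = np.sqrt(np.sum((y[mask] - est) ** 2) / (len(est) - 2))  # residual standard error, n - 2 dof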
Example No. 2
def main():
    plotdir = make_plotdir()
    train_X, test_X, train_y, test_y = load_data('cleveland', plotdir, print_out=False)
#   X_labels = list(train_X.columns)
    test_incoming(test_X, train_X)
    
    plot_hists(train_X, plotdir, label='Train')
    plot_hists(test_X, plotdir, label='Test')
    
    scale_cols = ['age','b_pressure','cholesterol','heart_rate','exer_depress','fluor_count']
    train_X, test_X = scale_data(train_X, test_X, scale_cols)
#   one_hot_cols = ['chest_pain','ecg_type','exer_slope','thal_defect']
    one_hot_cols = ['chest_pain']
    train_X, test_X = one_hot_encode(train_X, test_X, one_hot_cols)
#   print('one hot encode train_X head\n', train_X[:3])
    X_labels = list(train_X.columns)
    
    clf = lr()
    fit_predict(clf, train_X, train_y, test_X, test_y, label='logistic')
    cross_validate(clf, train_X, train_y['Y'], print_out=True)
    print_lr_coefs(clf, X_labels)

    clf = LinearSVC()   # data must first be scaled
    fit_predict(clf, train_X, train_y, test_X, test_y, label='svc')
    cross_validate(clf, train_X, train_y['Y'], print_out=True)
    
    explore_pca(train_X)
Example No. 3
def classify(train_data_filename, train_label_filename, dev_data_filename, dev_label_filename, 
             train_feature_dir, dev_feature_dir, feature_list, model_type='LR', 
             regularizer='l1', alpha=1.0, converg_tol=0.01, verbose=1, folds=2, n_jobs=-1, score_eval='f1'):
    
    if model_type == 'LR':
        model = lr(penalty=regularizer, C=alpha, tol=converg_tol)
    elif model_type == 'SVM':
        model = svm.LinearSVC(penalty=regularizer, C=alpha, tol=converg_tol)
    else:
        sys.exit('Model type ' + model_type + ' not supported')

    train_X, train_Y = load_features(train_data_filename, train_label_filename, train_feature_dir, 
                                     feature_list, verbose)
    # if we have separate dev data, we don't need cross-validation
    if folds < 1:
        # Try loading dev data using train vocabulary, and not saving dev feature extractions
        dev_X, dev_Y = load_features(dev_data_filename, dev_label_filename, dev_feature_dir,
                                     feature_list, verbose, vocab_source=train_feature_dir)

        dev_f1, dev_acc, train_f1, train_acc = compute_evaluation_metrics(train_X, train_Y, dev_X, dev_Y, model)
        print('train acc: ' + str(train_acc))
        print('dev acc: ' + str(dev_acc))
        neg_loss = dev_acc
    # if we don't have separate dev data, we need cross-validation
    else:
        skf = StratifiedKFold(train_Y, folds, random_state=17)  # pre-0.18 sklearn.cross_validation API
        neg_loss = cross_val_score(model, train_X, train_Y, cv=skf, scoring=score_eval, n_jobs=n_jobs).mean()
        print('cross-validation ' + score_eval + ': ' + str(neg_loss))  # the original printed an undefined f1

    return {'loss': -neg_loss, 'status': STATUS_OK, 'model': model}
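Note: StratifiedKFold(train_Y, folds, random_state=17) is the pre-0.18 sklearn.cross_validation API. A sketch of the equivalent call under the current sklearn.model_selection API (an assumption about which scikit-learn is installed):

from sklearn.model_selection import StratifiedKFold, cross_val_score

skf = StratifiedKFold(n_splits=folds, shuffle=True, random_state=17)  # random_state now requires shuffle=True
neg_loss = cross_val_score(model, train_X, train_Y, cv=skf,
                           scoring=score_eval, n_jobs=n_jobs).mean()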
Example No. 4
def trainModel():
	fh = open('train.features')
	X = []
	for x in fh:
		x = x.strip()
		x = x.split(',')
		x = [int(x1) for x1 in x]

		X.append(x)
	fh.close()
	fh = open('train.labels')
	Y = []
	for y in fh:
		y = y.strip()
		Y.append(int(y))
	fh.close()
	clf = lr()
	clf.fit(X,Y)
	print(sigmoid(clf.predict([X[45]])))  # note: sigmoid of a 0/1 label; predict() needs a 2-D input
	print(clf.coef_)
	#np.save("lr_coeff",clf.coef_)
	print(clf.intercept_)
	#np.save("lr_intercept",clf.intercept_)
	score = np.dot(clf.coef_, X[45]) + clf.intercept_
	print(sigmoid(score))

	coeff = np.load("lr_coeff.npy")
	intercept = np.load("lr_intercept.npy")
	score = np.dot(coeff, X[45]) + intercept
	print(sigmoid(score))
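Note: the manual sigmoid(np.dot(clf.coef_, X[45]) + clf.intercept_) computation reproduces what predict_proba already returns for the positive class. A minimal check, assuming binary labels:

proba = clf.predict_proba([X[45]])[0, 1]                    # P(y=1) for sample 45
manual = sigmoid(np.dot(clf.coef_, X[45]) + clf.intercept_)
assert np.isclose(proba, manual[0])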
Example No. 5
def predict_lr(X_train, X_test, y_train, y_test):
    clf = lr()
    print("lr started")
    clf.fit(X_train,y_train)
    y_pred=clf.predict(X_test)
    calc_accuracy("Logistic regression",y_test,y_pred)
    np.savetxt('submission_surf_lr.csv', np.c_[range(1,len(y_test)+1),y_pred,y_test], delimiter=',', header = 'ImageId,Label,TrueLabel', comments = '', fmt='%d')
    return clf
Example No. 6
def main():
    if (not path.exists('training_images_pos0.npy')
            or not path.exists('training_images_neg0.npy')):  # fixed 'training_image' typo; regenerate if either file is missing
        convert_to_numpy()

    # shape: (n_samples, 256, 128, 3)
    train_pos = np.load('training_images_pos0.npy')
    train_neg = np.load('training_images_neg0.npy')
    print(train_pos.shape)
    print(train_neg.shape)

    # flatten images
    train_pos = preprocessing.minmax_scale(
        train_pos.reshape((train_pos.shape[0], 32768)))
    train_neg = preprocessing.minmax_scale(
        train_neg.reshape((train_neg.shape[0], 32768)))

    pos_label = np.ones(train_pos.shape[0])
    neg_label = np.zeros(train_neg.shape[0])

    trainX = np.concatenate((train_pos, train_neg))
    trainY = np.concatenate((pos_label, neg_label))

    idxs = np.random.permutation(trainX.shape[0])

    trainX = trainX[idxs]
    trainY = trainY[idxs]

    # train_pos = np.concatenate((train_pos, pos_label), axis=1)
    # train_neg = np.concatenate((train_neg, neg_label), axis=1)
    model = lr()
    clf = model.fit(trainX, trainY)
    pickle.dump(model, open('model.sav', 'wb'))

    test_pos = np.load('testing_images_pos0.npy')
    test_neg = np.load('testing_images_neg0.npy')
    print(test_pos.shape)
    print(test_neg.shape)

    test_pos = preprocessing.minmax_scale(
        test_pos.reshape((test_pos.shape[0], 32768)))
    test_neg = preprocessing.minmax_scale(
        test_neg.reshape((test_neg.shape[0], 32768)))

    pos_label_test = np.ones(test_pos.shape[0])
    neg_label_test = np.zeros(test_neg.shape[0])

    testX = np.concatenate((test_pos, test_neg))
    testY = np.concatenate((pos_label_test, neg_label_test))

    idxs = np.random.permutation(testX.shape[0])

    testX = testX[idxs]
    testY = testY[idxs]

    print(clf.score(testX, testY))
    print(roc_auc_score(testY, clf.predict_proba(testX)[:, 1]))  # the original discarded this value
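Since the fitted model is pickled to model.sav, a later session can score without retraining; a short sketch:

import pickle

with open('model.sav', 'rb') as f:
    clf = pickle.load(f)
print(clf.score(testX, testY))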
Example No. 7
def run_lr():
	clf = lr()
	print("lr started")
	clf.fit(x,y)
	#print clf.n_layers_
	pred=clf.predict(x_)
	#print(pred)
	np.savetxt('submission_lr.csv', np.c_[range(1,len(test)+1),pred,label_test], delimiter=',', header = 'ImageId,Label,TrueLabel', comments = '', fmt='%d')
	calc_accuracy("Logistic regression",label_test,pred)
Example No. 8
def main():
    dfcol, dups = readRawColumns()
    dftrain, dftrain_y, dftest, dftest_y = readRawData(dfcol)

    dftrain = renameColumns(dftrain)
    dftest = renameColumns(dftest)
    print("dftrain shape head", dftrain.shape, "\n", dftrain[:3])
    print("dftest shape head", dftest.shape, "\n", dftest[:3])
    print("dftrain stats\n", dftrain.describe())
    # groupby subject, activity(y) ?
    #    print("dftrain group by subject stats\n", dftrain.groupby('subject').describe())

    make_plotdir()
    explore_pca(dftrain, dftest, "all")  # 562 columns

    clf = LinearSVC()
    print("fitting LinearSVC")
    fit_predict(clf, dftrain, dftrain_y, dftest, dftest_y,
                'raw data, all cols')
    fit_predict(clf, dftrain.iloc[:, :30], dftrain_y, dftest.iloc[:, :30],
                dftest_y, 'raw data, 30 cols')  # .ix was removed from pandas; .iloc is the positional equivalent
    # 30 columns not sorted by pca - only 70% accuracy

    X_train, X_test = quick_pca(dftrain, dftest, ncomps=100)

    print("fitting LinearSVC with PCA input")
    preds = []
    for j in [10, 20, 30, 50, 100]:
        p = fit_predict(clf, X_train[:, :j], dftrain_y, X_test[:, :j],
                        dftest_y, 'pca {:d} cols'.format(j))
        preds.append((j, p))
    plot_pca_fit(preds, "svc", "SVC")

    do_svc_gridsearch(X_train[:, :30], dftrain_y)

    print("Cross-validating LinearSVC with PCA input")
    get_cv_scores(clf, X_train[:, :30],
                  dftrain_y)  # randomized, not grouped by subject
    # 30 columns sorted by pca - 89% accuracy

    clf = lr()
    print("fitting Logistic Regression with PCA input")
    preds = []
    for j in [10, 20, 30, 50, 100]:
        p = fit_predict(clf, X_train[:, :j], dftrain_y, X_test[:, :j],
                        dftest_y, 'pca {:d} cols'.format(j))
        preds.append((j, p))
    plot_pca_fit(preds, "lr", "Logistic Regression")
    print("Cross-validating Logistic Regression with PCA input")
    get_cv_scores(clf, X_train[:, :30], dftrain_y)

    txt = '''\nConclusion: Using PCA as input to Logistic Regression or LinearSVC is effective, 
with 91% accuracy using only 30 components (5.4% of 562 total).  For six 
predicted classes, a classification report shows precision of 85% and greater 
(also confirmed by confusion matrix).  Cross-validation gives average fit 
scores of 89% +- 5%.'''
    print(txt)
Example No. 9
def regressMissingData(x, y, xnew, robust=True):
    '''
    linear or robust linear regression to fill in missing data.
    
    author: Nat
    
    input: 
        x: independent variables with corresponding dependent variable y
        xnew: independent variables with MISSING dependent variable y
        y: dependent variable which is known
    output:
        ynew: regressed y value where y is missing       
    '''
    import pandas as pd
    from sklearn.linear_model import LinearRegression as lr
    m = lr()
    m.fit(x, y)
    ynew_lr = pd.DataFrame(m.predict(xnew), columns=['WON_MONTH2'])

    from sklearn.linear_model import RANSACRegressor as ransac
    m_ransac = ransac(lr())
    m_ransac.fit(x, y)
    ynew_ransac = pd.DataFrame(m_ransac.predict(xnew), columns=['WON_MONTH2'])
    #    import numpy as np
    #    from matplotlib import pyplot as plt
    #    yhat_lr = pd.DataFrame(m.predict(x))
    #    yhat_ransac = pd.DataFrame(m_ransac.predict(x))
    #    inlier_mask = m_ransac.inlier_mask_
    #    outlier_mask = np.logical_not(inlier_mask)
    #    plt.scatter(x[inlier_mask], y[inlier_mask],
    #                color='green', marker='.',
    #                label='Inliers')
    #    plt.scatter(x[outlier_mask], y[outlier_mask],
    #                color='red', marker='.',
    #                label='Outliers')
    #    plt.plot(pd.concat([x,xnew]), pd.concat([yhat_ransac, ynew_ransac]), '-',
    #             label='RANSAC regressor')
    #    plt.plot(pd.concat([x,xnew]), pd.concat([yhat_lr, ynew_lr]), '-',
    #             label='linear regressor')
    #    plt.show()
    if robust:
        return ynew_ransac
    else:
        return ynew_lr
Example No. 10
def train_edge_classification(X_train, Y_train):
    """
    Train the classifier with the train set.
    :param X_train: The edge features (norm) for the train set.
    :param Y_train: The edge labels: 0 for true, 1 for false (train set).
    :return: The classifier
    """
    classif2 = TopKRanker(lr())
    classif2.fit(X_train, Y_train)
    return classif2
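Note: TopKRanker is not defined in any of these snippets. A common implementation in node-embedding codebases (a sketch under that assumption, not necessarily the exact class used here) subclasses OneVsRestClassifier and keeps, for each sample, its k highest-probability labels:

import numpy as np
from sklearn.multiclass import OneVsRestClassifier

class TopKRanker(OneVsRestClassifier):
    def predict(self, X, top_k_list):
        probs = np.asarray(super(TopKRanker, self).predict_proba(X))
        prediction = np.zeros(probs.shape, dtype=int)
        for i, k in enumerate(top_k_list):
            top_k = probs[i, :].argsort()[-int(k):]  # indices of the k most probable labels
            prediction[i, top_k] = 1
        return prediction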
Example No. 11
    def get_model(self, args):
        if args['model']['model'] == 'LR':
            model = lr(penalty=args['model']['regularizer_lr'], C=args['model']['C_lr'], n_jobs=self.cjobs)
        elif args['model']['model'] == 'SVM':
            if args['model']['regularizer_svm'] == 'l1':
                # squared hinge loss is not available when the penalty is l1
                model = svm.LinearSVC(C=args['model']['C_svm'], penalty=args['model']['regularizer_svm'], dual=False)  # LinearSVC does not accept n_jobs; loss='hinge' is likewise unavailable with l1
            else:
                model = svm.LinearSVC(C=args['model']['C_svm'], penalty=args['model']['regularizer_svm'])  # LinearSVC does not accept n_jobs
        return model
Example No. 12
def get_classifier():
    x = query_features[query_features.columns[2:23]]
    y = query_features[query_features.columns[-1]]
    x_train, x_test, y_train, y_test = sk_model.train_test_split(x,
                                                                 y,
                                                                 test_size=0.2)

    clf = lr(max_iter=1000).fit(x_train, y_train)

    return clf
Example No. 13
def fit_and_test():
    data, target = pd.read_train()

    train_x, val_x, train_y, val_y = t(data, target, test_size=0.1)

    m = lr()
    m.fit(train_x, train_y)

    print("Score on validation")
    print(m.score(val_x, val_y))
Example No. 14
def linear_regression(x, y):
    lineerreg = lr()  # we use sklearn's linear regression model under the name 'lineerreg'
    lineerreg.fit(x, y)  # the model learns from the sample data via fit()
    lineerreg.predict(x)  # prediction function
    m = lineerreg.coef_  # slope
    b = lineerreg.intercept_  # intercept
    plt.scatter(x, y)  # show the points with matplotlib
    plt.plot(x, lineerreg.predict(x), c="red")  # draw the fitted line
    plt.show()  # display the plot
Example No. 15
def train_edge_classification(X_train, Y_train):
    """
    Train the edge classifier on the train set.
    :param X_train: The edge features (norm) for the train set.
    :param Y_train: The edge labels: 0 for true, 1 for false (train set).
    :return: The trained classifier
    """
    classif2 = TopKRanker(lr())
    classif2.fit(X_train, Y_train)
    return classif2
Example No. 16
    def construct_all_models(self, hyperTune):
        if hyperTune:
            # 3 models: KNN, SVM and LR
            self.models = {'SVM': [SVC(kernel='linear', probability=True), dict(C=np.arange(0.01, 2.01, 0.2))],
                           'LogisticRegression': [lr(), dict(C=np.arange(0.1, 3, 0.1))],
                           'KNN': [KNeighborsClassifier(), dict(n_neighbors=range(1, 100))]}
            for name, candidate_hyperParam in self.models.items():
                # update each classifier after training and tuning
                self.models[name] = self.train_with_hyperParamTuning(
                    candidate_hyperParam[0], name, candidate_hyperParam[1])
            print('\nTraining process finished\n\n\n')
Example No. 17
def eval_node_classification(X_train, Y_train, X_test, Y_test):

    # y_train = (n_sample, n_classes)
    top_k_list = list(Y_test.sum(axis=1))
    classif2 = TopKRanker(lr(solver='liblinear'))
    classif2.fit(X_train, Y_train)
    prediction = classif2.predict(X_test, top_k_list)
    micro = f1_score(Y_test, prediction, average='micro')
    macro = f1_score(Y_test, prediction, average='macro')

    return micro, macro
Example No. 18
def getR2(y_actual, factor, isRet=False):
    n = len(y_actual)
    y = np.array(y_actual).reshape((n, 1))
    x = np.array(factor).reshape((n, 1))
    if isRet:
        n = n - 1
        y = np.log(y[1:] / y[:-1])
        x = x[:-1]
    reg = lr()
    reg.fit(x, y)
    return r2_score(y, reg.predict(x))
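A quick usage sketch for getR2, with synthetic prices and a synthetic factor (assuming lr is LinearRegression and r2_score comes from sklearn.metrics):

import numpy as np

prices = 100 * np.exp(np.cumsum(np.random.normal(0, 0.01, size=250)))  # synthetic price path
factor = np.random.normal(size=250)                                    # synthetic factor
print(getR2(prices, factor))                # R^2 of price levels on the factor
print(getR2(prices, factor, isRet=True))    # R^2 of log returns on the lagged factor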
Example No. 19
def explore_params(loans_X, loans_y, plotdir, app, appf):
    '''Explore fit parameters on training data,
       grid search of fit scores, boxplot gridsearch results.'''
    clf = lr()
    param_grid = [{'C': [0.01, 0.03, 0.1, 0.3, 1.0, 3.0, 10.0]}]
    gs = GridSearchCV(estimator=clf, param_grid=param_grid, cv=10, \
      verbose=1, n_jobs=-1, scoring='accuracy')
    gs.fit(loans_X, loans_y)  # fit all grid parameters
    print("gs grid scores\n", gs.grid_scores_)
    print("gs best score %.5f %s\n%s" % \
      (gs.best_score_, gs.best_params_, gs.best_estimator_))
    gridscore_boxplot(gs.grid_scores_, plotdir, app, appf, "C", "solver='liblinear'")
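Note: gs.grid_scores_ was removed in scikit-learn 0.20; the same information now lives in gs.cv_results_. A sketch of the replacement reporting, assuming a current scikit-learn:

for mean, std, params in zip(gs.cv_results_['mean_test_score'],
                             gs.cv_results_['std_test_score'],
                             gs.cv_results_['params']):
    print("%.5f +/- %.5f %s" % (mean, std, params))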
Example No. 20
def cal_linear_reg_r(y, x=None):
    '''
    Compute the slope of the data points in y via simple (one-variable) linear regression.
    y and x may be list, pd.Series, or np.array.
    '''
    if isnull(x):
        X = pd.DataFrame({'X': range(0, len(y))})
    else:
        X = pd.DataFrame({'X': x})
    y = pd.Series(y)
    mdl = lr().fit(X, y)
    return mdl.coef_[0], mdl.intercept_
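A quick usage sketch for cal_linear_reg_r (assuming the module-level isnull helper treats None as missing): fitting y = 2x + 1 sampled at x = 0..4 should return a slope near 2.

coef, intercept = cal_linear_reg_r([1, 3, 5, 7, 9])
print(coef, intercept)  # ~2.0, ~1.0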
Example No. 21
def myLr(x, y, xnew):
    '''
    calls sklearn.linear_model.LinearRegression
    wrapper author: Nat
    '''
    from sklearn.linear_model import LinearRegression as lr
    import numpy as np
    model = lr()
    model.fit(x, y)
    ynew = model.predict(xnew)
    ynew = np.where(ynew < 0, 0, ynew)
    return ynew
Example No. 22
def _plotDegreedist(degree_df, plot_model=False, path=None):
    """
    Args:
        degree_df (pandas.DataFrame): data frame that includes degree.
            Degree info should be stored in the column "degree".

        plot_model (bool): Whether to plot the linear approximation line.

        path (str): Folder path to save plots. If the folder does not exist at the path, the function creates it.
            If None, plots will not be saved. Default is None.
    """

    from sklearn.linear_model import LinearRegression as lr
    df = degree_df.copy()

    dist = df.degree.value_counts() / df.degree.value_counts().sum()
    dist.index = dist.index.astype(int)  # np.int was removed from NumPy

    fig, ax = plt.subplots(1, 2)

    ax[0].scatter(dist.index.values, dist.values, c="black")
    ax[0].set_title("degree distribution")
    ax[0].set_xlabel("k")
    ax[0].set_ylabel("P(k)")

    #plt.yscale('log')
    #plt.xscale('log')

    x = np.log(dist.index.values).reshape([-1, 1])
    y = np.log(dist.values).reshape([-1, 1])
    if plot_model:
        model = lr()
        model.fit(x, y)
        x_ = np.array([-1, 5]).reshape([-1, 1])
        y_ = model.predict(x_)

        ax[1].set_title(
            f"degree distribution (log scale)\nslope: {model.coef_[0][0] :.4g}, r2: {model.score(x,y) :.4g}"
        )
        ax[1].plot(x_.flatten(), y_.flatten(), c="black", alpha=0.5)
    else:
        ax[1].set_title(f"degree distribution (log scale)")

    ax[1].scatter(x.flatten(), y.flatten(), c="black")
    ax[1].set_ylim([y.min() - 0.2, y.max() + 0.2])
    ax[1].set_xlim([-0.2, x.max() + 0.2])
    ax[1].set_xlabel("log k")
    ax[1].set_ylabel("log P(k)")

    if path is not None:
        fig.savefig(path, transparent=True)
    plt.show()
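A usage sketch for _plotDegreedist, building the expected one-column frame from a synthetic scale-free graph (networkx here is an assumption; any source of node degrees works):

import networkx as nx
import pandas as pd

G = nx.barabasi_albert_graph(1000, 3)
degree_df = pd.DataFrame({"degree": [d for _, d in G.degree()]})
_plotDegreedist(degree_df, plot_model=True)  # the fitted slope approximates the power-law exponent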
Example No. 23
    def regression(self, metric="neg_root_mean_squared_error", folds=10, alphas=[], graph=False):  # "neg_..." is the scorer name sklearn recognizes
        size = 1.3 * self.report_width // 10

        models = {}
        models["Linear regressor"]                  = lr()
        models["Lasso regressor"]                   = lassor()
        models["Lasso CV regressor"]                = lassocvr()
        models["Ridge regressor"]                   = rr(alpha=0, normalize=True)
        models["Ridge CV regressor"]                = rcvr(alphas = alphas)
        models["K nearest neighbors regressor K2u"] = knnr(n_neighbors=2, weights='uniform')
        models["K nearest neighbors regressor K2d"] = knnr(n_neighbors=2, weights='distance')
        models["K nearest neighbors regressor K5"]  = knnr(n_neighbors=5)
        models["K nearest neighbors regressor K10"] = knnr(n_neighbors=10)
        models["SGD regressor"]                     = sgdr(max_iter=10000, warm_start=True)
        models["Decision tree regressor"]           = dtr()
        models["Decision tree regressor D3"]        = dtr(max_depth=3)
        models["Random forest regressor"]           = rfr()
        models["Ada boost regressor"]               = abr()
        models["Gradient boost regressor"]          = gbr()
        models["Support vector regressor"]          = svr()
        self.models = models

        print('\n')
        print(self.report_width * '*', '\n*')
        print('* REGRESSION RESULTS - BEFORE PARAMETERS BOOSTING \n*')
        #kf = StratifiedKFold(n_splits=folds, shuffle=True)
        kf = KFold(n_splits=folds)
        results = []
        names = []
        for model_name in models:
            cv_scores = -1 * cross_val_score(models[model_name], self.Xt_train, self.yt_train.values.ravel(), cv=kf, scoring=metric)  # "neg_*" scorers return negated losses, so flip the sign
            results.append(cv_scores)
            names.append(model_name)
        print(self.report_width * '*', '')
        report = pd.DataFrame({'Regressor': names, 'Score': results})
        report['Score (avg)'] = report.Score.apply(lambda x: x.mean())
        report['Score (std)'] = report.Score.apply(lambda x: x.std())
        report['Score (VC)'] = 100 * report['Score (std)'] / report['Score (avg)']
        report.sort_values(by='Score (avg)', inplace=True)
        report.drop('Score', axis=1, inplace=True)
        display(report)
        print('\n')
        if graph:
            fig, ax = plt.subplots(figsize=(size, 0.5 * size))
            plt.title('Regressor Comparison')
            #ax = fig.add_subplot(111)
            plt.boxplot(results)
            ax.set_xticklabels(names)
            plt.xticks(rotation=45)
            plt.subplots_adjust(hspace=0.0)
            plt.show()             
        return None
Example No. 24
    def test(self, model_name, graph=False):
        size = 1.3 * self.report_width // 10
        model = self.models[model_name]
        # fit using the train subset
        X, y = self.Xt_train, self.yt_train
        model.fit(X, y)

        # evaluate using the test subset
        X, y = self.Xt_test, self.yt_test
        
        if self.strategy == 'regression':
            y_hat = model.predict(X)
            # show residual analysis
            self.residual(y, y_hat, model_name, graph)
            if graph:
                # show the correlation between y and y_hat
                fig, ax = plt.subplots(figsize=(size, 0.5 * size))
                plt.title('Model Overall Performance')
                plt.scatter(y, y_hat, color='g')
                viewer = lr()
                plt.plot(y, viewer.fit(y, y_hat).predict(y), color='k')
                plt.xlabel('Observed')
                plt.ylabel('Predicted')
                plt.show()

        else:
            y_pred = model.predict(X)
            sample_size = len(y_pred)
            print('\n')
            print(self.report_width * '*', '\n*')
            print('* MODEL PERFORMANCE \n*')
            print('* MODEL NAME: ', model_name)
            print('* TEST SAMPLE SIZE: ', sample_size)
            print('* ACCURACY: ', round(accuracy_score(y, y_pred)*100, 1), '%')
            print('* ')
            print(self.report_width * '*', '\n')
            report = classification_report(y, y_pred, output_dict=True)
            if graph:
                fig, ax = plt.subplots(figsize=(size, 0.3 * size))
                plt.title('Confusion Matrix')
                sns.heatmap(confusion_matrix(y, y_pred), annot=True, cmap='YlGn', fmt='d',)
                plt.xlabel('Predicted')
                plt.ylabel('True Class')
                plt.show()
                fig, ax = plt.subplots(figsize=(size, 0.5 * size))
                plt.title('Classification Report')
                sns.heatmap(pd.DataFrame(report).iloc[0:3].T, annot=True, vmin=0, vmax=1, cmap='BrBG', fmt='.2g')
                plt.xlabel('Score')
                plt.show()
            else:
                display(pd.DataFrame(report).T)
        return None
Example No. 25
def log_reg(x, y, t, q):

    # Logistic Regression predictor initialization

    pred = lr(solver="saga", max_iter=200, multi_class="multinomial", tol=0.1)
    start = timer()  # Start timer
    pred.fit(x, y)  # Predictor training
    pred.result = pred.score(t, q)  # Predictor test
    pred.error = 1 - pred.result  # error probability
    pred.end = timer() - start  # End timer
    q = pred.predict(t)

    return q, pred
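Note: timer is not imported in this snippet; the usual binding (an assumption about the surrounding module) is:

from timeit import default_timer as timer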
Example No. 26
def gs(x, y="prob"):
    #70/30 train test split
    x_train, x_test, y_train, y_test = tts(x, x.label, test_size=0.3)
    data = x_train.iloc[:, :32]
    test_data = x_test.iloc[:, :32]

    #train model
    classifier = lr(random_state=0).fit(data, y_train)
    if y == "prob":
        pred = classifier.predict_proba(test_data)
    else:
        pred = classifier.predict(test_data)
    return pred, y_test.values
Example No. 27
def model_stop(df):
    #df = pd.get_dummies(df,columns=['day'])
    #features = ['day_'+str(i) for i in range(0,7)]
    #for f in features:
    #    if f not in df.columns:
    #        df[f] = 0
    df = df[df['traveltime'] < df['traveltime'].quantile(0.95)]
    features = ['rain','temp','vappr','hour','hour2','hour3','hour4','day','day2','day3','day4']
    for i in range(2,5):
        df['hour'+str(i)] = df['hour'] ** i
        df['day'+str(i)] = df['day'] ** i
    model = lr(fit_intercept=True).fit(df[features],df['traveltime'])
    return model,df,features 
Example No. 28
def evaluateNodeClassification(X, Y, test_ratio):
    X_train, X_test, Y_train, Y_test = sk_ms.train_test_split(
        X, Y, test_size=test_ratio)
    try:
        top_k_list = list(Y_test.toarray().sum(axis=1))
    except AttributeError:  # Y_test is already a dense array
        top_k_list = list(Y_test.sum(axis=1))
    classif2 = TopKRanker(lr())
    classif2.fit(X_train, Y_train)
    prediction = classif2.predict(X_test, top_k_list)
    micro = f1_score(Y_test, prediction, average='micro')
    macro = f1_score(Y_test, prediction, average='macro')
    return (micro, macro)
Example No. 29
    def create_model(self, model_type, parameters):

        if model_type == 'lr':
            model = lr()
        elif model_type == 'svm':
            model = svm()
        elif model_type == 'mlp':
            model = mlp()
        elif model_type == 'rf':
            model = rf()
        elif model_type == 'xgb':
            model = xgb()
        else:
            raise ValueError('Unsupported model type: ' + str(model_type))
        return model.set_params(**parameters)
Example No. 30
def version1():  # Logistic Regression Model
    # assumes sklearn's train_test_split; the original discarded its return value
    # and passed a stray positional 100 (presumably meant as random_state)
    features_train, features_test, labels_train, labels_test = \
        train_test_split(df["reviewText"], df["Positivity"], random_state=100)

    vectorizer = cv().fit(features_train)  # fit the vectorizer once, on training text only
    features_train_vectorized = vectorizer.transform(features_train)
    features_test_vectorized = vectorizer.transform(features_test)

    model = lr().fit(features_train_vectorized,
                     labels_train)  # Model creation for logistic regression
    predictions = model.predict(features_test_vectorized)

    ras(labels_test, predictions)  # Generating prediction score
    cm(labels_test, predictions)

    return model
Example No. 31
    def __init__(self, model_type=None, column_names=None, metric='f1', **kwargs):
        self.model_type = model_type
        self.column_names = column_names
        self.params = kwargs
        self.trained = None
        self.metric = metric
        if model_type == 'LR':
            if self.params.get('regularization', None) is None:
                self.params['regularization'] = 'l1'
            if self.params.get('alpha', None) is None:
                self.params['alpha'] = 1.0
            self.model = lr(penalty=self.params['regularization'], C=self.params['alpha'])
        elif model_type == 'SVM' or model_type == 'SVMNB':
            if self.params.get('kernel', None) is None:
                self.params['kernel'] = 'rbf'

            if model_type == 'SVM':
                if self.params.get('alpha', None) is None:
                    self.params['alpha'] = 0.1
            else:  # elif model_type == SVMNB:
                self.params['kernel'] = 'linear'
                if self.params.get('alpha', None) is None:
                    self.params['alpha'] = 1
                if self.params.get('beta', None) is None:
                    self.params['beta'] = 0.25

            if self.params['kernel'] == 'linear':
                # override regularization parameter to avoid a conflict
                self.params['regularization'] = 'l2'
                self.model = svm.LinearSVC(C=self.params['alpha'])
            else:  # elif self.params['kernel'] != 'linear':
                if self.params.get('degree', None) is None:
                    self.params['degree'] = 3
                if self.params.get('gamma', None) is None:
                    self.params['gamma'] = 0.0
                if self.params.get('coef0', None) is None:
                    self.params['coef0'] = 0.0
                self.model = svm.SVC(C=self.params['alpha'], kernel=self.params['kernel'], degree=self.params['degree'],
                                     gamma=self.params['gamma'], coef0=self.params['coef0'])
        elif model_type == 'MNB':
            if 'alpha' not in self.params:
                self.params['alpha'] = 1.0
            self.model = MultinomialNB(alpha=self.params['alpha'], fit_prior=True)
        elif model_type == 'myMNB':
            if 'alpha' not in self.params:
                self.params['alpha'] = 1.0
            self.model = None
        else:
            self.model_type = 'default'
            self.model = None
Example No. 32
    def linear_model(self, nldas_wind, type='speed'):
        X = nldas_wind
        if type == 'speed':
            y = self.wind_speed_anomaly
        else:
            y = self.wind_dir_anomaly
        mask = ~np.isnan(y)
        X = X[mask].reshape((len(X[mask]), 1))
        y = y[mask]
        lr_model = lr()
        lr_model.fit(X, y)
        est_y = lr_model.predict(X)
        std = np.sqrt(np.sum((est_y - y) ** 2) / (len(y) - 2))
        return lr_model, std
Example No. 33
def predict_lr(X, y, X_train, X_test, y_train, y_test):
    clf = lr(solver='lbfgs', multi_class='ovr')
    print("======Logistic Regression======")
    clf.fit(X_train, y_train)
    pickle.dump(clf, open('logreg_trained_new.sav', 'wb'))
    y_pred = clf.predict(X_test)
    calc_accuracy("Logistic regression", y_test, y_pred)
    np.savetxt('submission_surf_lr.csv',
               np.c_[range(1,
                           len(y_test) + 1), y_pred, y_test],
               delimiter=',',
               header='ImageId,Label,TrueLabel',
               comments='',
               fmt='%d')
Example No. 34
def LR_from_cfg(params):
    X_ = X[:]
    clf = lr(**params)
    if params['penalty'] == 'l2':
        if params['dual'] is True:
            if params['solver'] == 'liblinear':
                if params['multi_class'] == 'multinomial':
                    return 1 - 0.001
                else:
                    return 1 - cross_val_score(clf, X_, y, cv=5).mean()
            else:
                return 1 - 0.001
        else:
            if params['solver'] == 'liblinear' and params[
                    'multi_class'] == 'multinomial':
                return 1 - 0.001
            else:
                return 1 - cross_val_score(clf, X_, y, cv=5).mean()
    elif params['penalty'] == 'l1':
        if params['dual'] is True:
            return 1 - 0.001
        else:
            if params['solver'] == 'liblinear':
                if params['multi_class'] == 'multinomial':
                    return 1 - 0.001
                else:
                    return 1 - cross_val_score(clf, X_, y, cv=5).mean()
            elif params['solver'] == 'saga':
                return 1 - cross_val_score(clf, X_, y, cv=5).mean()
            else:
                return 1 - 0.001
    elif params['penalty'] == 'elasticnet':
        if params['dual'] is True:
            return 1 - 0.001
        else:
            if params['solver'] == 'saga':
                return 1 - cross_val_score(clf, X_, y, cv=5).mean()
            else:
                return 1 - 0.001
    elif params['penalty'] == 'none':
        if params['dual'] is True:
            return 1 - 0.001
        else:
            if params['solver'] == 'liblinear':
                return 1 - 0.001
            else:
                return 1 - cross_val_score(clf, X_, y, cv=5).mean()
    else:
        return 1 - cross_val_score(clf, X_, y, cv=5).mean()
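All of the nested branches above return the sentinel 1 - 0.001 for penalty/solver/dual/multi_class combinations that LogisticRegression rejects. A more compact sketch with the same intent (assuming the same global X and y) lets scikit-learn do the validation:

def LR_from_cfg_compact(params):
    clf = lr(**params)
    try:
        return 1 - cross_val_score(clf, X, y, cv=5, error_score='raise').mean()
    except ValueError:  # invalid penalty/solver/dual/multi_class combination
        return 1 - 0.001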
Example No. 35
def evaluateNodeClassification(X_train, X_test, Y_train, Y_test):
    try:
        top_k_list = list(Y_test.toarray().sum(axis=1))
    except AttributeError:  # Y_test is already a dense array
        top_k_list = list(Y_test.sum(axis=1))
    classif2 = TopKRanker(lr())
    try:
        classif2.fit(X_train, Y_train)
        prediction = classif2.predict(X_test, top_k_list)
    except Exception:
        print('Could not fit node classification model')
        prediction = np.zeros(Y_test.shape)
    micro = f1_score(Y_test, prediction, average='micro')
    macro = f1_score(Y_test, prediction, average='macro')
    return prediction
Example No. 36
def log_reg(x, y, t, q):
    """ This function is an amalgamation of different minute tasks that
    I just gathered into a single call function to ease work."""

    pred = lr(solver="saga", tol=0.001, max_iter=600, n_jobs=-1, fit_intercept=True)
    pred.fit(x, y)                             # Predictor training
    g = pred.score(t, q)                       # Predictor test
    pred = pred.predict(t)                     # Predicting labels

    # Print some information for the user
    print("------------------------------------------")
    print("Accuracy rate is {}%".format(round(g * 100, 3)))
    print("Error rate is {}%".format(round((1 - g) * 100, 3)))

    return pred
Example No. 37
    def train_model(self):
        '''
        Trains simple logistic regression using the class labels.
        No regularization. The Metonymi features do all of the heavy lifting!
        '''
        print('TRAINING MODEL...')
        labels = self.frame[:, -1]
        frame = scale(self.frame[:, :-1])
        self.train, self.test, self.train_labels, self.test_labels = \
        tts(frame, labels, random_state=26, test_size=.15)
        self.model = lr(max_iter=200)
        self.model.fit(self.train, self.train_labels)
        print('DONE!\n')

        return True
Example No. 38
def _train_SKLR_Classifier(extractedBases, lbls, params = {}):
    """ NLTK ME Training Wrapper"""

    Xtrn = makeSKFormat(extractedBases)
    ytrn = lbls

    C = params.get('C', 10)
    penalty = params.get('penalty', 'l1')
    class_weight = params.get('class_weight', 'balanced')  # 'auto' was renamed 'balanced' in scikit-learn 0.17
    tol = params.get('tol', 1e-6)

    classifier = lr(C=C, penalty=penalty,
                    class_weight=class_weight, tol=tol)

    classifier.fit(Xtrn,ytrn)

    return classifier, list(classifier.classes_)
Example No. 39
def classify(data_filename, label_filename, feature_dir, list_of_features, model_type='LR',
             regularizer='l1', alpha=1.0, verbose=1):

    labels = pd.read_csv(label_filename, header=0, index_col=0)

    if not os.path.exists(feature_dir):
        os.makedirs(feature_dir)

    # for each feature in feature_list:
    items = None
    feature_matrices = []
    column_names = []
    print("Loading features")
    for feature in list_of_features:
        feature_description = feature
        rows, columns, counts = feature_loader.load_feature(feature_description, feature_dir, data_filename, verbose=1)
        if items is None:
            items = rows
        else:
            assert items == rows
        if verbose > 0:
            print("Loaded", feature, "with shape", counts.shape)
        feature_matrices.append(counts)
        column_names.append(columns)

    # concatenate all features together
    X = sparse.csr_matrix(sparse.hstack(feature_matrices))
    column_names = np.concatenate(column_names)
    if verbose > 0:
        print("Full feature matrix size:", X.shape)

    #return items, column_names, X
    if model_type == 'LR':
        model = lr(penalty=regularizer, C=alpha)
    elif model_type == 'SVM':
        model = svm.LinearSVC(C=alpha, penalty=regularizer)
    else:
        sys.exit('Model type ' + model_type + ' not supported')

    y = labels.values.ravel()  # as_matrix() was removed in modern pandas
    model.fit(X, y)
    pred = model.predict(X)
    f1 = f1_score(y_true=y, y_pred=pred)
    print(f1)
    return {'loss': -f1, 'status': STATUS_OK}
Example No. 40
def classify_one_model(feature_list, model_type='LR', regularizer='l1', alpha=1.0, converg_tol=0.01, verbose=1, folds=2, n_jobs=-1, score_eval='f1'):

    if model_type == 'LR':
        model = lr(penalty=regularizer, C=alpha, tol=converg_tol)
    elif model_type == 'SVM':
        model = svm.LinearSVC(penalty=regularizer, C=alpha, tol=converg_tol)
    else:
        sys.exit('Model type ' + model_type + ' not supported')

    train_X, train_Y = load_features(train_data_filename, train_label_filename, train_feature_dir, 
                                     feature_list, verbose)
    # Try loading dev data using train vocabulary, and not saving dev feature extractions
    dev_X, dev_Y = load_features(dev_data_filename, dev_label_filename, dev_feature_dir,
                                     feature_list, verbose, vocab_source=train_feature_dir)

    model.fit(train_X, train_Y)
    dev_pred_prob_Y = model.predict_proba(dev_X)
    
    return dev_pred_prob_Y, model, dev_Y
Example No. 41
data.drop('F19', axis=1, inplace=True)
selector = selector.fit(data, y)

#print which features have been selected
print("ATTRIBUTES WHICH HAVE BEEN SELECTED\n")
for i in range(0, len(data.columns)):
	if selector.support_[i]:
		print(data.columns[i])

df1 = data[['FAC_NAME','F1','F2','F3','F4','F5','F6','F7','F8','F9','F10','F11','F12','F13','F14','F15','F16','F17','F18','F20','F21','F22']]  # 'F19' was dropped above, so it cannot be selected here
clf = SVC()
scores = cv1(clf, df1, y, cv=10)
print("\nSVC Cross validated Scores:\n")
print(scores)

clf1 = lr()
scores1 = cv1(clf1, df1, y, cv=10)
print("\nLogistic Regression Cross validated Scores:\n")
print(scores1)

model = GaussianNB()
scores2 = cv1(model, df1, y, cv=10)
print("\nNaive Bayes Cross validated Scores:\n")
print(scores2)

model = DecisionTreeClassifier()
scores3 = cv1(model, df1, y, cv=10)
print("\nDecision Trees Cross validated Scores:\n")
print(scores3)

clf=LinearSVC()
Example No. 42
def main():
    "main program"
    app = get_app_title()
    appf = get_app_file()
    plotdir = make_plotdir()
    
    loans_df, loans_y, test_df, test_y, numeric_vars = load_data()
    indep_vars = numeric_vars
    
    # skip scaling for now, score 0.71
    loans_X = loans_df
    test_X = test_df
    clf = lr()
    do_fit(clf, loans_X, loans_y, print_out=True)
    pred_y = do_predict(clf, test_X, test_y, print_out=True)  
    plot_predict(plotdir, app, appf, "rawvar", indep_vars, test_df, test_y, pred_y)

    # add scaling, score 0.90    
    loans_X, my_scaler = scale_train_data(loans_df, print_out=True)
    test_X = scale_test_data(my_scaler, test_df)
    
    clf = lr()
    do_fit(clf, loans_X, loans_y, print_out=True)
    pred_y = do_predict(clf, test_X, test_y, print_out=True)  
    plot_predict(plotdir, app, appf, "allvar", indep_vars, test_df, test_y, pred_y)
    print("columns:", indep_vars)
#   print_coefs(clf)
    X_labels = list(loans_df.columns)
#   print_lr_coefs(clf, X_labels)
    plist = print_lr_coefs(clf, indep_vars)

# find score using only top6
    top6 = [p[0] for p in plist[:6]]
    print("top6:", top6)
    loans_X = loans_df[top6]
    test_X = test_df[top6]
    loans_X, my_scaler = scale_train_data(loans_X, print_out=True)
    test_X = scale_test_data(my_scaler, test_X)
    clf = lr()
    do_fit(clf, loans_X, loans_y, print_out=True)
    pred_y = do_predict(clf, test_X, test_y, print_out=True)
    print_lr_coefs(clf, top6)
    plot_predict(plotdir, app, appf, "top6", top6, test_df, test_y, pred_y)

    do_roc(clf, test_X, test_y, "top6", top6, app, appf, plotdir)
    
#    arr = clf.decision_function(loans_df)
#    print("decision function:", arr.shape, arr)  # shape (1873,)
##    clf.decision_function(loans_df)
#    print_coefs(clf)
# traditional coefs in "frequentist" style?
#    proba = clf.predict_proba(loans_X)
#    print("proba", proba.shape, proba)
    
    explore_params(loans_X, loans_y, plotdir, app, appf)
    
    # run optimization routine
    clf = lr()
#    init_list = [indep_vars[0], indep_vars[1]]
#    random_opt(clf, indep_vars, init_list, loans_df, loans_y, print_out=True)
    opt_score, opt_list = run_opt(clf, numeric_vars, loans_df, loans_y, app, appf, plotdir, rescale=True)
    # accuracy 73% +- 3% with no scaling  (90% with scaling)
#    print_coefs(clf)

    # redo exploration with optimized columns
    loans_X = loans_df[opt_list]
    test_X = test_df[opt_list]
    loans_X, my_scaler = scale_train_data(loans_X, print_out=True)
    test_X = scale_test_data(my_scaler, test_X)
#    print("loans_X head\n", loans_X[:3])
    explore_params(loans_X, loans_y, plotdir, app, appf+"opt_")
    # accuracy 73% due to no scaling
    
    clf = lr()
    cross_validate(clf, loans_X, loans_y, print_out=True)
    
    clf = lr()
    do_fit(clf, loans_X, loans_y, print_out=True)
    pred_y = do_predict(clf, test_X, test_y, print_out=True)
    print("opt_list columns:", opt_list)
#   print_coefs(clf)
#   print_lr_coefs(clf, X_labels)
    print_lr_coefs(clf, opt_list)
    plot_predict(plotdir, app, appf, "optvar", opt_list, test_df, test_y, pred_y)
Example No. 43
    print('f1 macro:', res)
    print()
    # color = cm(1. * i / NUM_COLORS)  # color will now be an RGBA tuple
    # cm = plt.get_cmap('gist_rainbow')
    # fig = plt.figure(figsize=(8.0, 5.0))
    # ax = fig.add_subplot(111)
    # # ax.set_color_cycle([cm(1. * i / NUM_COLORS) for i in range(NUM_COLORS)])
    # ax.plot(range(len(scores)), scores, label=str(threshold))
    # ax.text(len(scores) - 1, scores[len(scores) - 1], threshold, fontsize='smaller')
    # plt.show()
    print(name)
    return res


vec_list = [tf(), cv()]
clf_list = [svc(), lr()]
threshold_list = np.arange(0.5, 3, 0.5)
print(len(threshold_list))
# results_size = (len(vec_list), len(clf_list),len(threshold_list))
# results = np.zeros(results_size, dtype = np.float)
# a, b, c = range(3), range(3), range(3)
# def my_func(x, y, z):
#     return (x + y + z) / 3.0, x * y * z, max(x, y, z)

grids = np.vectorize(run)(*np.ix_(threshold_list, vec_list, clf_list))
# mean_grid, product_grid, max_grid = grids
print(len(grids))
try:
    print(grids.shape)
except AttributeError:
    print(type(grids))
Example No. 44
	x[:,16] = (x1**4)*x2
	x[:,17] = (x1**3)*(x2**2)
	x[:,18] = (x1**2)*(x2**3)
	x[:,19] = x1*(x2**4)
	x[:,20] = x2**5
	x[:,21] = x1**6
	x[:,22] = (x1**5)*x2
	x[:,23] = (x1**4)*(x2**2)
	x[:,24] = (x1**3)*(x2**3)
	x[:,25] = (x1**2)*(x2**4)
	x[:,26] = x1*(x2**5)
	x[:,27] = x2**6
	return x

data = np.loadtxt("data_microchip.txt",delimiter=",")
m = data[:,0].size
x1 = data[:,0]
x2 = data[:,1]
x = map_features(x1,x2,m)
y = data[:,2]

reg = lr(C=10)
reg.fit(x,y)

s = reg.coef_.size
theta_ans = np.zeros((s+1))
theta_ans[0] = reg.intercept_[0]
theta_ans[1:] = reg.coef_
theta_ans = theta_ans.reshape(s+1,1)
print("%.2f%% accuracy" % (reg.score(x, y) * 100))
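The hand-written map_features enumerates every monomial in x1 and x2 up to degree six; sklearn.preprocessing.PolynomialFeatures generates the same expansion (column ordering may differ). A sketch:

import numpy as np
from sklearn.preprocessing import PolynomialFeatures

poly = PolynomialFeatures(degree=6, include_bias=True)
x_poly = poly.fit_transform(np.column_stack((x1, x2)))  # 28 columns for degree 6 in two variables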
Example No. 45
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression as lr

data = np.loadtxt("ex1data1.txt",delimiter = ',')
m = data[:,0].size
x = data[:,0].reshape(m,1)
y = data[:,1]
a = lr(fit_intercept=True)
a.fit(x,y)
print(a.coef_)
print(a.intercept_)
print(a.score(x, y))
plt.scatter(x,y)
plt.plot(x,a.predict(x))
plt.show()

Example No. 46
train_data = np.load('train_data.npy')

if load_saved:
    report = np.load("report.npy").item()    
    rbm = RBM(len(train_data), report["n_hidden"], report["batch_size"])
    rbm.W = report["W"]
    rbm.hbias = report["hbias"]
    rbm.vbias = report["vbias"]

Y = np.argmax(train_data[:,:20], axis=1)
train_data = train_data[:,20:]
X = sigmoid(np.dot(train_data, rbm.W) + rbm.hbias)
#X = train_data


classifier = lr(C=0.01, solver='lbfgs', multi_class='multinomial')  # the stray positional 0.01 was presumably meant as C
classifier.fit(X, Y)

test_data = np.load('test_data.npy')
test_X = sigmoid(np.dot(test_data, rbm.W) + rbm.hbias)
#test_X = test_data

pred = classifier.predict(test_X)
train_ids, train_cuisines, train_ingredients = read_data('train.json')
test_ids, test_cuisines, test_ingredients = read_data('test.json')
del train_ids, train_ingredients, test_cuisines, test_ingredients
le = LabelEncoder()
le.fit(train_cuisines)
pred = le.inverse_transform(pred)
create_submission(test_ids, pred)
Example No. 47
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression as lr

def show_scatter():
	data_admitted = data[data[:,2]==1]
	data_notadmitted = data[data[:,2]==0]
	plt.scatter(data_admitted[:,0],data_admitted[:,1],c='r',s=50)
	plt.scatter(data_notadmitted[:,0],data_notadmitted[:,1],c='b',s=50)
	x_coordinates = [0,-theta_ans[0][0]/theta_ans[1][0]]
	y_coordinates = [-theta_ans[0][0]/theta_ans[2][0],0]
	plt.plot(x_coordinates,y_coordinates)
	plt.show()


data = np.loadtxt("data_logistic_regression.txt",delimiter=",")
m = data[:,0].size
x = data[:,0:2]
y = data[:,2]

reg = lr(C=3.2)
reg.fit(x,y)
s = reg.coef_.size
theta_ans = np.zeros((s+1))
theta_ans[0] = reg.intercept_[0]
theta_ans[1:] = reg.coef_
theta_ans = theta_ans.reshape(s+1,1)
print(theta_ans)
print(reg.score(x, y) * 100, "% accuracy")
show_scatter()
Example No. 48
    def train(self, train_X, train_Y):
        self.model = lr(penalty=self.hp['regularizer'], C=self.hp['alpha'], tol=self.hp['converg_tol'])
        self.model.fit(train_X, train_Y)
Example No. 49
import pandas as pa
from sklearn.linear_model import LinearRegression as lr
import matplotlib.pyplot as plt
import random

random.seed(1)

tabtrain = pa.read_csv('sources/train.csv')
tabtest = pa.read_csv('sources/test.csv')

# Build the feature tables
x_train = tabtrain.drop(['datetime','count','casual','registered'], axis=1)
x_test = tabtest.drop(['datetime'], axis=1)


# Build the target array
y_train = tabtrain['count']


model = lr()  # the original called lr(5); the stray positional 5 only set fit_intercept to a truthy value (and is rejected by newer scikit-learn)

model.fit(x_train, y_train)

y_test = model.predict(x_test)
y_test = pa.DataFrame(y_test)
y_test.index = tabtest['datetime']

print(y_test)

Example No. 50
def logistic_regression_speed_test(dftrain, dftrain_y, plotdir):
    atitle = 'Logistic Regression'
    afile = 'logreg'
    clf = lr()
#   speed_test_medium(clf, dftrain, dftrain_y, atitle, afile, plotdir)
    speed_test_large(clf, dftrain, dftrain_y, atitle, afile, plotdir)
Example No. 51
testDf = auxiliary.initialise_test(False)
ids = testDf['Id'].values
# Id,Dates,DayOfWeek,PdDistrict,Address,X,Y,Year,Week,Hour
testDf = testDf.drop(['Id', 'Dates', 'Address', 'X', 'Y'], axis=1)

# Random Forest Algorithm
print(list(trainDf.columns.values))
print(list(testDf.columns.values))
#print(list(trainDf.X.values))

# back to numpy format
trainData = trainDf.values
testData = testDf.values

print('Training...')
logit = lr()
logit = logit.fit(trainData[0::, 1::], trainData[0::, 0])

print('Predicting...')
output = logit.predict_proba(testData).astype(float)
output = output.tolist()

predictions_file = open("../submissionLR.csv", "w", newline='')  # "wb" was the Python 2 csv idiom
open_file_object = csv.writer(predictions_file)
open_file_object.writerow(["Id",'ARSON','ASSAULT','BAD CHECKS','BRIBERY','BURGLARY','DISORDERLY CONDUCT',
                           'DRIVING UNDER THE INFLUENCE','DRUG/NARCOTIC','DRUNKENNESS','EMBEZZLEMENT','EXTORTION',
                           'FAMILY OFFENSES','FORGERY/COUNTERFEITING','FRAUD','GAMBLING','KIDNAPPING','LARCENY/THEFT',
                           'LIQUOR LAWS','LOITERING','MISSING PERSON','NON-CRIMINAL','OTHER OFFENSES',
                           'PORNOGRAPHY/OBSCENE MAT','PROSTITUTION','RECOVERED VEHICLE','ROBBERY','RUNAWAY',
                           'SECONDARY CODES','SEX OFFENSES FORCIBLE','SEX OFFENSES NON FORCIBLE','STOLEN PROPERTY',
                           'SUICIDE','SUSPICIOUS OCC','TREA','TRESPASS','VANDALISM','VEHICLE THEFT','WARRANTS',
Example No. 52
import pandas as pa
from sklearn.linear_model import LinearRegression as lr
import matplotlib.pyplot as plt
import random

random.seed(1)

tabtrain = pa.read_csv('sources/train.csv')
tabtest = pa.read_csv('sources/test.csv')

# Build the feature tables
x_train = tabtrain.drop(['datetime','count','casual','registered'], axis=1)
x_test = tabtest.drop(['datetime'], axis=1)


# Build the target array
y_train = tabtrain['count']


model = lr()

model.fit(x_train, y_train)

y_test = model.predict(x_test)
y_test = pa.DataFrame(y_test)
y_test.index = tabtest['datetime']

print(y_test)

Example No. 53
    x["miss"] = data.Name.map(lambda x:1 if x.lower().find("miss")>=0 else 0)
    x["master"] = data.Name.map(lambda x:1 if x.lower().find("master")>=0 else 0)

    x["embark_C"] = data.Embarked.map(lambda x:1 if x=="C" else 0)
    x["embark_Q"] = data.Embarked.map(lambda x:1 if x=="Q" else 0)
    x["embark_S"] = data.Embarked.map(lambda x:1 if x=="S" else 0)

    #return x
    p = poly(2, interaction_only=False)
    return p.fit_transform(x)

if __name__ == "__main__":
    data = pd.read_csv("./data/train.csv")

    x = makeInput(data)
    y = data.Survived

    model = lr(C=0.2)
    model.fit(x,y)

    test_data = pd.read_csv("./data/test.csv")
    x_test = makeInput(test_data)
    predict = model.predict(x_test)
    predict = pd.Series(predict)

    y_test = pd.DataFrame({
        "PassengerId": test_data.PassengerId
        ,"Survived": predict
    })
    y_test.to_csv("./predict.csv", index=False)