Python lr Exemples, sklearn.linear_model.lr Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : wind_classes.py Projet : zeshi1990/swe_reconstruction

    def nldas_correlate(self):
        """Fit and plot models relating NLDAS wind anomalies to this station's.

        Loads the pickled list of NLDAS records from ``wind/nldas.p``, picks the
        record whose ``station_id`` equals ``self.station_id``, restricts it to
        the date span covered by ``self.date`` and fits one ``lr()`` model for
        the wind-speed anomaly and one for the wind-direction anomaly.  The
        residual standard deviation (``n - 2`` degrees of freedom) of each fit
        is printed, and both fits are plotted in a two-row figure.

        Raises:
            ValueError: if no NLDAS record matches ``self.station_id``.
        """
        # NOTE(review): unpickling executes arbitrary code; only load a trusted
        # "wind/nldas.p".  The context manager guarantees the handle is closed.
        with open("wind/nldas.p", "rb") as nldas_file:
            nldas_list = pickle.load(nldas_file)

        nldas = None
        for temp in nldas_list:
            if self.station_id == temp.station_id:
                nldas = temp
                break
        if nldas is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise ValueError("No NLDAS record found for station %s" % (self.station_id,))

        nldas_idx = np.where(np.logical_and(nldas.date >= np.min(self.date),
                                            nldas.date <= np.max(self.date)))
        nldas_wind_speed_anomaly = nldas.wind_speed_anomaly[nldas_idx]
        nldas_wind_dir_anomaly = nldas.wind_dir_anomaly[nldas_idx]

        # Wind speed: only samples where the station anomaly is not NaN are used.
        fit_lr = lr()
        mask1 = ~np.isnan(self.wind_speed_anomaly)
        speed_x = nldas_wind_speed_anomaly[mask1]
        speed_y = self.wind_speed_anomaly[mask1]
        speed_features = speed_x.reshape((len(speed_x), 1))  # one feature column
        fit_lr.fit(speed_features, speed_y)
        result1 = fit_lr.predict(speed_features)
        std = np.sqrt(np.sum((speed_y - result1) ** 2) / (len(result1) - 2))
        print("Standard deviation of the wind speed estimate is %s" % (std,))

        # Wind direction: same procedure with its own NaN mask.
        fit_lr = lr()
        mask2 = ~np.isnan(self.wind_dir_anomaly)
        dir_x = nldas_wind_dir_anomaly[mask2]
        dir_y = self.wind_dir_anomaly[mask2]
        dir_features = dir_x.reshape((len(dir_x), 1))
        fit_lr.fit(dir_features, dir_y)
        result2 = fit_lr.predict(dir_features)
        std = np.sqrt(np.sum((dir_y - result2) ** 2) / (len(result2) - 2))
        print("Standard deviation of the wind direction estimate is %s" % (std,))

        # Observations as dots, fitted values as a red line.
        fig = plt.figure()
        ax1 = fig.add_subplot(211)
        ax1.plot(speed_x, speed_y, '.b')
        ax1.plot(speed_x, result1, '-r')
        ax2 = fig.add_subplot(212)
        ax2.plot(dir_x, dir_y, '.g')
        ax2.plot(dir_x, result2, '-r')
        plt.show()

Exemple #2

0

Afficher le fichier

Fichier : coronary_predict.py Projet : bfetler/coronary_disease

def main():
    """Run the coronary-disease workflow on the 'cleveland' data set.

    Loads the train/test split, plots histograms of both parts, scales the
    listed numeric columns, one-hot encodes ``chest_pain`` and then fits,
    evaluates and cross-validates an ``lr()`` model followed by a
    ``LinearSVC``.  Finishes with a PCA exploration of the training features.
    """
    plot_dir = make_plotdir()
    train_X, test_X, train_y, test_y = load_data(
        'cleveland', plot_dir, print_out=False)
    test_incoming(test_X, train_X)

    for frame, tag in ((train_X, 'Train'), (test_X, 'Test')):
        plot_hists(frame, plot_dir, label=tag)

    numeric_cols = ['age', 'b_pressure', 'cholesterol',
                    'heart_rate', 'exer_depress', 'fluor_count']
    train_X, test_X = scale_data(train_X, test_X, numeric_cols)
    # Only 'chest_pain' is encoded; 'ecg_type', 'exer_slope' and
    # 'thal_defect' are deliberately left as they are.
    categorical_cols = ['chest_pain']
    train_X, test_X = one_hot_encode(train_X, test_X, categorical_cols)
    feature_names = list(train_X.columns)

    logistic = lr()
    fit_predict(logistic, train_X, train_y, test_X, test_y, label='logistic')
    cross_validate(logistic, train_X, train_y['Y'], print_out=True)
    print_lr_coefs(logistic, feature_names)

    # LinearSVC is fitted on the scaled data prepared above.
    svc = LinearSVC()
    fit_predict(svc, train_X, train_y, test_X, test_y, label='svc')
    cross_validate(svc, train_X, train_y['Y'], print_out=True)

    explore_pca(train_X)

Exemple #3

0

Afficher le fichier

Fichier : classify_test.py Projet : anukat2015/ARKcat

def classify(train_data_filename, train_label_filename, dev_data_filename, dev_label_filename,
             train_feature_dir, dev_feature_dir, feature_list, model_type='LR',
             regularizer='l1', alpha=1.0, converg_tol=0.01, verbose=1, folds=2, n_jobs=-1, score_eval='f1'):
    """Build a linear classifier and score it on dev data or by cross-validation.

    Args:
        train_data_filename, train_label_filename: training inputs handed to
            ``load_features``.
        dev_data_filename, dev_label_filename: dev inputs, read only when
            ``folds < 1``.
        train_feature_dir, dev_feature_dir: feature directories handed to
            ``load_features``; the train directory is also the vocabulary
            source for the dev features.
        feature_list: feature specification handed to ``load_features``.
        model_type: ``'LR'`` or ``'SVM'``; anything else terminates the
            process through ``sys.exit``.
        regularizer, alpha, converg_tol: passed to the model as ``penalty``,
            ``C`` and ``tol``.
        verbose: forwarded to ``load_features``.
        folds: number of stratified folds; a value below 1 selects evaluation
            on the separate dev set instead of cross-validation.
        n_jobs, score_eval: forwarded to ``cross_val_score``.

    Returns:
        dict with ``'loss'`` (negated score), ``'status'`` (``STATUS_OK``) and
        ``'model'`` (the model object that was constructed).
    """
    if model_type == 'LR':
        model = lr(penalty=regularizer, C=alpha, tol=converg_tol)
    elif model_type == 'SVM':
        model = svm.LinearSVC(penalty=regularizer, C=alpha, tol=converg_tol)
    else:
        sys.exit('Model type ' + model_type + ' not supported')

    train_X, train_Y = load_features(train_data_filename, train_label_filename, train_feature_dir,
                                     feature_list, verbose)
    if folds < 1:
        # Separate dev data is available, so no cross-validation is needed.
        # Dev features use the train vocabulary and are not saved.
        dev_X, dev_Y = load_features(dev_data_filename, dev_label_filename, dev_feature_dir,
                                     feature_list, verbose, vocab_source=train_feature_dir)

        dev_f1, dev_acc, train_f1, train_acc = compute_evaluation_metrics(train_X, train_Y, dev_X, dev_Y, model)
        print('train acc: ' + str(train_acc))
        print('dev acc: ' + str(dev_acc))
        neg_loss = dev_acc
    else:
        # No separate dev data: estimate the score with stratified k-fold CV.
        skf = StratifiedKFold(train_Y, folds, random_state=17)
        neg_loss = cross_val_score(model, train_X, train_Y, cv=skf, scoring=score_eval, n_jobs=n_jobs).mean()
        # The original printed an undefined name `f1` here (NameError).
        print('crossvalidation ' + str(score_eval) + ': ' + str(neg_loss))

    return {'loss': -neg_loss, 'status': STATUS_OK, 'model': model}

Exemple #4

0

Afficher le fichier

Fichier : linearmodel.py Projet : cnh/KaunHaiSacheyVoters_Better-India-BPAC

def trainModel():
    """Train an ``lr()`` model from ``train.features`` / ``train.labels``.

    ``train.features`` holds one comma-separated row of integers per line and
    ``train.labels`` one integer label per line.  After fitting, the function
    prints diagnostics for the sample at index 45: the sigmoid of the model's
    prediction, the coefficients, the intercept, the sigmoid of the manually
    computed linear score, and the same score computed from the arrays stored
    in ``lr_coeff.npy`` / ``lr_intercept.npy``.
    """
    # Context managers close the files even if parsing a line fails.
    with open('train.features') as fh:
        X = [[int(x1) for x1 in line.strip().split(',')] for line in fh]
    with open('train.labels') as fh:
        Y = [int(line.strip()) for line in fh]

    clf = lr()
    clf.fit(X, Y)

    sample = X[45]
    # predict() expects a 2-D batch, so the single sample is wrapped in a list.
    print(sigmoid(clf.predict([sample])))
    print(clf.coef_)
    print(clf.intercept_)
    score = np.dot(clf.coef_, sample) + clf.intercept_
    print(sigmoid(score))

    # Repeat the manual score with previously saved parameters.
    coeff = np.load("lr_coeff.npy")
    intercept = np.load("lr_intercept.npy")
    score = np.dot(coeff, sample) + intercept
    print(sigmoid(score))

Exemple #5

0

Afficher le fichier

Fichier : preprocessing_surf.py Projet : prakhar897/ISLR

def predict_lr(X_train, X_test, y_train, y_test):
    """Fit an ``lr()`` model, report its accuracy and save the predictions.

    The predictions for ``X_test`` are passed to ``calc_accuracy`` and written
    to ``submission_surf_lr.csv`` with a 1-based id, the predicted label and
    the true label on each row.

    Returns:
        The fitted model.
    """
    model = lr()
    print("lr started")
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    calc_accuracy("Logistic regression", y_test, predictions)

    image_ids = range(1, len(y_test) + 1)
    table = np.c_[image_ids, predictions, y_test]
    np.savetxt('submission_surf_lr.csv', table, delimiter=',',
               header='ImageId,Label,TrueLabel', comments='', fmt='%d')
    return model

Exemple #6

0

Afficher le fichier

Fichier : final_project.py Projet : T-Goon/ML-Final-Project

def main():
    """Train an ``lr()`` classifier on the image arrays and evaluate it.

    Positive and negative training images are loaded from ``.npy`` files,
    flattened to one row per image, min-max scaled, labelled 1 / 0, shuffled
    and used to fit the model, which is pickled to ``model.sav``.  The test
    arrays get the same treatment, and the model's ``score`` and ROC AUC on
    them are printed.
    """
    # Regenerate the arrays when EITHER training file is missing.  The
    # original tested a misspelled name ('training_image_neg0.npy') and
    # combined the two checks with `and`.
    if (not path.exists('training_images_pos0.npy')
            or not path.exists('training_images_neg0.npy')):
        convert_to_numpy()

    train_pos = np.load('training_images_pos0.npy')
    train_neg = np.load('training_images_neg0.npy')
    print(train_pos.shape)
    print(train_neg.shape)

    # Flatten each image to a row; -1 infers the per-image size (the original
    # hard-coded 32768) so other image shapes work too.
    # NOTE(review): each array is min-max scaled on its own, so positives,
    # negatives and test data do not share one scaling — confirm intended.
    train_pos = preprocessing.minmax_scale(
        train_pos.reshape((train_pos.shape[0], -1)))
    train_neg = preprocessing.minmax_scale(
        train_neg.reshape((train_neg.shape[0], -1)))

    pos_label = np.ones(train_pos.shape[0])
    neg_label = np.zeros(train_neg.shape[0])

    trainX = np.concatenate((train_pos, train_neg))
    trainY = np.concatenate((pos_label, neg_label))

    # Shuffle samples and labels with the same permutation.
    idxs = np.random.permutation(trainX.shape[0])
    trainX = trainX[idxs]
    trainY = trainY[idxs]

    model = lr()
    clf = model.fit(trainX, trainY)
    # Close the file deterministically so the pickle is fully flushed.
    with open('model.sav', 'wb') as model_file:
        pickle.dump(model, model_file)

    test_pos = np.load('testing_images_pos0.npy')
    test_neg = np.load('testing_images_neg0.npy')
    print(test_pos.shape)
    print(test_neg.shape)

    test_pos = preprocessing.minmax_scale(
        test_pos.reshape((test_pos.shape[0], -1)))
    test_neg = preprocessing.minmax_scale(
        test_neg.reshape((test_neg.shape[0], -1)))

    pos_label_test = np.ones(test_pos.shape[0])
    neg_label_test = np.zeros(test_neg.shape[0])

    testX = np.concatenate((test_pos, test_neg))
    testY = np.concatenate((pos_label_test, neg_label_test))

    idxs = np.random.permutation(testX.shape[0])
    testX = testX[idxs]
    testY = testY[idxs]

    print(clf.score(testX, testY))
    # The original computed the ROC AUC and discarded it; report it.
    print(roc_auc_score(testY, clf.predict_proba(testX)[:, 1]))

Exemple #7

0

Afficher le fichier

def run_lr():
    """Fit an ``lr()`` model on the module-level data and save its predictions.

    Uses the module-level ``x`` / ``y`` for fitting and ``x_`` for prediction,
    writes a 1-based id, the predicted label and ``label_test`` per row to
    ``submission_lr.csv`` and finally reports the accuracy.
    """
    model = lr()
    print("lr started")
    model.fit(x, y)
    predictions = model.predict(x_)

    image_ids = range(1, len(test) + 1)
    table = np.c_[image_ids, predictions, label_test]
    np.savetxt('submission_lr.csv', table, delimiter=',',
               header='ImageId,Label,TrueLabel', comments='', fmt='%d')
    calc_accuracy("Logistic regression", label_test, predictions)

Exemple #8

0

Afficher le fichier

def main():
    """Compare LinearSVC and ``lr()`` on raw and PCA-reduced features.

    Reads and renames the raw data, prints summaries, explores PCA, fits
    LinearSVC on all raw columns and on the first 30 raw columns, then fits
    both classifiers on the first 10/20/30/50/100 PCA components, plots the
    results, runs a grid search and cross-validation, and prints a conclusion.
    """
    dfcol, dups = readRawColumns()
    dftrain, dftrain_y, dftest, dftest_y = readRawData(dfcol)

    dftrain = renameColumns(dftrain)
    dftest = renameColumns(dftest)
    print("dftrain shape head", dftrain.shape, "\n", dftrain[:3])
    print("dftest shape head", dftest.shape, "\n", dftest[:3])
    print("dftrain stats\n", dftrain.describe())

    make_plotdir()
    explore_pca(dftrain, dftest, "all")  # 562 columns

    clf = LinearSVC()
    print("fitting LinearSVC")
    fit_predict(clf, dftrain, dftrain_y, dftest, dftest_y,
                'raw data, all cols')
    # DataFrame.ix no longer exists in current pandas; .iloc selects the
    # first 30 columns by position, matching the '30 cols' label.
    fit_predict(clf, dftrain.iloc[:, :30], dftrain_y, dftest.iloc[:, :30],
                dftest_y, 'raw data, 30 cols')
    # 30 columns not sorted by pca - only 70% accuracy

    X_train, X_test = quick_pca(dftrain, dftest, ncomps=100)

    print("fitting LinearSVC with PCA input")
    preds = []
    for j in [10, 20, 30, 50, 100]:
        p = fit_predict(clf, X_train[:, :j], dftrain_y, X_test[:, :j],
                        dftest_y, 'pca {:d} cols'.format(j))
        preds.append((j, p))
    plot_pca_fit(preds, "svc", "SVC")

    do_svc_gridsearch(X_train[:, :30], dftrain_y)

    print("Cross-validating LinearSVC with PCA input")
    get_cv_scores(clf, X_train[:, :30],
                  dftrain_y)  # randomized, not grouped by subject
    # 30 columns sorted by pca - 89% accuracy

    clf = lr()
    print("fitting Logistic Regression with PCA input")
    preds = []
    for j in [10, 20, 30, 50, 100]:
        p = fit_predict(clf, X_train[:, :j], dftrain_y, X_test[:, :j],
                        dftest_y, 'pca {:d} cols'.format(j))
        preds.append((j, p))
    plot_pca_fit(preds, "lr", "Logistic Regression")
    print("Cross-validating Logistic Regression with PCA input")
    get_cv_scores(clf, X_train[:, :30], dftrain_y)

    txt = '''\nConclusion: Using PCA as input to Logistic Regression or LinearSVC is effective, 
with 91% accuracy using only 30 components (5.4% of 562 total).  For six 
predicted classes, a classification report shows precision of 85% and greater 
(also confirmed by confusion matrix).  Cross-validation gives average fit 
scores of 89% +- 5%.'''
    print(txt)

Exemple #9

0

Afficher le fichier

Fichier : utilities.py Projet : natnij/timeseries

def regressMissingData(x, y, xnew, robust=True):
    '''
    Fill in missing data with a linear or a robust (RANSAC) linear regression.

    author: Nat

    input:
        x: independent variables with corresponding dependent variable y
        y: dependent variable which is known
        xnew: independent variables with MISSING dependent variable y
        robust: when equal to True (the default) a RANSACRegressor wrapping
            LinearRegression is used; otherwise plain LinearRegression
    output:
        ynew: pandas.DataFrame with the single column 'WON_MONTH2' holding
            the regressed y values for xnew
    '''
    import pandas as pd
    from sklearn.linear_model import LinearRegression as lr

    # Only the requested model is fitted; the original always fitted both and
    # threw one result away.  The `== True` comparison is kept on purpose so
    # that the selection rule is exactly the original one.
    if robust == True:
        from sklearn.linear_model import RANSACRegressor as ransac
        model = ransac(lr())
    else:
        model = lr()
    model.fit(x, y)
    return pd.DataFrame(model.predict(xnew), columns=['WON_MONTH2'])

Exemple #10

0

Afficher le fichier

Fichier : ZSL_graph_algorithm_4.0.py Projet : sailfish009/Graph_ZSL

 def train_edge_classification(X_train, Y_train):
     """
     Fit a ``TopKRanker`` wrapping ``lr()`` on the training set.
     :param X_train: Edge features (norms) of the train set.
     :param Y_train: Edge labels of the train set - 0 for true, 1 for false.
     :return: The fitted classifier.
     """
     ranker = TopKRanker(lr())
     ranker.fit(X_train, Y_train)
     return ranker

Exemple #11

0

Afficher le fichier

Fichier : learn_classifier.py Projet : benbo/QPR_CP1

 def get_model(self,args):
     """Build the classifier described by ``args['model']``.

     ``args['model']['model']`` selects the type: ``'LR'`` builds ``lr`` with
     ``regularizer_lr`` / ``C_lr`` and ``n_jobs=self.cjobs``; ``'SVM'`` builds
     ``svm.LinearSVC`` with ``regularizer_svm`` / ``C_svm``.

     :raises ValueError: for any other model type (the original fell through
         to ``return model`` and failed with UnboundLocalError).
     """
     cfg = args['model']
     kind = cfg['model']
     if kind == 'LR':
         return lr(penalty=cfg['regularizer_lr'], C=cfg['C_lr'], n_jobs=self.cjobs)
     if kind == 'SVM':
         penalty = cfg['regularizer_svm']
         # LinearSVC has no n_jobs parameter; passing it raises TypeError.
         if penalty == 'l1':
             # The l1 penalty needs the primal formulation (dual=False).
             return svm.LinearSVC(C=cfg['C_svm'], penalty=penalty, dual=False)
         return svm.LinearSVC(C=cfg['C_svm'], penalty=penalty)
     raise ValueError('Model type %r not supported' % (kind,))

Exemple #12

0

Afficher le fichier

def get_classifier():
    """Fit an ``lr(max_iter=1000)`` model on 80% of ``query_features``.

    Columns 2..22 (by position) of the module-level ``query_features`` are the
    inputs and its last column is the target.  The held-out 20% is not used.

    Returns:
        The fitted classifier.
    """
    columns = query_features.columns
    features = query_features[columns[2:23]]
    target = query_features[columns[-1]]
    x_train, _x_test, y_train, _y_test = sk_model.train_test_split(
        features, target, test_size=0.2)

    model = lr(max_iter=1000)
    return model.fit(x_train, y_train)

Exemple #13

0

Afficher le fichier

def fit_and_test():
    """Fit ``lr()`` on 90% of the training data and print its validation score."""
    data, target = pd.read_train()

    # `t` splits the data; 10% is held out for validation.
    train_x, val_x, train_y, val_y = t(data, target, test_size=0.1)

    model = lr()
    model.fit(train_x, train_y)

    print("Score on validation")
    validation_score = model.score(val_x, val_y)
    print(validation_score)

Exemple #14

0

Afficher le fichier

Fichier : MLProject.py Projet : Fy1717/HelloWorldMLDjango

def linear_regression(x, y):
    """Fit ``lr()`` on (x, y) and show the points together with the fitted line.

    Args:
        x: input values, in the shape accepted by the model's ``fit``.
        y: target values.
    """
    lineerreg = lr()          # the regression model
    lineerreg.fit(x, y)       # learn from the sample data
    # Predict once and reuse the result; the original also called predict()
    # and read coef_ / intercept_ into locals that were never used.
    fitted = lineerreg.predict(x)
    plt.scatter(x, y)                 # show the data points
    plt.plot(x, fitted, c="red")      # draw the fitted line
    plt.show()                        # display the figure

Exemple #15

0

Afficher le fichier

Fichier : more_runs.py Projet : sailfish009/Graph_ZSL

def train_edge_classification(X_train, Y_train):
    """
    Fit a ``TopKRanker`` wrapping ``lr()`` on the given training data.
    :param X_train: Training features (the features' graph norm, per the
        original notes).
    :param Y_train: Training edge labels - 0 for true, 1 for false.
    :return: The fitted classifier.
    """
    ranker = TopKRanker(lr())
    ranker.fit(X_train, Y_train)
    return ranker

Exemple #16

0

Afficher le fichier

 def construct_all_models(self, hyperTune):
     """Create and tune the SVM, logistic-regression and KNN models.

     Nothing happens unless ``hyperTune`` is truthy.  Otherwise ``self.models``
     is first filled with ``[estimator, parameter grid]`` pairs and each entry
     is then replaced by the result of ``self.train_with_hyperParamTuning``.
     """
     if hyperTune:
         # Three candidates: SVM, logistic regression and KNN.
         svm_entry = [SVC(kernel='linear', probability=True),
                      dict(C=np.arange(0.01, 2.01, 0.2))]
         logistic_entry = [lr(), dict(C=np.arange(0.1, 3, 0.1))]
         knn_entry = [KNeighborsClassifier(),
                      dict(n_neighbors=range(1, 100))]
         self.models = {
             'SVM': svm_entry,
             'LogisticRegression': logistic_entry,
             'KNN': knn_entry,
         }
         for name, (estimator, grid) in self.models.items():
             # Replace the candidate with its trained and tuned version.
             tuned = self.train_with_hyperParamTuning(estimator, name, grid)
             self.models[name] = tuned
         print('\nTraining process finished\n\n\n')

Exemple #17

0

Afficher le fichier

Fichier : evaluation.py Projet : ZagHe568/personalized_graph_embedding

def eval_node_classification(X_train, Y_train, X_test, Y_test):
    """Train a ``TopKRanker`` and return its micro and macro F1 on the test set.

    The number of labels predicted for each test row is that row's sum in
    ``Y_test`` (``Y_test.sum(axis=1)``).

    Returns:
        Tuple ``(micro, macro)`` of F1 scores.
    """
    labels_per_row = list(Y_test.sum(axis=1))
    ranker = TopKRanker(lr(solver='liblinear'))
    ranker.fit(X_train, Y_train)
    predicted = ranker.predict(X_test, labels_per_row)

    micro = f1_score(Y_test, predicted, average='micro')
    macro = f1_score(Y_test, predicted, average='macro')
    return micro, macro

Exemple #18

0

Afficher le fichier

Fichier : test1.py Projet : wqmike123/thesis

def getR2(y_actual, factor, isRet=False):
    """Return the R^2 of an ``lr()`` fit of ``y_actual`` on ``factor``.

    Both inputs are reshaped into single-column arrays of equal length.  With
    ``isRet`` set, the target becomes the log ratio of consecutive values of
    ``y_actual`` and the last factor value is dropped so the lengths match.
    """
    count = len(y_actual)
    target = np.array(y_actual).reshape((count, 1))
    feature = np.array(factor).reshape((count, 1))
    if isRet:
        # Log returns: value at step i+1 relative to the value at step i.
        target = np.log(target[1:] / target[:-1])
        feature = feature[:-1]

    model = lr()
    model.fit(feature, target)
    fitted = model.predict(feature)
    return r2_score(target, fitted)

Exemple #19

0

Afficher le fichier

Fichier : logistic_regression.py Projet : bfetler/lending_club

def explore_params(loans_X, loans_y, plotdir, app, appf):
    '''Grid-search the C parameter of lr() on the training data.

       Runs a 10-fold, accuracy-scored GridSearchCV over seven C values,
       prints the grid scores and the best result, and passes the grid
       scores to gridscore_boxplot.'''
    # NOTE(review): `grid_scores_` is provided only by older scikit-learn
    # releases - confirm the version this project pins.
    c_values = [0.01, 0.03, 0.1, 0.3, 1.0, 3.0, 10.0]
    search = GridSearchCV(
        estimator=lr(),
        param_grid=[{'C': c_values}],
        cv=10,
        verbose=1,
        n_jobs=-1,
        scoring='accuracy',
    )
    search.fit(loans_X, loans_y)  # fits every grid point
    print("gs grid scores\n", search.grid_scores_)
    summary = (search.best_score_, search.best_params_, search.best_estimator_)
    print("gs best score %.5f %s\n%s" % summary)
    gridscore_boxplot(search.grid_scores_, plotdir, app, appf,
                      "C", "solver='liblinear'")

Exemple #20

0

Afficher le fichier

def cal_linear_reg_r(y, x=None):
    '''
    Compute the slope of the data points in y (simple linear regression).
    y and x may be a list, pd.Series or np.array.
    When isnull(x) holds, the positions 0..len(y)-1 are used as x.
    Returns the first coefficient and the intercept of the fit.
    '''
    x_values = range(0, len(y)) if isnull(x) else x
    design = pd.DataFrame({'X': x_values})
    target = pd.Series(y)
    fitted = lr().fit(design, target)
    slope = fitted.coef_[0]
    return slope, fitted.intercept_

Exemple #21

0

Afficher le fichier

Fichier : utilities.py Projet : natnij/timeseries

def myLr(x, y, xnew):
    '''
    Fit sklearn.linear_model.LinearRegression on (x, y) and predict for xnew.
    Negative predictions are replaced by 0.
    wrapper author: Nat
    '''
    import numpy as np
    from sklearn.linear_model import LinearRegression as lr

    regressor = lr()
    regressor.fit(x, y)
    predicted = regressor.predict(xnew)
    is_negative = predicted < 0
    return np.where(is_negative, 0, predicted)

Exemple #22

0

Afficher le fichier

def _plotDegreedist(degree_df, plot_model=False, path=None):
    """
    Plot the degree distribution on linear axes and on log-log axes.

    Args:
        degree_df (pandas.DataFrame): data frame that includes the degree.
            Degree info should be stored in the column "degree".

        plot_model (bool): Whether to fit and plot a linear approximation
            line on the log-log panel; its slope and r2 go into the title.

        path (str): File path handed to ``fig.savefig``. If None, the plot
            is not saved. Default is None.
    """

    from sklearn.linear_model import LinearRegression as lr
    df = degree_df.copy()

    counts = df.degree.value_counts()
    dist = counts / counts.sum()
    # np.int was only an alias of the builtin int and was removed in
    # NumPy 1.24; the builtin behaves identically here.
    dist.index = dist.index.astype(int)

    fig, ax = plt.subplots(1, 2)

    ax[0].scatter(dist.index.values, dist.values, c="black")
    ax[0].set_title("degree distribution")
    ax[0].set_xlabel("k")
    ax[0].set_ylabel("P(k)")

    # Log-transform both axes' data, as column vectors for the model.
    x = np.log(dist.index.values).reshape([-1, 1])
    y = np.log(dist.values).reshape([-1, 1])
    if plot_model:
        model = lr()
        model.fit(x, y)
        # The fitted line is drawn between log k = -1 and log k = 5.
        x_ = np.array([-1, 5]).reshape([-1, 1])
        y_ = model.predict(x_)

        ax[1].set_title(
            f"degree distribution (log scale)\nslope: {model.coef_[0][0] :.4g}, r2: {model.score(x,y) :.4g}"
        )
        ax[1].plot(x_.flatten(), y_.flatten(), c="black", alpha=0.5)
    else:
        ax[1].set_title("degree distribution (log scale)")

    ax[1].scatter(x.flatten(), y.flatten(), c="black")
    ax[1].set_ylim([y.min() - 0.2, y.max() + 0.2])
    ax[1].set_xlim([-0.2, x.max() + 0.2])
    ax[1].set_xlabel("log k")
    ax[1].set_ylabel("log P(k)")

    if path is not None:
        fig.savefig(path, transparent=True)
    plt.show()

Exemple #23

0

Afficher le fichier

    def regression(self, metric="root_mean_squared_error", folds=10, alphas=[], graph=False):
        """Cross-validate a fixed set of regressors and report their scores.

        The regressors are stored in ``self.models`` by display name.  Each is
        scored with ``cross_val_score`` on ``self.Xt_train`` /
        ``self.yt_train`` using an unshuffled ``KFold`` with ``folds`` splits;
        the scores are multiplied by -1 before being summarised (mean,
        standard deviation, coefficient of variation) in a table sorted by
        mean score.  With ``graph`` set, a box plot of the scores is shown.

        NOTE(review): the negation suggests ``metric`` is meant to be one of
        scikit-learn's negated scorers - confirm the default name is accepted.
        ``alphas`` is only forwarded to the Ridge CV regressor.
        """
        size = 1.3 * self.report_width // 10

        models = {
            "Linear regressor": lr(),
            "Lasso regressor": lassor(),
            "Lasso CV regressor": lassocvr(),
            "Ridge regressor": rr(alpha=0, normalize=True),
            "Ridge CV regressor": rcvr(alphas = alphas),
            "K nearest neighbors regressor K2u": knnr(n_neighbors=2, weights='uniform'),
            "K nearest neighbors regressor K2d": knnr(n_neighbors=2, weights='distance'),
            "K nearest neighbors regressor K5": knnr(n_neighbors=5),
            "K nearest neighbors regressor K10": knnr(n_neighbors=10),
            "SGD regressor": sgdr(max_iter=10000, warm_start=True),
            "Decision tree regressor": dtr(),
            "Decision tree regressor D3": dtr(max_depth=3),
            "Random forest regressor": rfr(),
            "Ada boost regressor": abr(),
            "Gradient boost regressor": gbr(),
            "Support vector regressor": svr(),
        }
        self.models = models

        print('\n')
        print(self.report_width * '*', '\n*')
        print('* REGRESSION RESULTS - BEFORE PARAMETERS BOOSTING \n*')
        kf = KFold(n_splits=folds)
        results = []
        names = []
        for model_name, estimator in models.items():
            raw_scores = cross_val_score(estimator, self.Xt_train, self.yt_train.values.ravel(), cv=kf, scoring=metric)
            results.append(-1 * raw_scores)
            names.append(model_name)
        print(self.report_width * '*', '')

        report = pd.DataFrame({'Regressor': names, 'Score': results})
        report['Score (avg)'] = report.Score.apply(lambda x: x.mean())
        report['Score (std)'] = report.Score.apply(lambda x: x.std())
        report['Score (VC)'] = 100 * report['Score (std)'] / report['Score (avg)']
        report.sort_values(by='Score (avg)', inplace=True)
        report.drop('Score', axis=1, inplace=True)
        display(report)
        print('\n')

        if graph:
            fig, ax = plt.subplots(figsize=(size, 0.5 * size))
            plt.title('Regressor Comparison')
            plt.boxplot(results)
            ax.set_xticklabels(names)
            plt.xticks(rotation=45)
            plt.subplots_adjust(hspace=0.0)
            plt.show()
        return None

Exemple #24

0

Afficher le fichier

    def test(self, model_name, graph=False):
        """Fit the named model on the train subset and evaluate it on the test subset.

        For ``self.strategy == 'regression'`` the residual analysis is run via
        ``self.residual`` and, with ``graph`` set, observed versus predicted
        values are plotted with a fitted ``lr()`` line.  For any other
        strategy the accuracy is printed and the classification report is
        either displayed as a table or, with ``graph`` set, drawn as heat maps
        next to the confusion matrix.
        """
        size = 1.3 * self.report_width // 10
        model = self.models[model_name]

        # Fit on the train subset.
        model.fit(self.Xt_train, self.yt_train)

        # Everything below evaluates on the test subset.
        X, y = self.Xt_test, self.yt_test

        if self.strategy == 'regression':
            y_hat = model.predict(X)
            self.residual(y, y_hat, model_name, graph)
            if graph:
                # Relationship between observed y and predicted y_hat.
                fig, ax = plt.subplots(figsize=(size, 0.5 * size))
                plt.title('Model Overall Performance')
                plt.scatter(y, y_hat, color='g')
                viewer = lr()
                trend = viewer.fit(y, y_hat).predict(y)
                plt.plot(y, trend, color='k')
                plt.xlabel('Observed')
                plt.ylabel('Predicted')
                plt.show()
            return None

        y_pred = model.predict(X)
        sample_size = len(y_pred)
        print('\n')
        print(self.report_width * '*', '\n*')
        print('* MODEL PERFORMANCE \n*')
        print('* MODEL NAME: ', model_name)
        print('* TEST SAMPLE SIZE: ', sample_size)
        print('* ACCURACY: ', round(accuracy_score(y, y_pred)*100, 1), '%')
        print('* ')
        print(self.report_width * '*', '\n')
        report = classification_report(y, y_pred, output_dict=True)
        if not graph:
            display(pd.DataFrame(report).T)
            return None

        fig, ax = plt.subplots(figsize=(size, 0.3 * size))
        plt.title('Confusion Matrix')
        sns.heatmap(confusion_matrix(y, y_pred), annot=True, cmap='YlGn', fmt='d',)
        plt.xlabel('Predicted')
        plt.ylabel('True Class')
        plt.show()
        fig, ax = plt.subplots(figsize=(size, 0.5 * size))
        plt.title('Classification Report')
        sns.heatmap(pd.DataFrame(report).iloc[0:3].T, annot=True, vmin=0, vmax=1, cmap='BrBG', fmt='.2g')
        plt.xlabel('Score')
        plt.show()
        return None

Exemple #25

0

Afficher le fichier

def log_reg(x, y, t, q):
    """Train a multinomial ``lr`` (saga solver) model and evaluate it.

    Args:
        x, y: training inputs and labels.
        t, q: test inputs and labels.

    Returns:
        Tuple ``(predictions, model)``: the predictions for ``t`` and the
        fitted model, which additionally carries ``result`` (score on the
        test data), ``error`` (``1 - result``) and ``end`` (seconds elapsed
        from just before fitting until after scoring).
    """
    model = lr(solver="saga", max_iter=200, multi_class="multinomial", tol=0.1)
    started = timer()
    model.fit(x, y)
    accuracy = model.score(t, q)
    model.result = accuracy
    model.error = 1 - accuracy
    model.end = timer() - started
    predictions = model.predict(t)

    return predictions, model

Exemple #26

0

Afficher le fichier

def gs(x, y="prob"):
    """Train ``lr(random_state=0)`` on a 70/30 split of ``x`` and predict the test part.

    The first 32 columns of ``x`` are the features and ``x.label`` is the
    target.

    Args:
        x: data frame with the features and a ``label`` column.
        y: ``"prob"`` returns ``predict_proba`` output; anything else returns
            ``predict`` output.

    Returns:
        Tuple ``(pred, true_labels)`` for the test part of the split.
    """
    x_train, x_test, y_train, y_test = tts(x, x.label, test_size=0.3)
    train_features = x_train.iloc[:, :32]
    test_features = x_test.iloc[:, :32]

    model = lr(random_state=0).fit(train_features, y_train)
    predict = model.predict_proba if y == "prob" else model.predict
    return predict(test_features), y_test.values

Exemple #27

0

Afficher le fichier

Fichier : build_linear_models.py Projet : Manjunathsk92/dbanalysis

def model_stop(df):
    """Fit ``lr(fit_intercept=True)`` predicting ``traveltime`` from weather and time terms.

    Rows whose ``traveltime`` is at or above the 95th percentile are dropped,
    then the 2nd to 4th powers of ``hour`` and ``day`` are added as columns.

    Args:
        df: data frame with the columns 'traveltime', 'rain', 'temp',
            'vappr', 'hour' and 'day'.

    Returns:
        Tuple ``(model, df, features)``: the fitted model, the filtered frame
        including the added power columns, and the feature names used.
    """
    # .copy() makes the filtered frame independent, so adding columns below
    # does not assign on a slice of the caller's frame
    # (SettingWithCopyWarning).
    df = df[df['traveltime'] < df['traveltime'].quantile(0.95)].copy()
    features = ['rain', 'temp', 'vappr', 'hour', 'hour2', 'hour3', 'hour4',
                'day', 'day2', 'day3', 'day4']
    for i in range(2, 5):
        df['hour' + str(i)] = df['hour'] ** i
        df['day' + str(i)] = df['day'] ** i
    model = lr(fit_intercept=True).fit(df[features], df['traveltime'])
    return model, df, features

Exemple #28

0

Afficher le fichier

def evaluateNodeClassification(X, Y, test_ratio):
    """Split the data, train a ``TopKRanker`` and return micro and macro F1.

    Args:
        X: node features.
        Y: label indicator matrix; objects with and without ``toarray()`` are
            both handled.
        test_ratio: fraction of the data held out for testing.

    Returns:
        Tuple ``(micro, macro)`` of F1 scores on the held-out part.
    """
    X_train, X_test, Y_train, Y_test = sk_ms.train_test_split(
        X, Y, test_size=test_ratio)
    try:
        top_k_list = list(Y_test.toarray().sum(axis=1))
    except AttributeError:
        # No toarray() on this type: sum the rows directly.  Catching only
        # AttributeError keeps unrelated errors visible, unlike the original
        # bare `except:`.
        top_k_list = list(Y_test.sum(axis=1))
    classif2 = TopKRanker(lr())
    classif2.fit(X_train, Y_train)
    prediction = classif2.predict(X_test, top_k_list)
    micro = f1_score(Y_test, prediction, average='micro')
    macro = f1_score(Y_test, prediction, average='macro')
    return (micro, macro)

Exemple #29

0

Afficher le fichier

Fichier : models.py Projet : wangzhicong/5001_ind

    def create_model(self, model_type, parameters):
        """Instantiate the model named by ``model_type`` and apply ``parameters``.

        Args:
            model_type: one of 'lr', 'svm', 'mlp', 'rf' or 'xgb'.
            parameters: dict passed to the model's ``set_params``.

        Returns:
            Whatever ``set_params(**parameters)`` returns for the new model.

        Raises:
            ValueError: if ``model_type`` is not one of the supported names
                (the original failed with UnboundLocalError instead).
        """
        if model_type == 'lr':
            model = lr()
        elif model_type == 'svm':
            model = svm()
        elif model_type == 'mlp':
            model = mlp()
        elif model_type == 'rf':
            model = rf()
        elif model_type == 'xgb':
            model = xgb()
        else:
            raise ValueError('Unsupported model type: %r' % (model_type,))
        return model.set_params(**parameters)

Exemple #30

0

Afficher le fichier

Fichier : Models_Creation_&_Saving.py Projet : KalraH/Review_Project

def version1():  # Logistic Regression Model
    """Train an ``lr()`` model on vectorized review text and return it.

    NOTE(review): the return value of ``train_test_split`` is ignored and
    ``features_train`` / ``features_test`` / ``labels_train`` /
    ``labels_test`` are read as module-level names - presumably the helper
    sets them; confirm.
    """
    train_test_split(df["reviewText"], df["Positivity"], 100)

    # One vectorizer must be fitted on the training text and reused for the
    # test text.  The original built a second, unfitted `cv()` for the test
    # set, which has no vocabulary to transform with.
    vectorizer = cv()
    features_train_vectorized = vectorizer.fit_transform(features_train)
    features_test_vectorized = vectorizer.transform(features_test)

    model = lr().fit(features_train_vectorized,
                     labels_train)  # Model creation for logistic regression
    predictions = model.predict(features_test_vectorized)

    # Evaluation helpers; their return values are not used here.
    ras(labels_test, predictions)
    cm(labels_test, predictions)

    return model

Exemple #31

0

Afficher le fichier

Fichier : sparse_model.py Projet : dallascard/guac

    def __init__(self, model_type=None, column_names=None, metric='f1', **kwargs):
        """Store the configuration and build the underlying model.

        ``kwargs`` becomes ``self.params``; defaults are filled in per model
        type before the model is constructed:

        * 'LR': ``lr`` with ``regularization`` (default 'l1') and ``alpha``
          (default 1.0) as penalty and C.
        * 'SVM' / 'SVMNB': ``svm.LinearSVC`` for a linear kernel, otherwise
          ``svm.SVC``; 'SVMNB' always uses the linear kernel.
        * 'MNB': ``MultinomialNB``.
        * 'myMNB': no model object, only a default ``alpha``.
        * anything else: ``model_type`` is reset to 'default', no model.
        """
        self.model_type = model_type
        self.column_names = column_names
        self.params = kwargs
        self.trained = None
        self.metric = metric

        params = self.params

        def fill_missing(key, value):
            # A key that is absent OR explicitly None receives the default.
            if params.get(key, None) is None:
                params[key] = value

        if model_type == 'LR':
            fill_missing('regularization', 'l1')
            fill_missing('alpha', 1.0)
            self.model = lr(penalty=params['regularization'], C=params['alpha'])
        elif model_type in ('SVM', 'SVMNB'):
            fill_missing('kernel', 'rbf')

            if model_type == 'SVM':
                fill_missing('alpha', 0.1)
            else:  # SVMNB
                params['kernel'] = 'linear'
                fill_missing('alpha', 1)
                fill_missing('beta', 0.25)

            if params['kernel'] == 'linear':
                # Override the regularization parameter to avoid a conflict.
                params['regularization'] = 'l2'
                self.model = svm.LinearSVC(C=params['alpha'])
            else:
                fill_missing('degree', 3)
                fill_missing('gamma', 0.0)
                fill_missing('coef0', 0.0)
                self.model = svm.SVC(C=params['alpha'], kernel=params['kernel'], degree=params['degree'],
                                     gamma=params['gamma'], coef0=params['coef0'])
        elif model_type == 'MNB':
            # Here only an absent key gets the default; an explicit None stays.
            params.setdefault('alpha', 1.0)
            self.model = MultinomialNB(alpha=params['alpha'], fit_prior=True)
        elif model_type == 'myMNB':
            params.setdefault('alpha', 1.0)
            self.model = None
        else:
            self.model_type = 'default'
            self.model = None

Exemple #32

0

Afficher le fichier

Fichier : wind_classes.py Projet : zeshi1990/swe_reconstruction

 def linear_model(self, nldas_wind, type = 'speed'):
     """Fit an ``lr()`` model from NLDAS wind values to this object's anomaly.

     :param nldas_wind: array of NLDAS values, indexable with the NaN mask of
         the selected anomaly.
     :param type: 'speed' selects ``self.wind_speed_anomaly``; any other
         value selects ``self.wind_dir_anomaly``.
     :return: tuple ``(model, std)`` - the fitted model and the residual
         standard deviation computed with ``n - 2`` degrees of freedom.
     """
     target = self.wind_speed_anomaly if type == 'speed' else self.wind_dir_anomaly
     valid = ~np.isnan(target)
     kept = nldas_wind[valid]
     features = kept.reshape((len(kept), 1))
     target = target[valid]

     model = lr()
     model.fit(features, target)
     fitted = model.predict(features)
     residual_sum = np.sum((fitted - target) ** 2)
     std = np.sqrt(residual_sum / (len(target) - 2))
     return model, std

Exemple #33

0

Afficher le fichier

def predict_lr(X, y, X_train, X_test, y_train, y_test):
    """Fit a classifier, pickle it, score it and write a submission CSV.

    ``X`` and ``y`` are accepted for signature compatibility but are not used.
    The fitted estimator is pickled to ``logreg_trained_new.sav`` and the
    predictions are written to ``submission_surf_lr.csv`` next to the true
    labels, with a 1-based ``ImageId`` column.
    """
    clf = lr(solver='lbfgs', multi_class='ovr')
    print("======Logistic Regression======")
    clf.fit(X_train, y_train)
    # Use a context manager so the file is flushed and closed even if
    # pickling fails; the original left the handle open.
    with open('logreg_trained_new.sav', 'wb') as model_file:
        pickle.dump(clf, model_file)
    y_pred = clf.predict(X_test)
    calc_accuracy("Logistic regression", y_test, y_pred)
    np.savetxt('submission_surf_lr.csv',
               np.c_[range(1,
                           len(y_test) + 1), y_pred, y_test],
               delimiter=',',
               header='ImageId,Label,TrueLabel',
               comments='',
               fmt='%d')

Exemple #34

0

Afficher le fichier

def LR_from_cfg(params):
    """Objective function for a hyper-parameter search over ``lr`` settings.

    For combinations of ``penalty`` / ``dual`` / ``solver`` / ``multi_class``
    that this function rejects, the constant ``1 - 0.001`` is returned without
    fitting anything.  Otherwise the result is one minus the mean 5-fold
    cross-validation score on the module-level ``X`` and ``y``.
    """
    X_ = X[:]
    clf = lr(**params)
    penalty = params['penalty']

    def rejected():
        """Return True when the combination must not be cross-validated."""
        if penalty == 'l2':
            if params['dual'] is True:
                # With dual=True only non-multinomial liblinear is evaluated.
                return (params['solver'] != 'liblinear'
                        or params['multi_class'] == 'multinomial')
            return (params['solver'] == 'liblinear'
                    and params['multi_class'] == 'multinomial')
        if penalty not in ('l1', 'elasticnet', 'none'):
            # Unknown penalties are always handed to cross-validation.
            return False
        if params['dual'] is True:
            return True
        solver = params['solver']
        if penalty == 'l1':
            if solver == 'liblinear':
                return params['multi_class'] == 'multinomial'
            return solver != 'saga'
        if penalty == 'elasticnet':
            return solver != 'saga'
        # penalty == 'none'
        return solver == 'liblinear'

    if rejected():
        return 1 - 0.001
    return 1 - cross_val_score(clf, X_, y, cv=5).mean()

Exemple #35

0

Afficher le fichier

Fichier : RV_coefficient.py Projet : dihuang0220/GraphEnsembleLearning

def evaluateNodeClassification(X_train, X_test, Y_train, Y_test):
    """Fit a top-k ranking classifier and return its predictions for X_test.

    The number of labels to predict per row is taken from the row sums of
    ``Y_test``.  If fitting or predicting fails, a message is printed and an
    all-zero array of ``Y_test.shape`` is used as the prediction.

    Micro and macro F1 are computed but only the prediction is returned.
    """
    try:
        top_k_list = list(Y_test.toarray().sum(axis=1))
    except AttributeError:
        # Inputs without a ``toarray`` method are summed directly.
        top_k_list = list(Y_test.sum(axis=1))
    classif2 = TopKRanker(lr())
    try:
        classif2.fit(X_train, Y_train)
        prediction = classif2.predict(X_test, top_k_list)
    except Exception:
        # Best-effort fallback; ``Exception`` (rather than a bare except)
        # lets KeyboardInterrupt and SystemExit propagate.
        print('Could not fit node classification model')
        prediction = np.zeros(Y_test.shape)
    # NOTE(review): these scores are computed but never returned or printed.
    micro = f1_score(Y_test, prediction, average='micro')
    macro = f1_score(Y_test, prediction, average='macro')
    return prediction

Exemple #36

0

Afficher le fichier

Fichier : log_reg.py Projet : khalednakhleh/KickStarterChance

def log_reg(x, y, t, q):
    """Train on ``(x, y)``, report the score on ``(t, q)`` and predict ``t``.

    Prints the score and its complement as percentages, then returns the
    labels predicted for ``t``.
    """
    model = lr(solver = "saga", tol = 0.001, max_iter = 600, n_jobs = -1, fit_intercept = True)
    model.fit(x, y)

    score = model.score(t, q)      # evaluated on the held-out pair (t, q)
    labels = model.predict(t)

    # Summary for the user.
    print("------------------------------------------")
    print("accuracy rate is %{}" .format(round(score * 100 , 3)))
    print("Error rate is %{}" .format(round((1 - score) * 100 , 3)))

    return labels

Exemple #37

0

Afficher le fichier

    def train_model(self):
        '''
        Fit ``lr(max_iter=200)`` on a scaled train split of ``self.frame``.

        The last column of ``self.frame`` is used as the label and the other
        columns, after ``scale``, as features.  15% of the rows are held out
        (``random_state=26``).  The split is stored on ``self.train``,
        ``self.test``, ``self.train_labels`` and ``self.test_labels``; the
        fitted estimator is stored on ``self.model``.  Always returns True.

        NOTE(review): only ``max_iter`` is overridden, so the estimator's
        default regularization applies; the previous docstring claimed
        "No regularization" - confirm.
        '''
        print('TRAINING MODEL...')
        targets = self.frame[:, -1]
        features = scale(self.frame[:, :-1])
        split = tts(features, targets, random_state=26, test_size=.15)
        self.train, self.test, self.train_labels, self.test_labels = split

        estimator = lr(max_iter=200)
        self.model = estimator
        estimator.fit(self.train, self.train_labels)
        print('DONE!\n')

        return True

Exemple #38

0

Afficher le fichier

Fichier : metaclassifier_train.py Projet : saurabh-singh-17/secgov

def _train_SKLR_Classifier(extractedBases, lbls, params=None):
    """Train an ``lr`` classifier on the extracted features.

    Args:
        extractedBases: features, converted with ``makeSKFormat`` before fitting.
        lbls: training labels, passed to ``fit`` unchanged.
        params: optional dict of overrides.  Recognised keys and defaults:
            ``C`` (10), ``penalty`` ('l1'), ``class_weight`` ('auto'),
            ``tol`` (1e-6).  ``None`` means "use all defaults".

    Returns:
        ``(classifier, classes)`` where ``classes`` is
        ``list(classifier.classes_)``.

    NOTE(review): ``class_weight='auto'`` is believed to be spelled
    ``'balanced'`` in recent scikit-learn releases - confirm against the
    installed version.
    """
    # Avoid a shared mutable default argument.
    if params is None:
        params = {}

    Xtrn = makeSKFormat(extractedBases)
    ytrn = lbls

    C = params.get('C', 10)
    penalty = params.get('penalty', 'l1')
    class_weight = params.get('class_weight','auto')
    tol = params.get('tol', 1e-6)

    classifier = lr(C=C, penalty=penalty,
                    class_weight=class_weight, tol=tol)

    classifier.fit(Xtrn,ytrn)

    return classifier, list(classifier.classes_)

Exemple #39

0

Afficher le fichier

Fichier : classify_test.py Projet : benbo/botc

def classify(data_filename, label_filename, feature_dir, list_of_features, model_type='LR',
             regularizer='l1', alpha=1.0, verbose=1):
    """Load features, fit one linear model and return its negated F1 as a loss.

    Args:
        data_filename: data file handed to ``feature_loader.load_feature``.
        label_filename: CSV of labels (header row, first column is the index).
        feature_dir: directory for feature files; created if missing.
        list_of_features: feature descriptions to load and stack column-wise.
        model_type: ``'LR'`` or ``'SVM'``; anything else exits the process.
        regularizer: penalty name passed to the model.
        alpha: value passed to the model as ``C``.
        verbose: forwarded to the feature loader; values above 0 also print
            the loaded shapes.

    Returns:
        ``{'loss': -f1, 'status': STATUS_OK}``.  The F1 score is computed on
        the same data the model was fitted on.
    """
    labels = pd.read_csv(label_filename, header=0, index_col=0)

    if not os.path.exists(feature_dir):
        os.makedirs(feature_dir)

    # Load every requested feature; all of them must describe the same rows.
    items = None
    feature_matrices = []
    column_names = []
    print("Loading features")
    for feature in list_of_features:
        # Forward the caller's verbosity; the original hard-coded verbose=1
        # here and so ignored the parameter.
        rows, columns, counts = feature_loader.load_feature(feature, feature_dir, data_filename,
                                                            verbose=verbose)
        if items is None:
            items = rows
        else:
            assert items == rows
        if verbose > 0:
            print("Loaded %s with shape %s" % (feature, counts.shape))
        feature_matrices.append(counts)
        column_names.append(columns)

    # Concatenate all features together.
    X = sparse.csr_matrix(sparse.hstack(feature_matrices))
    column_names = np.concatenate(column_names)
    if verbose > 0:
        print("Full feature martix size: %s" % (X.shape,))

    if model_type == 'LR':
        model = lr(penalty=regularizer, C=alpha)
    elif model_type == 'SVM':
        model = svm.LinearSVC(C=alpha, penalty=regularizer)
    else:
        sys.exit('Model type ' + model_type + ' not supported')

    # ``.values`` is the long-standing equivalent of the deprecated
    # ``DataFrame.as_matrix()``.
    y = labels.values.ravel()
    model.fit(X, y)
    pred = model.predict(X)
    f1 = f1_score(y_true=y, y_pred=pred)
    print(f1)
    return {'loss': -f1, 'status': STATUS_OK}

Exemple #40

0

Afficher le fichier

Fichier : optimize_full_ensemble_classify_test.py Projet : anukat2015/ARKcat

def classify_one_model(feature_list, model_type='LR', regularizer='l1', alpha=1.0, converg_tol=0.01, verbose=1, folds=2, n_jobs=-1, score_eval='f1'):
    """Fit one linear model on the train set and score the dev set.

    ``model_type`` must be ``'LR'`` or ``'SVM'``; any other value exits the
    process before data is loaded.  ``folds``, ``n_jobs`` and ``score_eval``
    are accepted but not used here.  File and directory locations come from
    module-level names (``train_data_filename`` and friends).

    Returns ``(dev_pred_prob_Y, model, dev_Y)``: the dev-set probabilities
    from ``predict_proba``, the fitted model and the dev labels.
    """
    if model_type == 'SVM':
        model = svm.LinearSVC(penalty=regularizer, C=alpha, tol=converg_tol)
    elif model_type != 'LR':
        sys.exit('Model type ' + model_type + ' not supported')
    else:
        model = lr(penalty=regularizer, C=alpha, tol=converg_tol)

    train_X, train_Y = load_features(
        train_data_filename, train_label_filename, train_feature_dir,
        feature_list, verbose)
    # Dev features reuse the train vocabulary (``vocab_source``).
    dev_X, dev_Y = load_features(
        dev_data_filename, dev_label_filename, dev_feature_dir,
        feature_list, verbose, vocab_source=train_feature_dir)

    model.fit(train_X, train_Y)
    return model.predict_proba(dev_X), model, dev_Y

Exemple #41

0

Afficher le fichier

Fichier : script.py Projet : SharmileeS/LABirthRates

# NOTE(review): 'F19' is dropped in place here yet requested again when df1
# is built below - confirm this does not raise a KeyError.
data.drop('F19', axis=1, inplace=True)
selector = selector.fit(data, y)

# Report the columns flagged by the fitted selector.
print("ATTRIBUTES WHICH HAVE BEEN SELECTED\n")
for i, column in enumerate(data.columns):
    if selector.support_[i] == True:
        print(column)

# Facility name followed by F1 ... F22.
df1 = data[['FAC_NAME'] + ['F%d' % k for k in range(1, 23)]]

# 10-fold cross-validation of several classifiers on the same frame.
clf = SVC()
scores = cv1(clf, df1, y, cv=10)
print("\nSVC Cross validated Scores:\n")
print(scores)

clf1 = lr()
scores1 = cv1(clf1, df1, y, cv=10)
print("\nLogistic Regression Cross validated Scores:\n")
print(scores1)

model = GaussianNB()
scores2 = cv1(model, df1, y, cv=10)
print("\nNaive Bayes Cross validated Scores:\n")
print(scores2)

model = DecisionTreeClassifier()
scores3 = cv1(model, df1, y, cv=10)
print("\nDecision Trees validated Scores:\n")
print(scores3)

clf = LinearSVC()

Exemple #42

0

Afficher le fichier

Fichier : logistic_regression.py Projet : bfetler/lending_club

def main():
    """Run the logistic-regression experiments end to end.

    Steps, in order: fit and plot on the raw variables; fit and plot on the
    scaled variables; refit on the six leading entries returned by
    ``print_lr_coefs``; draw a ROC curve; explore parameters; run the column
    optimization routine; then repeat exploration, cross-validation and a
    final fit on the optimized column list.  All work is delegated to helper
    functions defined elsewhere in the project.
    """
    app = get_app_title()
    appf = get_app_file()
    plotdir = make_plotdir()

    loans_df, loans_y, test_df, test_y, numeric_vars = load_data()
    indep_vars = numeric_vars

    # skip scaling for now, score 0.71
    loans_X = loans_df
    test_X = test_df
    clf = lr()
    do_fit(clf, loans_X, loans_y, print_out=True)
    pred_y = do_predict(clf, test_X, test_y, print_out=True)
    plot_predict(plotdir, app, appf, "rawvar", indep_vars, test_df, test_y, pred_y)

    # add scaling, score 0.90
    loans_X, my_scaler = scale_train_data(loans_df, print_out=True)
    # The test set is transformed with the scaler returned for the train set.
    test_X = scale_test_data(my_scaler, test_df)

    clf = lr()
    do_fit(clf, loans_X, loans_y, print_out=True)
    pred_y = do_predict(clf, test_X, test_y, print_out=True)
    plot_predict(plotdir, app, appf, "allvar", indep_vars, test_df, test_y, pred_y)
    print("columns:", indep_vars)
    # NOTE(review): X_labels is assigned but not used below.
    X_labels = list(loans_df.columns)
    plist = print_lr_coefs(clf, indep_vars)

    # find score using only top6
    # presumably plist is ordered by importance; verify in print_lr_coefs
    top6 = [p[0] for p in plist[:6]]
    print("top6:", top6)
    loans_X = loans_df[top6]
    test_X = test_df[top6]
    loans_X, my_scaler = scale_train_data(loans_X, print_out=True)
    test_X = scale_test_data(my_scaler, test_X)
    clf = lr()
    do_fit(clf, loans_X, loans_y, print_out=True)
    pred_y = do_predict(clf, test_X, test_y, print_out=True)
    print_lr_coefs(clf, top6)
    plot_predict(plotdir, app, appf, "top6", top6, test_df, test_y, pred_y)

    do_roc(clf, test_X, test_y, "top6", top6, app, appf, plotdir)

    # loans_X still holds the scaled top6 columns at this point.
    explore_params(loans_X, loans_y, plotdir, app, appf)

    # run optimization routine
    clf = lr()
    opt_score, opt_list = run_opt(clf, numeric_vars, loans_df, loans_y, app, appf, plotdir, rescale=True)
    # accuracy 73% +- 3% with no scaling  (90% with scaling)

    # redo exploration with optimized columns
    loans_X = loans_df[opt_list]
    test_X = test_df[opt_list]
    loans_X, my_scaler = scale_train_data(loans_X, print_out=True)
    test_X = scale_test_data(my_scaler, test_X)
    explore_params(loans_X, loans_y, plotdir, app, appf+"opt_")
    # NOTE(review): an earlier comment here read "accuracy 73% due to no
    # scaling", but loans_X is scaled at this point - confirm.

    clf = lr()
    cross_validate(clf, loans_X, loans_y, print_out=True)

    clf = lr()
    do_fit(clf, loans_X, loans_y, print_out=True)
    pred_y = do_predict(clf, test_X, test_y, print_out=True)
    print("opt_list columns:", opt_list)
    print_lr_coefs(clf, opt_list)
    plot_predict(plotdir, app, appf, "optvar", opt_list, test_df, test_y, pred_y)

Exemple #43

0

Afficher le fichier

Fichier : self_training_grid_new.py Projet : clur/Thesis

    print 'f1 macro:', res
    print
    # color = cm(1. * i / NUM_COLORS)  # color will now be an RGBA tuple
    # cm = plt.get_cmap('gist_rainbow')
    # fig = plt.figure(figsize=(8.0, 5.0))
    # ax = fig.add_subplot(111)
    # # ax.set_color_cycle([cm(1. * i / NUM_COLORS) for i in range(NUM_COLORS)])
    # ax.plot(range(len(scores)), scores, label=str(threshold))
    # ax.text(len(scores) - 1, scores[len(scores) - 1], threshold, fontsize='smaller')
    # plt.show()
    print name
    return res


# Candidate vectorizers, classifiers and thresholds for the grid.
vec_list = [tf(), cv()]
clf_list = [svc(), lr()]
threshold_list = np.arange(0.5, 3, 0.5)
print(len(threshold_list))

# Evaluate ``run`` on every (threshold, vectorizer, classifier) combination.
grids = np.vectorize(run)(*np.ix_(threshold_list, vec_list, clf_list))
print(len(grids))
try:
    print(grids.shape)
except AttributeError:
    # The result has no ``shape`` attribute; report its type instead.
    # Only AttributeError is caught so that interrupts are not swallowed.
    print(type(grids))

Exemple #44

0

Afficher le fichier

Fichier : Logistic_regression_sklearn_2.py Projet : dwarakanandan/MachineLearning

	x[:,16] = (x1**4)*x2
	x[:,17] = (x1**3)*(x2**2)
	x[:,18] = (x1**2)*(x2**3)
	x[:,19] = x1*(x2**4)
	x[:,20] = x2**5
	x[:,21] = x1**6
	x[:,22] = (x1**5)*x2
	x[:,23] = (x1**4)*(x2**2)
	x[:,24] = (x1**3)*(x2**3)
	x[:,25] = (x1**2)*(x2**4)
	x[:,26] = x1*(x2**5)
	x[:,27] = x2**6
	return x

# Columns 0 and 1 are the two raw features, column 2 is the label.
data = np.loadtxt("data_microchip.txt", delimiter=",")
x1 = data[:, 0]
x2 = data[:, 1]
m = x1.size
x = map_features(x1, x2, m)
y = data[:, 2]

reg = lr(C=10)
reg.fit(x, y)

# Stack intercept and coefficients into a single column vector.
s = reg.coef_.size
theta_ans = np.zeros((s + 1, 1))
theta_ans[0, 0] = reg.intercept_[0]
theta_ans[1:, 0] = reg.coef_
print("%.2f%% accuracy" % (reg.score(x, y) * 100))

Exemple #45

0

Afficher le fichier

Fichier : Linear_regression_sklearn_1.py Projet : dwarakanandan/MachineLearning

import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression as lr

# Column 0 is the single feature, column 1 the target.
data = np.loadtxt("ex1data1.txt", delimiter=',')
m = data[:, 0].size
x = data[:, 0].reshape(m, 1)   # the estimator expects a 2-D feature matrix
y = data[:, 1]

a = lr(fit_intercept=True)
a.fit(x, y)
for fitted_value in (a.coef_, a.intercept_, a.score(x, y)):
    print(fitted_value)

# Data points with the fitted line on top.
plt.scatter(x, y)
plt.plot(x, a.predict(x))
plt.show()

Exemple #46

0

Afficher le fichier

Fichier : logistic_regression.py Projet : navrug/Boltzmann-s-Cuisine

train_data = np.load('train_data.npy')

if load_saved:
    # Rebuild the RBM from a saved report instead of training it again.
    # NOTE(review): ``rbm`` must already exist when load_saved is false -
    # confirm it is defined earlier in the script.
    report = np.load("report.npy").item()
    rbm = RBM(len(train_data), report["n_hidden"], report["batch_size"])
    rbm.W = report["W"]
    rbm.hbias = report["hbias"]
    rbm.vbias = report["vbias"]

# The first 20 columns are used as a one-hot label; the rest are inputs.
Y = np.argmax(train_data[:,:20], axis=1)
train_data = train_data[:,20:]
# Hidden-unit activations of the RBM are the classifier features.
X = sigmoid(np.dot(train_data, rbm.W) + rbm.hbias)


# NOTE(review): the original passed 0.01 positionally, which binds to the
# first constructor parameter (``penalty`` for scikit-learn's
# LogisticRegression), not to the regularization strength.  ``C`` is
# presumably what was meant - confirm.
classifier = lr(C=0.01, solver = 'lbfgs', multi_class='multinomial')
classifier.fit(X, Y)

test_data = np.load('test_data.npy')
test_X = sigmoid(np.dot(test_data, rbm.W) + rbm.hbias)

pred = classifier.predict(test_X)
train_ids, train_cuisines, train_ingredients = read_data('train.json')
test_ids, test_cuisines, test_ingredients = read_data('test.json')
del train_ids, train_ingredients, test_cuisines, test_ingredients
# Map integer predictions back to cuisine names.
# NOTE(review): assumes the encoder's class order matches the one-hot
# column order used for Y - confirm.
le = LabelEncoder()
le.fit(train_cuisines)
pred = le.inverse_transform(pred)
create_submission(test_ids, pred)

Exemple #47

0

Afficher le fichier

Fichier : Logistic_regression_sklearn_1.py Projet : dwarakanandan/MachineLearning

import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression as lr

def show_scatter():
    """Scatter both classes and draw the line defined by ``theta_ans``.

    Reads the module-level ``data`` (label in column 2) and ``theta_ans``.
    Rows labelled 1 are drawn in red, rows labelled 0 in blue, and the line
    joins its two axis intercepts.
    """
    labels = data[:, 2]
    admitted = data[labels == 1]
    not_admitted = data[labels == 0]
    for group, colour in ((admitted, 'r'), (not_admitted, 'b')):
        plt.scatter(group[:, 0], group[:, 1], c=colour, s=50)

    bias = theta_ans[0][0]
    x_intercept = -bias / theta_ans[1][0]
    y_intercept = -bias / theta_ans[2][0]
    plt.plot([0, x_intercept], [y_intercept, 0])
    plt.show()


# Two feature columns followed by the label column.
data = np.loadtxt("data_logistic_regression.txt", delimiter=",")
m = data[:, 0].size
x = data[:, 0:2]
y = data[:, 2]

reg = lr(C=3.2)
reg.fit(x, y)

# Stack intercept and coefficients into a single column vector.
s = reg.coef_.size
theta_ans = np.zeros((s + 1, 1))
theta_ans[0, 0] = reg.intercept_[0]
theta_ans[1:, 0] = reg.coef_
print(theta_ans)
print("%s %s" % (reg.score(x, y) * 100, "% accuracy"))
show_scatter()

Exemple #48

0

Afficher le fichier

Fichier : model_lr.py Projet : Noahs-ARK/ARKcat

 def train(self, train_X, train_Y):
     """Build a new ``lr`` estimator from ``self.hp`` and fit it.

     Reads the ``'regularizer'``, ``'alpha'`` and ``'converg_tol'`` entries
     of ``self.hp`` and stores the fitted estimator on ``self.model``.
     """
     settings = dict(penalty=self.hp['regularizer'],
                     C=self.hp['alpha'],
                     tol=self.hp['converg_tol'])
     self.model = lr(**settings)
     self.model.fit(train_X, train_Y)

Exemple #49

0

Afficher le fichier

Fichier : linear_regression_matthias.py Projet : Fischmaa/projetspe

import pandas as pa
from sklearn.linear_model import LinearRegression as lr
import matplotlib.pyplot as plt
import random

random.seed(1)

tabtrain = pa.read_csv('sources/train.csv')
tabtest = pa.read_csv('sources/test.csv')

# Build the feature tables.  ``axis`` is passed by keyword because newer
# pandas releases no longer accept it positionally.
x_train = tabtrain.drop(['datetime','count','casual','registered'], axis=1)
x_test = tabtest.drop(['datetime'], axis=1)


# Build the target table.
y_train = tabtrain['count']


# The original called ``lr(5)``; the stray positional value lands on the
# first constructor parameter (``fit_intercept`` for scikit-learn's
# LinearRegression), where any truthy value behaves like the default.
model = lr()

model.fit(x_train, y_train)

# Predictions are indexed by the timestamp of each test row.
y_test = model.predict(x_test)
y_test = pa.DataFrame(y_test)
y_test.index = tabtest['datetime']

print(y_test)

Exemple #50

0

Afficher le fichier

Fichier : clf_speed.py Projet : bfetler/sklearn_clf_speed

def logistic_regression_speed_test(dftrain, dftrain_y, plotdir):
    """Run ``speed_test_large`` on a default ``lr`` estimator.

    The run is labelled 'Logistic Regression' and uses the file prefix
    'logreg'.
    """
    speed_test_large(lr(), dftrain, dftrain_y,
                     'Logistic Regression', 'logreg', plotdir)

Exemple #51

0

Afficher le fichier

Fichier : kLogisticRegression.py Projet : abdulwahidgul24085/sfcc

testDf = auxiliary.initialise_test(False)
ids = testDf['Id'].values
# Drop the identifier and the raw date/address/coordinate columns.
testDf = testDf.drop(['Id', 'Dates', 'Address', 'X', 'Y'], axis=1)

# Show which columns are fed to the model.
for frame in (trainDf, testDf):
    print(list(frame.columns.values))

# Back to numpy format.
trainData = trainDf.values
testData = testDf.values

print('Training...')
# Column 0 of the training array is the target; the rest are features.
logit = lr()
logit = logit.fit(trainData[:, 1:], trainData[:, 0])

print('Predicting...')
output = logit.predict_proba(testData).astype(float).tolist()

predictions_file = open("../submissionLR.csv", "wb")
open_file_object = csv.writer(predictions_file)
open_file_object.writerow(["Id",'ARSON','ASSAULT','BAD CHECKS','BRIBERY','BURGLARY','DISORDERLY CONDUCT',
                           'DRIVING UNDER THE INFLUENCE','DRUG/NARCOTIC','DRUNKENNESS','EMBEZZLEMENT','EXTORTION',
                           'FAMILY OFFENSES','FORGERY/COUNTERFEITING','FRAUD','GAMBLING','KIDNAPPING','LARCENY/THEFT',
                           'LIQUOR LAWS','LOITERING','MISSING PERSON','NON-CRIMINAL','OTHER OFFENSES',
                           'PORNOGRAPHY/OBSCENE MAT','PROSTITUTION','RECOVERED VEHICLE','ROBBERY','RUNAWAY',
                           'SECONDARY CODES','SEX OFFENSES FORCIBLE','SEX OFFENSES NON FORCIBLE','STOLEN PROPERTY',
                           'SUICIDE','SUSPICIOUS OCC','TREA','TRESPASS','VANDALISM','VEHICLE THEFT','WARRANTS',

Exemple #52

0

Afficher le fichier

Fichier : linear_regression_matthias.py Projet : jorianvb/projetspe

import pandas as pa
from sklearn.linear_model import LinearRegression as lr
import matplotlib.pyplot as plt
import random

random.seed(1)

tabtrain = pa.read_csv('sources/train.csv')
tabtest = pa.read_csv('sources/test.csv')

# Build the feature tables.  ``axis`` is passed by keyword because newer
# pandas releases no longer accept it positionally.
x_train = tabtrain.drop(['datetime','count','casual','registered'], axis=1)
x_test = tabtest.drop(['datetime'], axis=1)


# Build the target table.
y_train = tabtrain['count']


model = lr()

model.fit(x_train, y_train)

# Predictions are indexed by the timestamp of each test row.
y_test = model.predict(x_test)
y_test = pa.DataFrame(y_test)
y_test.index = tabtest['datetime']

print(y_test)

Exemple #53

0

Afficher le fichier

Fichier : titanic.py Projet : takushi-m/kaggle-titanic

    x["miss"] = data.Name.map(lambda x:1 if x.lower().find("miss")>=0 else 0)
    x["master"] = data.Name.map(lambda x:1 if x.lower().find("master")>=0 else 0)

    x["embark_C"] = data.Embarked.map(lambda x:1 if x=="C" else 0)
    x["embark_Q"] = data.Embarked.map(lambda x:1 if x=="Q" else 0)
    x["embark_S"] = data.Embarked.map(lambda x:1 if x=="S" else 0)

    #return x
    p = poly(2, interaction_only=False)
    return p.fit_transform(x)

if __name__ == "__main__":
    # Fit on the training set.
    data = pd.read_csv("./data/train.csv")
    x, y = makeInput(data), data.Survived

    model = lr(C=0.2)
    model.fit(x, y)

    # Predict the test set using the same feature construction.
    test_data = pd.read_csv("./data/test.csv")
    x_test = makeInput(test_data)
    predict = pd.Series(model.predict(x_test))

    # Write one row per passenger, without the frame index.
    y_test = pd.DataFrame({"PassengerId": test_data.PassengerId,
                           "Survived": predict})
    y_test.to_csv("./predict.csv", index=False)