Example #1
    def calibrate(self, data, dt):
        '''
        Calibrate the mean-reversion model from a historical series via AR(1) regression.

        Parameters
        ----------
        data: 1-D array
            equally spaced historical time series

        dt: float
            time step between two consecutive data points
        '''
        # assumes: import numpy as np; from sklearn.linear_model import LinearRegression as linreg
        reg = linreg()
        reg.fit(data[:-1].reshape(-1, 1), data[1:])

        # rebuild the series from the fitted AR(1) recursion to measure the residual error
        predict = [data[0]]
        for i in range(1, len(data)):
            predict.append(reg.coef_[0] * predict[i - 1] + reg.intercept_)
        predict = np.array(predict)

        mse = ((predict - data) ** 2).mean()
        self.parameters = {
            'long term mean': reg.intercept_ / dt,
            'reversion speed': -np.log(reg.coef_[0]) / dt,
            'volatility': mse * ((-2 * np.log(reg.coef_[0]) / dt /
                                  (1 - reg.coef_[0] ** 2)) ** .5)
        }
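calibrate() above is a method fragment; np, linreg, and self.parameters come from its enclosing module. A minimal standalone sketch of the same AR(1) calibration step, assuming linreg aliases sklearn.linear_model.LinearRegression and using synthetic data (illustrative values only):

import numpy as np
from sklearn.linear_model import LinearRegression as linreg

# simulate a mean-reverting AR(1) series (synthetic data, illustrative values only)
rng = np.random.default_rng(0)
dt, n = 1.0 / 252, 2000
data = np.zeros(n)
for t in range(1, n):
    data[t] = 0.01 + 0.95 * data[t - 1] + 0.02 * rng.standard_normal()

# regress x[t] on x[t-1], exactly as calibrate() does
reg = linreg()
reg.fit(data[:-1].reshape(-1, 1), data[1:])
print('fitted AR(1) slope (true value 0.95):', reg.coef_[0])
print('implied reversion speed:', -np.log(reg.coef_[0]) / dt)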
Example #2
def fit_linear_model2(X, y, results, keys,
                     num_cv = 5, 
                     verbose = False, 
                     plot_results = False
                     ):
    X = pp.scale(X)
    clf = []
    R2 = []
    coef = []
    prob = []
    score = []
    group_keys = []
    # Now do cross-validation to estimate accuracy
    if num_cv > 1:
        kf = KFold(n = len(y), n_folds = num_cv)
        for train, test in kf:
            X_train, X_test = X[train], X[test]
            y_train, y_test = y[train], y[test]
            results_test, keys_test = results[test], keys[test]
            clf_temp2 = linreg(
                            fit_intercept = False)
            clf_temp2.fit(X_train,y_train)
            pred = clf_temp2.predict(X_test)
            clf.append(clf_temp2)
            R2.append(clf_temp2.score(X_test,y_test))
            coef.append(clf_temp2.coef_)
            prob.append(diff_to_prob(pred))
            score.append(lossFx(results_test,pred))
            group_keys.append(keys_test)
    else:
        clf_temp2 = linreg(
                fit_intercept = False)
        clf_temp2.fit(X,y)
        pred = clf_temp2.predict(X)
        clf = clf_temp2
        R2 = clf_temp2.score(X,y)
        coef = clf_temp2.coef_
        prob = diff_to_prob(pred)
        score = lossFx(results,pred)
        group_keys = keys
    if num_cv > 1:
        return clf, R2, score, coef, prob, kf, group_keys
    else:
        return clf, R2, score, coef, prob, group_keys
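fit_linear_model2 above targets the pre-0.18 sklearn.cross_validation.KFold(n=..., n_folds=...) signature. A minimal sketch of the equivalent cross-validation loop with the current sklearn.model_selection API, on toy data (illustrative only):

import numpy as np
from sklearn.linear_model import LinearRegression as linreg
from sklearn.model_selection import KFold

X = np.random.rand(100, 3)                      # stand-in for the scaled feature matrix
y = X @ np.array([1.0, -2.0, 0.5])

kf = KFold(n_splits=5)
scores = []
for train, test in kf.split(X):                 # index arrays, as in the old "for train, test in kf" loop
    clf = linreg(fit_intercept=False).fit(X[train], y[train])
    scores.append(clf.score(X[test], y[test]))
print('mean R^2 across folds:', np.mean(scores))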
Example #3
def run_var_list(new_vars, loansData):
    "run fit and predict with new variable list"
    train_df, train_y, test_df, test_y = load_data(loansData, new_vars)
    train_X, my_scaler = scale_train_data(train_df)
    test_X = scale_test_data(my_scaler, test_df)
    regr = linreg()
    regr.fit(train_X, train_y)
    sort_coefs(list(train_df.columns), regr.coef_, regr.intercept_)
    cross_validate(regr, train_X, train_y, cv=10, print_out=True)
    score = regr.score(train_X, train_y)
    print('Regression fit R^2 score %.4f' % score)
    pscore = regr.score(test_X, test_y)
    print('Regression predict R^2 score %.4f' % pscore)
Example #4
def main():
    game_data = retrieve_mysql_data()
    db_cursor.close()
    nbadb.close()

    sum_correct = 0
    sum_total = 0
    sum_correct_outcome = 0
    veg_avgs = np.zeros(shape=20)
    avgs = np.zeros(shape=20)

    for i in range(0, 20):
        np.random.seed(i)  # seed NumPy's global RNG so np.random.shuffle below is reproducible
        np.random.shuffle(game_data)
        x_data = game_data[:, :-2]
        y_data = game_data[:, -2]
        vegas_data = game_data[:, -1]
        kf = KFold(n_splits=5, shuffle=False)

        for train_index, test_index in kf.split(x_data):
            train_x_raw, test_x_raw = x_data[train_index], x_data[test_index]
            train_y, test_y = y_data[train_index], y_data[test_index]

            X_train = standardize_add_bias(train_x_raw, train_x_raw)
            X_test = standardize_add_bias(test_x_raw, train_x_raw)

            reg = linreg().fit(X_train, train_y)
            y_exp = X_test.dot(reg.coef_) + reg.intercept_
            for pred, act, veg in zip(np.nditer(y_exp), np.nditer(test_y),
                                      np.nditer(vegas_data[test_index])):
                sum_total += 1
                if pred > veg and act > veg:
                    sum_correct += 1
                elif pred < veg and act < veg:
                    sum_correct += 1
                if pred > 0 and act > 0:
                    sum_correct_outcome += 1
                elif pred < 0 and act < 0:
                    sum_correct_outcome += 1

            veg_avgs[i] = np.mean(np.abs(vegas_data[test_index] - test_y))
            avgs[i] = np.mean(np.abs(np.round(y_exp * 2) / 2 - test_y))

    print("Average deviation from actual point spread: {:.3f}".format(
        np.mean(avgs)))
    print("Vegas deviation from actual point spread: {:.3f}".format(
        np.mean(veg_avgs)))
    print("Percentage of correct spread predictions: {}".format(sum_correct /
                                                                sum_total))
    print("Percentage of correct game predictions: {}".format(
        sum_correct_outcome / sum_total))
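standardize_add_bias is not defined in this snippet. A plausible sketch, inferred only from its name and the way it is called above (an assumption, not the original helper): z-score the first argument with the training fold's statistics and append a bias column.

import numpy as np

def standardize_add_bias(x, ref):
    # hypothetical helper: standardize x with the mean/std of the training fold ref
    # (so the test fold never leaks into the scaling), then append a column of ones
    z = (x - ref.mean(axis=0)) / ref.std(axis=0)
    return np.hstack([z, np.ones((z.shape[0], 1))])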
Example #5
    def test_model(X, Y):
        _l = linreg()
        _ = []

        for i in range(0, 10):
            X_train, X_test, y_train, y_test = train_test_split(
                #X,Y, test_size=0.3)
                X,
                preprocessing.MinMaxScaler(
                    (0, 1)).fit_transform(Y.reshape(-1, 1)),
                test_size=0.2)
            model = _l.fit(X_train, y_train)  #lab_enc.fit_transform(Y))
            predictions = model.predict(X_test)
            _.append(model.score(X_test, y_test))

        print('Averaged model score over 10 iterations: ' + str(sum(_) / 10))
Example #6
def first_reversal(SessDict):
    rzone_early = slice(25 - 6, 32 - 6)
    rzone_late = slice(35 - 6, 42 - 6)
    RZONE_LICKS = {}
    slopes = np.zeros([len(SessDict.keys()), ])
    for m, (mouse, days) in enumerate(SessDict.items()):
        print(mouse)
        LR, LICKS, SPEED = [], [], []
        transition_trials = []
        early_rzone_licks = []
        for i, day in enumerate(days[:2]):

            for sess_ind, session in enumerate(day):
                sess = TwoPUtils.sess.Session(basedir_VR=basedir_VR, mouse=mouse, date=session['date'],
                                              scene=session['scene'],
                                              session=session['session'], VR_only=True, prompt_for_keys=False)
                sess.align_VR_to_2P()

                # get LR value for each trial
                lr_trial = get_LR_trial(sess)

                # make position binned lick rates and speed
                sess.add_timeseries(licks=sess.vr_data['lick']._values, speed=sess.vr_data['dz']._values)
                sess.add_pos_binned_trial_matrix(('licks', 'speed'), 't', min_pos=6, max_pos=43, bin_size=1,
                                                 mat_only=True)

                licks_rz_early = sess.trial_matrices['licks'][:, rzone_early].mean(axis=-1)

                if i == 0 and sess_ind == 0:
                    baseline = np.mean(licks_rz_early[lr_trial == -1])
                else:
                    licks = licks_rz_early[lr_trial == -1] / baseline
                    licks[np.isnan(licks)] = 0
                    early_rzone_licks.append(licks)

        #         f, ax = plt.subplots()
        early_rzone_licks = np.concatenate(early_rzone_licks)
        lr = linreg().fit(np.arange(40)[:, np.newaxis], early_rzone_licks[:40])
        slopes[m] = lr.coef_
        #         ax.plot(early_rzone_licks)

        #         ax.plot(sp.ndimage.filters.gaussian_filter1d(early_rzone_licks,5))

        RZONE_LICKS[mouse] = early_rzone_licks
    return RZONE_LICKS, slopes
Example #7
def main():
    "main program"
    
    loansData = read_data()
    numeric_vars = get_numeric_vars()
    train_df, train_y, test_df, test_y = load_data(loansData, numeric_vars)
    print("train_df head\n", train_df[:3])
    print("train_y head\n", train_y[:3])
    plotdir = make_plotdir() 

# add scaling
    train_X, my_scaler = scale_train_data(train_df)
    test_X = scale_test_data(my_scaler, test_df)
    
    regr = linreg()
    regr.fit(train_X, train_y)
#    print('regr methods', dir(regr))
#   print('columns', list(train_df.columns), 'Intercept')
#   print('coefs', regr.coef_, regr.intercept_)
    coefs = sort_coefs(list(train_df.columns), regr.coef_, regr.intercept_)

    fitpts = regr.predict(train_X)
    plot_predict_scatter(plotdir, "train", fitpts, train_y)

    cross_validate(regr, train_X, train_y, cv=10, print_out=True)
    score = regr.score(train_X, train_y)
    print('Regression fit R^2 score %.4f' % score)
    
    pred = regr.predict(test_X)
#    pscore = sum(np.array(test_y) == pred)  # need np.tol.diff
    pscore = sum(np.abs(test_y - pred)) / len(test_y)
    print('Regression predict diff average %.4f' % pscore)
#    pscore = np.sqrt(sum( (test_y - pred)*(test_y - pred) ))
    pscore = regr.score(test_X, test_y)
    print('Regression predict R^2 score %.4f' % pscore)

    plot_predict_scatter(plotdir, "test", pred, test_y)

    # try fit with fewer top variables: 5, 4, 3, 2
    for top in range(5, 1, -1):
        new_vars = get_top_vars(coefs, top)
        print('new_vars', new_vars)
        run_var_list(new_vars, loansData)
Example #8
def std2kappa(std,Kbel,Kup):
    """The standard deviation transformer:
        This function transforms a given standard deviation value to the Kappa value of the von Mises distribution.
        For this purpose, interpolation with linear regression over a given Kappa interval is done. If the coefficient of
        determination (R^2) of the fit is smaller than 0.99, or if the estimated Kappa is outside of the Kappa interpolation
        interval, an exception is raised.
        
        See also kappa_investigation.py
        
        Parameters
        ----------
        std: float. The standard deviation value which is wished to be transformed to the Kappa value.
        Kbel: float. The lower limit of the Kappa interpolation interval.
        Kup: float. The upper limit of the Kappa interpolation interval.
        
        Returns
        -------
        model.intercept_+std*model.coef_: float. The desired Kappa value of the given standard deviation.
    """
    x=np.linspace(-np.pi,np.pi,num=int(100*2*np.pi)+1)#Create an array spanning from -pi to +pi (num must be an integer;
                                                       #roughly 100 bins per radian)
    kInt=np.linspace(Kbel,Kup,2001)#The kappa interval for interpolation, spanning from Kbel and Kup, with 2001 bins total.
    distCom=[]#List of von Mises distributions with different Kappa values chosen from kInt, normalized by total area
    stdCom=[]#Standard deviation values of the distributions in distCom, calculated with the formula sqrt(sum(x**2*y)) whereas x is 
             #the distribution variable and y is the relative density of the distribution variable.
    for i in range(0,len(kInt)):
        distCom.append(1/(2*np.pi)*np.e**(kInt[i]*np.cos(x-0)))
        distCom[i]=distCom[i]/sum(distCom[i])
        stdCom.append(np.sqrt(sum(x**2*distCom[i])))
    model=linreg().fit(np.asarray(np.rad2deg(stdCom)).reshape(-1,1),kInt)#creating the linear regression model, x value has to be transposed in advance!
                                                                         #np.reshape(-1,1) creates an nx1 matrix out of the array.
    model.score(np.asarray(np.rad2deg(stdCom)).reshape(-1,1),kInt)#Returns the coefficient of determination R^2 of the prediction.
    model.intercept_#the intercept (kappa for stdvM=0), this value is not to be taken seriously.
    model.coef_#the slope coefficient, by which Kappa decreases as std grows.
    if model.score(np.asarray(np.rad2deg(stdCom)).reshape(-1,1),kInt)<0.99:#Make sure the coefficient of determination (R^2)>0.99
        raise Exception("The fit is not good enough. Coefficient of determination=%s"%(model.score(np.asarray(np.rad2deg(stdCom)).reshape(-1,1),kInt)))
    est=model.intercept_+std*model.coef_[0]#the estimated Kappa for the given std
    if not (Kbel<est<Kup):#Make sure the estimated Kappa is inside the interpolation interval.
        raise Exception("WARNING! The estimated Kappa is not in the given interval. Interval=[%s,%s], estimated Kappa=%s"%(Kbel,Kup,est))
    return est#this function is useful to estimate the Kcent in colmod (colclass.py)!
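A quick usage sketch (illustrative values; std is given in degrees, because the regression input is np.rad2deg(stdCom)). With the Kappa interval [1, 1.5], Example #15 below reports an intercept of about 3.587 and a slope of about -0.0354, so a call like this should return a Kappa of roughly 1.29:

kappa = std2kappa(65.0, 1.0, 1.5)  # std of 65 degrees, interpolation interval Kappa in [1, 1.5]
print(kappa)                       # approximately 3.587 - 0.0354 * 65, i.e. about 1.29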
Example #9
### ages and net_worths need to be reshaped into 2D numpy arrays
### second argument of reshape command is a tuple of integers: (n_rows, n_columns)
### by convention, n_rows is the number of data points
### and n_columns is the number of features
ages       = numpy.reshape( numpy.array(ages), (len(ages), 1))
net_worths = numpy.reshape( numpy.array(net_worths), (len(net_worths), 1))
from sklearn.cross_validation import train_test_split
ages_train, ages_test, net_worths_train, net_worths_test = train_test_split(ages, net_worths, test_size=0.1, random_state=42)

### fill in a regression here!  Name the regression object reg so that
### the plotting code below works, and you can see what your regression looks like

from sklearn.linear_model import LinearRegression as linreg

reg=linreg()
reg.fit(ages_train,net_worths_train)

print ("Slope: ",reg.coef_)
print ("Score: ",reg.score(ages_test,net_worths_test))



try:
    plt.plot(ages, reg.predict(ages), color="blue")
except NameError:
    pass
plt.scatter(ages, net_worths)
plt.show()

Example #10
    def train(self):
        _l = linreg()
        self.model = _l.fit(self.X, self.Y)
Example #11
y = data.iloc[:, -1].values

#split universal dataset (train:test)
#library: sklearn
#module: model_selection
#class: train_test_split
from sklearn.model_selection import train_test_split as tts
x_train, x_test, y_train, y_test = tts(x, y, test_size=0.3, random_state=3)

#algorithm selection
#linear regression
#library: sklearn
#module: linear_model
#class: LinearRegression
from sklearn.linear_model import LinearRegression as linreg
model_linreg = linreg()

#train the model
model_linreg.fit(x_train, y_train)

#Test the model
#predicting output
y_pred = model_linreg.predict(x_test)

#Checking accuracy
accuracy = model_linreg.score(x_test, y_test)
print('Linear regression accuracy:', accuracy)

#Visualization
#heatmap
import seaborn as sb
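The snippet stops at the seaborn import. A typical continuation for the hinted heatmap (an assumption, not the original code) would plot the correlation matrix of the feature dataframe data used above:

import matplotlib.pyplot as plt

# assumed continuation: correlation heatmap of the dataframe `data` from the snippet above
sb.heatmap(data.corr(), annot=True, cmap='coolwarm')
plt.show()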
Example #12
def dff(f,  trial_starts, teleports,f_neu = None, neuropil_method = None, bleedthrough_ts = None, neu_bleedthrouh_ts = None):

    '''
    Calculate dF/F for one fluorescence channel: keep only within-trial samples,
    optionally regress out a bleedthrough timeseries per cell, optionally correct for
    neuropil, then compute (F - baseline) / |baseline| with a filtered baseline.
    '''

    f_ = np.zeros(f.shape) * np.nan


    # keep only the fluorescence on each trial
    if f_neu is not None:
        f_neu_ = np.zeros(f_neu.shape) * np.nan

    for i, (start, stop) in enumerate(zip(trial_starts.tolist(), teleports.tolist())):
        f_[:, start - 1:stop - 1] = f[:, start - 1:stop - 1]

        if f_neu is not None:
            f_neu_[:, start - 1:stop - 1] = f_neu[:, start - 1:stop - 1]


    # green channel bleedthrough correction: regress green channel from red channel
    # For each cell, predict red from green channel, subtract prediction to get residual, and add back in intercept;
    # So red signal will be residual+intercept
    nanmask = ~np.isnan(f_[0, :])


    if bleedthrough_ts is not None:
        for cell in range(f_.shape[0]):
            lr = linreg().fit(bleedthrough_ts[cell:cell + 1, nanmask].T, f_[cell, nanmask])  # linear regression from scikitlearn
            f_[cell, nanmask] = f_[cell, nanmask] - lr.predict(bleedthrough_ts[cell:cell + 1, nanmask].T) + lr.intercept_

    if f_neu is not None and neu_bleedthrouh_ts is not None:
        for cell in range(f_neu_.shape[0]):
            lr = linreg().fit(neu_bleedthrouh_ts[cell:cell + 1, nanmask].T, f_neu_[cell, nanmask])
            f_neu_[cell, nanmask] = f_neu_[cell, nanmask] - lr.predict(neu_bleedthrouh_ts[cell:cell + 1, nanmask].T) + lr.intercept_

    # once bleedthrough is subtracted, do neuropil correction on both channels

    if neuropil_method == 'subtract':
        f_ -= .7 * f_neu_
    elif neuropil_method == 'regress':
        # F = F_
        raise NotImplementedError
    elif neuropil_method is None:
        pass

    # Calculate baseline for chan 1
    flow = sp.ndimage.filters.gaussian_filter(f_[:, nanmask], [0., 15])  # cut out ITIs and smooth signal
    flow = sp.ndimage.filters.minimum_filter1d(flow, int(500 * 15))  # minimum filter, taking min val over 15 sec
    flow = sp.ndimage.filters.maximum_filter1d(flow, int(500 * 15))  # max filter with same window (dilation)

    # to get deltaF/F: subtract baseline from initial signal, divide by abs(baseline)
    # baseline can sometimes end up as negative due to regression
    # -- red dff --
    dff = np.zeros(f_.shape) * np.nan
    dff[:, nanmask] = (f_[:, nanmask] - flow) / np.abs(flow)

    # Smooth the deltaF/F transients by 2 time bins
    for i, (start, stop) in enumerate(zip(trial_starts.tolist(), teleports.tolist())):
        dff[:, start - 1:stop - 1] = sp.ndimage.filters.gaussian_filter1d(dff[:, start - 1:stop - 1], 2, axis=1)


    return dff
Example #13
def dff_dual(F_red,
             Fneu_red,
             F_green,
             Fneu_green,
             trial_starts,
             teleport_starts,
             method_red='regress', method_green='regress'):

    '''
    calculate dF/F for two channels, red and green
    regress green from red
    
    inputs:
        F_red: ROI fluorescence for red chan
        Fneu_red: neuropil fluorescence for red chan
        F_green: ROI fluorescence for green chan
        Fneu_green: neuropil fluorescence for green chan
        trial_starts: timeseries of trial start indices
        teleport_starts: timeseries of teleport start indices
        method_red, method_green: 'regress' or 'subtract' - how to correct for neuropil
    
    outputs:
        dFF_red
        dFF_green
    '''

    
    F = np.zeros(F_red.shape)*np.nan #red
    F2 = np.zeros(F_green.shape)*np.nan #green

    #keep only the fluorescence on each trial
    Fneu = np.zeros(F_red.shape)*np.nan
    Fneu2 = np.zeros(F_green.shape)*np.nan
    for i, (start,stop) in enumerate(zip(trial_starts.tolist(),teleport_starts.tolist())):
        F[:,start-1:stop-1] = F_red[:,start-1:stop-1]
        F2[:,start-1:stop-1] = F_green[:,start-1:stop-1]

        Fneu[:,start-1:stop-1] = Fneu_red[:,start-1:stop-1]
        Fneu2[:,start-1:stop-1] = Fneu_green[:,start-1:stop-1]

    # green channel bleedthrough correction: regress green channel from red channel
    # For each cell, predict red from green channel, subtract prediction to get residual, and add back in intercept;
    # So red signal will be residual+intercept
    nanmask = ~np.isnan(F[0,:])
    F2_ = np.copy(F2)
    F_ = np.copy(F)

    for cell in range(F.shape[0]):
        lr = linreg().fit(F2[cell:cell+1,nanmask].T,F[cell,nanmask]) #linear regression from scikitlearn
        F[cell,nanmask] = F[cell,nanmask]-lr.predict(F2[cell:cell+1,nanmask].T) + lr.intercept_
        F_[cell,nanmask] = F_[cell,nanmask]-lr.predict(F2[cell:cell+1,nanmask].T) - .7*(lr.predict(Fneu[cell:cell+1,nanmask].T)) + lr.intercept_

        lr = linreg().fit(Fneu2[cell:cell+1,nanmask].T,Fneu[cell,nanmask])
        Fneu[cell,nanmask] = Fneu[cell,nanmask]-lr.predict(Fneu2[cell:cell+1,nanmask].T) + lr.intercept_

        #regress out F2 neuropil from F2
        lr = linreg().fit(Fneu2[cell:cell+1,nanmask].T,F2_[cell,nanmask])
        F2_[cell,nanmask] = F2_[cell,nanmask] - .7*(lr.predict(Fneu2[cell:cell+1,nanmask].T)) + lr.intercept_

    # once bleedthrough is subtracted, do neuropil correction on both channels
    
    if method_red == 'subtract':
        F -= .7*Fneu
    elif method_red == 'regress':
        F = F_
    elif method_red is None:
        pass
        
    if method_green == 'subtract':
        F2 -= .7*Fneu2 # subtraction, GRABDA
    elif method_green == 'regress':
        F2 = F2_ # regression, GRABDA
    elif method_green is None:
        pass


    # Calculate baseline for chan 1
    Flow = sp.ndimage.filters.gaussian_filter(F[:,nanmask],    [0., 15]) #cut out ITIs and smooth signal
    Flow = sp.ndimage.filters.minimum_filter1d(Flow,    int(500*15)) #minimum filter, taking min val over 15 sec
    Flow = sp.ndimage.filters.maximum_filter1d(Flow,    int(500*15)) #max filter with same window (dilation)
 
    # to get deltaF/F: subtract baseline from initial signal, divide by abs(baseline)
    # baseline can sometimes end up as negative due to regression
    # -- red dff --
    dFF_red = np.zeros(F_red.shape)*np.nan 
    dFF_red[:,nanmask] = (F[:,nanmask]-Flow)/np.abs(Flow) 

    # Calculate baseline for chan 2
    Flow = sp.ndimage.filters.gaussian_filter(F2[:,nanmask],    [0., 15])
    Flow = sp.ndimage.filters.minimum_filter1d(Flow,    int(500*15))
    Flow = sp.ndimage.filters.maximum_filter1d(Flow,    int(500*15))
    # -- green dff --
    dFF_green = np.zeros(F_green.shape)*np.nan
    dFF_green[:,nanmask] = (F2[:,nanmask]-Flow)/np.abs(Flow)

    # Smooth the deltaF/F transients by 2 time bins
    for i, (start,stop) in enumerate(zip(trial_starts.tolist(),teleport_starts.tolist())):
        dFF_red[:,start-1:stop-1] = sp.ndimage.filters.gaussian_filter1d(dFF_red[:,start-1:stop-1],2,axis=1)
        dFF_green[:,start-1:stop-1] = sp.ndimage.filters.gaussian_filter1d(dFF_green[:,start-1:stop-1],2,axis=1)

    

    return dFF_red, dFF_green
Example #14
# Start training the three different models (with multithreading support)
size_model = None
message_readability_model = None
message_length_model = None

if ("knn" in sys.argv):
    size_model = KNN(n_neighbors=n_neighbors, n_jobs=8)
    message_readability_model = KNN(n_neighbors=n_neighbors, n_jobs=8)
    message_length_model = KNN(n_neighbors=n_neighbors, n_jobs=8)
elif ("svc" in sys.argv or "svm" in sys.argv):
    size_model = SVC(C=C)
    message_readability_model = SVC(C=C)
    message_length_model = SVC(C=C)
else:
    size_model = linreg()
    message_readability_model = linreg()
    message_length_model = linreg()

size_model.fit(scaled_size_features, size_output)
message_readability_model.fit(scaled_message_features, readability_output)
message_length_model.fit(scaled_message_features, length_output)

predicted_length = message_length_model.predict(scaled_message_features)
predicted_readability = message_readability_model.predict(
    scaled_message_features)
predicted_size = size_model.predict(scaled_size_features)

# Test the models for accuracy
if ("knn" in sys.argv):
    print("Accuracy for message length with knn k=" + str(n_neighbors) +
Example #15
"""
Same plot but for a smaller kappa interval (kappa=1,1.5)
This plot is used in the thesis!
"""
kInt=np.linspace(1,1.5,2001)
distCom=[]
stdCom=[]
for i in range(0,len(kInt)):
    distCom.append(1/(2*np.pi)*np.e**(kInt[i]*np.cos(x-0)))
    distCom[i]=distCom[i]/sum(distCom[i])
    stdCom.append(np.sqrt(sum(x**2*distCom[i])))
ax2=fig.add_subplot(1,2,2)
ax2.set_ylabel("kappa")
ax2.set_xlabel("standard deviation")
ax2.plot(np.rad2deg(stdCom),kInt,color="black")
#Setting a smaller kappa interval makes the relationship approximately linear. This way, a linear regression between Kappa and std
#can be used to find the Kappa value corresponding to any desired std.
"""
Fitting a linear regression line to std von Mises and Kappa in the interval [1, 1.5]
The function is transferred to supplementary_functions.py
"""

model=linreg().fit(np.asarray(np.rad2deg(stdCom)).reshape(-1,1),kInt)#creating the linear regression model, x value has to be transposed in advance!
model.score(np.asarray(np.rad2deg(stdCom)).reshape(-1,1),kInt)#Returns the coefficient of determination R^2 of the prediction.
#0.9978434393176431 is just perfect!
model.intercept_#3.5869855951879352 is the intercept (kappa for stdvM=0); don't take this value seriously.
model.coef_#-0.03539763 is the slope coefficient, by which Kappa decreases as std grows.
# IMPORTANT: this regression is useful if and only if kappa is between 1 and 1.5, as the fit is done in that interval!


Example #16
]]

y = a['SalePrice']

X1 = b[[
    'MSSubClass', 'LotFrontage', 'LotArea', 'OverallQual', 'OverallCond',
    'YearBuilt', 'YearRemodAdd', 'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2',
    'BsmtUnfSF', 'TotalBsmtSF', '1stFlrSF', '2ndFlrSF', 'LowQualFinSF',
    'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'HalfBath',
    'BedroomAbvGr', 'KitchenAbvGr', 'TotRmsAbvGrd', 'Fireplaces',
    'GarageYrBlt', 'GarageCars', 'GarageArea', 'WoodDeckSF', 'OpenPorchSF',
    'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea', 'MiscVal',
    'MoSold', 'YrSold'
]]

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=23)

#scalar.fit(X_train)
#X_train_scaled=scalar.transform(X_train)
#X_test_scaled=scalar.transform(X_test)

lr = linreg(alpha=20.0).fit(X_train, y_train)  # note: LinearRegression takes no alpha, so linreg is presumably aliased to a regularized model (e.g. Ridge) in the original source

#print('Coefficient: ',lr.coef_)
#print('Intercept: ',lr.intercept_)

print('R-squared score(training):{:.3f}'.format(lr.score(X_train, y_train)))
print('R-squared score(test):{:.3f}'.format(lr.score(X_test, y_test)))

print(lr.predict(X1))
Example #17
def polynomial_reg(degree=2, **kwargs):
    """Polynomial regression pipeline: polynomial feature expansion followed by linear regression."""
    return make_pipeline(polynom(degree), linreg(**kwargs))
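A self-contained sketch of the same pipeline, assuming polynom and linreg alias PolynomialFeatures and LinearRegression (the aliases are not shown in the snippet above):

import numpy as np
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures as polynom
from sklearn.linear_model import LinearRegression as linreg

def polynomial_reg(degree=2, **kwargs):
    """Pipeline: polynomial feature expansion followed by linear regression."""
    return make_pipeline(polynom(degree), linreg(**kwargs))

# fit a quadratic on toy data and predict
x = np.linspace(0, 1, 50).reshape(-1, 1)
y = 3 * x.ravel() ** 2 - x.ravel() + 0.5
model = polynomial_reg(degree=2).fit(x, y)
print(model.predict([[0.5]]))   # ~ 3*0.25 - 0.5 + 0.5 = 0.75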
Example #18
print(pd.value_counts(titanic["Embarked"].values, sort=False))
# "S" is most common char -> chosen as default for missing values
titanic["Embarked"] = titanic["Embarked"].fillna("S")

#4) Replace Embarked char with numeric code
#titanic.loc[titanic["Embarked"]=="S", "Embarked"]=0 # 'S' -> 0
#titanic.loc[titanic["Embarked"]=="C", "Embarked"]=1 # 'C' -> 1
titanic.loc[titanic["Embarked"] == "S", "Embarked"] = 0
titanic.loc[titanic["Embarked"] == "C", "Embarked"] = 1
titanic.loc[titanic["Embarked"] == "Q", "Embarked"] = 2  # 'Q' -> 2

# input column used for predictions :
predictors = ["Pclass", "Sex", "Age", "SibSp", "Parch", "Fare", "Embarked"]

# Initialize the algorithm
algo_linreg = linreg()

# Generate cross-validation folds with random splits
# return rows indices for corresponding train and set
kf = KFold(titanic.shape[0], n_folds=3, random_state=1)

# Make the predictions
predictions = []
for train, test in kf:
    # Which predictors used on train fold
    train_predictors = (titanic[predictors].iloc[train, :])
    # Target/goal used to train the algo
    train_target = titanic["Survived"].iloc[train]

    # Train the algo with the predictors and target
    # .fit(x input, y output)