def test_bilinear_regression_estimator():
    """Check BilinearRegression against plain LinearRegression.

    The targets are generated from coefficients that are the outer product
    of two vectors ``a`` and ``b``.  Three comparisons are made:

    * all features placed on the last axis (first factor has length 1),
    * all features placed on the second-to-last axis (second factor has
      length 1),
    * a full ``len(a) x len(b)`` layout with intercept fitting enabled,
      where only the intercepts are compared.
    """
    from numpy.testing import assert_almost_equal, assert_array_almost_equal
    from sklearn.linear_model import LinearRegression

    n_samples = 100
    a = np.arange(1, 5)
    b = np.arange(6, 10)
    true_coef = np.outer(a, b)

    X = np.random.randn(n_samples, true_coef.size)
    y = np.dot(X, true_coef.ravel())

    linear = LinearRegression(fit_intercept=False)
    bilinear = BilinearRegression(fit_intercept=False)
    linear.fit(X, y)

    # Degenerate case 1: every feature sits on the last axis, so coef_a_
    # is a single scalar that rescales coef_b_.
    bilinear.fit(X.reshape(n_samples, 1, 1, -1), y)
    combined = bilinear.coef_b_.ravel() * bilinear.coef_a_.ravel()[0]
    assert_array_almost_equal(linear.coef_, combined)

    # Degenerate case 2: every feature sits on the second-to-last axis,
    # so the roles of coef_a_ and coef_b_ are swapped.
    bilinear.fit(X.reshape(n_samples, 1, -1, 1), y)
    combined = bilinear.coef_a_.ravel() * bilinear.coef_b_.ravel()[0]
    assert_array_almost_equal(linear.coef_, combined)

    # Shift the targets by a constant and verify both estimators recover
    # the same intercept.
    offset = 2
    y_shifted = y + offset
    linear.fit_intercept = True
    bilinear.fit_intercept = True
    linear.fit(X, y_shifted)

    X = X.reshape(n_samples, 1, len(a), len(b))
    bilinear.fit(X, y_shifted)
    assert_almost_equal(bilinear.intercept_, linear.intercept_)
def _get_estimator(self):
    """Return the estimator to fit, with ``fit_intercept`` forced to False.

    A new ``LinearRegression()`` is created when ``self.estimator`` is
    None; otherwise ``clone(self.estimator)`` is used so the object stored
    on ``self`` is not the one whose attribute gets modified.
    """
    configured = self.estimator
    estimator = LinearRegression() if configured is None else clone(configured)
    # The intercept is always switched off on the returned object,
    # whatever the configured estimator specified.
    estimator.fit_intercept = False
    return estimator
def correlationRegression():
    """Build a scatter plot with a fitted regression line and return it.

    Fits ``Reg()`` (with ``fit_intercept`` set to False) on the
    ``POA_Irradiance`` column of the module-level ``df`` against
    ``Specific_ACPower``, then draws the raw points as markers and the
    predictions as a line.

    Returns a dict with:
      * ``msgCorrReg`` -- ``T('Correlation and Regression')``
        (presumably a translated caption; confirm what ``T`` does).
      * ``figCorrReg`` -- the figure serialised with ``to_json()``.
    """
    x_col = "POA_Irradiance"
    y_col = "Specific_ACPower"
    x_values = df[x_col]
    y_values = df[y_col]

    def axis_style(title):
        # Both axes share the same dark-theme styling; only the title differs.
        return {
            'title': title,
            'titlefont': {'size': 10},
            'tickfont': {'size': 10},
            'color': '#e6e6e6',
            'gridwidth': 0.5,
            'gridcolor': "#333",
            'zerolinecolor': "#333",
        }

    # np.vstack turns the column into a 2-D array with one feature per row.
    regressor = Reg()
    regressor.fit_intercept = False
    regressor.fit(np.vstack(x_values), y_values)
    fitted = regressor.predict(np.vstack(x_values))

    points = go.Scatter(
        x=x_values,
        y=y_values,
        mode='markers',
        opacity=0.8,
        marker={'size': 4},
        showlegend=False,
    )
    layout = go.Layout(
        xaxis=axis_style(x_col),
        yaxis=axis_style(y_col),
        plot_bgcolor='#282828',
        paper_bgcolor='#222222',
        margin={'l': 5, 'r': 5, 't': 50, 'b': 50},
    )

    figure = go.Figure(data=points, layout=layout)
    fit_line = go.Scatter(
        x=x_values,
        y=fitted,
        mode='lines',
        opacity=0.8,
        line={'width': 2},
        showlegend=False,
    )
    figure.add_trace(fit_line)

    return {
        'msgCorrReg': T('Correlation and Regression'),
        'figCorrReg': figure.to_json(),
    }
def linear_model(x, attr, xvars, fit_intercept=None, name=None, cut=None,
                 residuals=True, quiet=True, model='LinearRegression'):
    """Fit a scikit-learn linear model for ``attr`` with ``xvars`` as predictors.

    The prediction (and optionally the residuals) are stored back into ``x``
    along a ``time`` dimension, and ``x`` is returned.

    Parameters
    ----------
    x
        Container supporting ``reset_coords()`` and item assignment
        (presumably an ``xarray.Dataset`` -- confirm against callers).
        It is modified in place.
    attr
        Name of the variable in ``x`` to model.  Only samples where it is
        finite are used.
    xvars
        List of variable names in ``x`` used as predictors.
    fit_intercept
        If not None, assigned to the estimator's ``fit_intercept`` before
        fitting.
    name
        Name under which the prediction is stored (default:
        ``attr + "_model"``).
    cut
        Optional name of a variable in ``x``; when given, only samples where
        it equals 1 are used for fitting.  Prediction is still made for
        every finite sample.
    residuals
        If truthy, ``x[attr] - x[name]`` is stored as well.  A string value
        is used as the residual variable name (default:
        ``attr + "_residuals"``).
    quiet
        If False, progress and results are printed.
    model
        Only ``'LinearRegression'`` is implemented.

    Raises
    ------
    Exception
        If ``model`` is anything other than ``'LinearRegression'``.
    """
    # Compare by value: an identity test against a string literal depends on
    # interning and can reject a string that is equal to 'LinearRegression'.
    if model != 'LinearRegression':
        raise Exception("Currently only model='LinearRegression' implmented.")

    from sklearn.linear_model import LinearRegression
    import numpy as np
    import xarray as xr

    lm = LinearRegression()
    if not name:
        name = '{:}_model'.format(attr)

    # Single-argument print(...) calls behave the same on Python 2 and 3.
    if not quiet:
        print('\nUsing scikit-learn LinearRegression to build model for {:} from variables:\n {:}'.format(attr, str(xvars)))

    allattrs = xvars + [attr]
    if cut:
        allattrs += [cut]

    # Keep only the samples where the target is finite.
    flat = x.reset_coords()
    xx = flat[allattrs].where(np.isfinite(flat[attr]), drop=True)
    df_xvars0 = xx[xvars].to_dataframe()

    if cut:
        # Train on the subset passing the cut; predict on everything below.
        selected = xx[cut] == 1
        df_xvars = xx[xvars].where(selected, drop=True).to_dataframe()
        xdata = xx[attr].where(selected, drop=True).data
        if not quiet:
            print('\nUsing following cut in buildiing model')
            print(xx[cut])
    else:
        df_xvars = df_xvars0
        xdata = xx[attr].data

    if fit_intercept is not None:
        lm.fit_intercept = fit_intercept

    lm.fit(df_xvars, xdata)

    x[name] = xr.DataArray(lm.predict(df_xvars0),
                           coords=[('time', df_xvars0.index)])
    # Record the estimator settings and fit results alongside the prediction.
    x[name].attrs.update(**lm.get_params())
    x[name].attrs['unit'] = x[attr].attrs.get('unit', '')
    x[name].attrs['doc'] = 'LinearRegression scikit-learn model for {:} training data'.format(attr)
    x[name].attrs['model'] = model
    x[name].attrs['variables'] = xvars
    x[name].attrs['coef_'] = lm.coef_
    x[name].attrs['intercept_'] = lm.intercept_
    x[name].attrs['score'] = lm.score(df_xvars, xdata)
    if not quiet:
        print('\n****Model Results****')
        print(x.reset_coords()[name])

    if residuals:
        if not isinstance(residuals, str):
            residuals = '{:}_residuals'.format(attr)

        x[residuals] = (['time'], x[attr] - x[name])
        x[residuals].attrs['doc'] = 'Residuals for {:} based on LinearRegression model {:}'.format(attr, name)
        if not quiet:
            print('\n****Model Residuals****')
            print(x.reset_coords()[residuals])

    return x
def linear_model(x, attr, xvars, fit_intercept=None, name=None, cut=None,
                 residuals=True, quiet=True, model='LinearRegression'):
    """Make a linear model for ``attr`` based on ``xvars`` as free parameters.

    Uses scikit-learn.  Currently only ``model='LinearRegression'`` is
    implemented.  The model prediction is written to ``x[name]`` and, when
    requested, the residuals to ``x[residuals]``; both are placed on a
    ``time`` dimension.  ``x`` is modified in place and returned.

    Parameters
    ----------
    x
        Object providing ``reset_coords()`` and item assignment
        (presumably an ``xarray.Dataset`` -- confirm against callers).
    attr
        Name of the modelled variable; samples where it is not finite are
        dropped before fitting and predicting.
    xvars
        List of predictor variable names.
    fit_intercept
        When not None, overrides the estimator's ``fit_intercept``.
    name
        Output variable name (default: ``attr + "_model"``).
    cut
        Optional variable name; only samples where it equals 1 are used to
        fit the model.
    residuals
        Name of attribute for residuals (default: ``attr + "_residuals"``).
        Any falsy value disables the residual output.
    quiet
        Set to False to print what is being done and the results.
    model
        Model type; must equal ``'LinearRegression'``.

    Raises
    ------
    Exception
        If ``model`` does not equal ``'LinearRegression'``.
    """
    # Use value equality here; ``is not`` against a string literal tests
    # object identity, which is not guaranteed for equal strings.
    if model != 'LinearRegression':
        raise Exception("Currently only model='LinearRegression' implmented.")

    from sklearn.linear_model import LinearRegression
    import numpy as np
    import xarray as xr

    lm = LinearRegression()
    if not name:
        name = '{:}_model'.format(attr)

    # print(...) with one argument is valid on both Python 2 and Python 3.
    if not quiet:
        print('\nUsing scikit-learn LinearRegression to build model for {:} from variables:\n {:}'.format(
            attr, str(xvars)))

    allattrs = xvars + [attr]
    if cut:
        allattrs += [cut]

    # Restrict to samples with a finite target value.
    xx = x.reset_coords()[allattrs].where(
        np.isfinite(x.reset_coords()[attr]), drop=True)
    df_xvars0 = xx[xvars].to_dataframe()

    if cut:
        # Fit only on samples passing the cut; the prediction below still
        # covers every finite sample.
        df_xvars = xx[xvars].where(xx[cut] == 1, drop=True).to_dataframe()
        xdata = xx[attr].where(xx[cut] == 1, drop=True).data
        if not quiet:
            print('\nUsing following cut in buildiing model')
            print(xx[cut])
    else:
        df_xvars = df_xvars0
        xdata = xx[attr].data

    if fit_intercept is not None:
        lm.fit_intercept = fit_intercept

    lm.fit(df_xvars, xdata)

    x[name] = xr.DataArray(lm.predict(df_xvars0),
                           coords=[('time', df_xvars0.index)])
    # Store estimator parameters and fit diagnostics as attributes.
    x[name].attrs.update(**lm.get_params())
    x[name].attrs['unit'] = x[attr].attrs.get('unit', '')
    x[name].attrs['doc'] = 'LinearRegression scikit-learn model for {:} training data'.format(
        attr)
    x[name].attrs['model'] = model
    x[name].attrs['variables'] = xvars
    x[name].attrs['coef_'] = lm.coef_
    x[name].attrs['intercept_'] = lm.intercept_
    x[name].attrs['score'] = lm.score(df_xvars, xdata)
    if not quiet:
        print('\n****Model Results****')
        print(x.reset_coords()[name])

    if residuals:
        if not isinstance(residuals, str):
            residuals = '{:}_residuals'.format(attr)

        x[residuals] = (['time'], x[attr] - x[name])
        x[residuals].attrs['doc'] = 'Residuals for {:} based on LinearRegression model {:}'.format(
            attr, name)
        if not quiet:
            print('\n****Model Residuals****')
            print(x.reset_coords()[residuals])

    return x