Esempio n. 1
0
    def checkMovingOLS(self, window_type, x, y, weights=None, **kwds):
        """Validate a moving-window OLS fit against per-window static fits.

        The window length is twice the matrix rank of ``x.values``. The
        dense moving fit is first compared with the same fit on sparse
        inputs; afterwards every valid index is re-estimated with a static
        ``ols`` call on the truncated data and handed to ``self.compare``.
        Each ``ols`` call is wrapped so that a ``FutureWarning`` is expected.
        """
        window = 2 * np.linalg.matrix_rank(x.values)

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            dense_fit = ols(
                y=y, x=x, weights=weights, window=window, window_type=window_type, **kwds
            )

        # check that sparse version is the same
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            sparse_fit = ols(
                y=y.to_sparse(),
                x=x.to_sparse(),
                weights=weights,
                window=window,
                window_type=window_type,
                **kwds
            )
        _compare_ols_results(dense_fit, sparse_fit)

        dates = dense_fit._index
        rolling = window_type == "rolling"

        for result_pos, event_pos in enumerate(dense_fit._valid_indices):
            # A rolling fit only reaches back one window; every other case
            # starts at the first date.
            first_pos = event_pos - window + 1 if rolling and event_pos >= window else 0
            start, stop = dates[first_pos], dates[event_pos]

            x_window = {
                name: column.truncate(before=start, after=stop)
                for name, column in compat.iteritems(x)
            }
            y_window = y.truncate(before=start, after=stop)

            with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
                static_fit = ols(y=y_window, x=x_window, weights=weights, **kwds)

            self.compare(static_fit, dense_fit, event_index=event_pos, result_index=result_pos)

        _check_non_raw_results(dense_fit)
Esempio n. 2
0
    def test_wls_panel(self):
        """Weighted panel OLS must agree with the same fit on stacked data."""
        y = tm.makeTimeDataFrame()
        x = Panel({"x1": tm.makeTimeDataFrame(), "x2": tm.makeTimeDataFrame()})

        # Knock two observations out of every column of y.
        for rows, column in (([1, 7], "A"), ([6, 15], "B"), ([3, 20], "C"), ([5, 11], "D")):
            y.ix[rows, column] = np.nan

        weights = x.std("items")

        # Stacked copies of y, x and the weights, re-indexed via
        # ``get_tuple_index``.
        stack_y = y.stack()
        stack_x = DataFrame(dict((name, frame.stack()) for name, frame in x.iteritems()))
        stack_weights = weights.stack()
        for stacked in (stack_y, stack_x, stack_weights):
            stacked.index = stacked.index.get_tuple_index()

        result = ols(y=y, x=x, weights=1 / weights)
        expected = ols(y=stack_y, x=stack_x, weights=1 / stack_weights)

        assert_almost_equal(result.beta, expected.beta)
        assert_almost_equal(result.resid.stack().values, expected.resid.values)
        assert_almost_equal(result.y_fitted.stack().values, expected.y_fitted.values)
Esempio n. 3
0
    def test_wls_panel(self):
        """Weighted OLS on panel input must match the fit on stacked input.

        Both ``ols`` calls are expected to emit a ``FutureWarning``.
        """
        y = tm.makeTimeDataFrame()
        x = Panel({"x1": tm.makeTimeDataFrame(), "x2": tm.makeTimeDataFrame()})

        # Missing observations, two per column.
        missing = [("A", [1, 7]), ("B", [6, 15]), ("C", [3, 20]), ("D", [5, 11])]
        for column, rows in missing:
            y.ix[rows, column] = np.nan

        stacked_columns = dict((key, item.stack()) for key, item in compat.iteritems(x))
        stack_y = y.stack()
        stack_x = DataFrame(stacked_columns)

        weights = x.std("items")
        stack_weights = weights.stack()

        # Swap each stacked index for its ``_tuple_index`` counterpart.
        stack_y.index = stack_y.index._tuple_index
        stack_x.index = stack_x.index._tuple_index
        stack_weights.index = stack_weights.index._tuple_index

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            panel_fit = ols(y=y, x=x, weights=1 / weights)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            stacked_fit = ols(y=stack_y, x=stack_x, weights=1 / stack_weights)

        assert_almost_equal(panel_fit.beta, stacked_fit.beta)

        for name in ("resid", "y_fitted"):
            from_panel = getattr(panel_fit, name).stack().values
            from_stacked = getattr(stacked_fit, name).values
            assert_almost_equal(from_panel, from_stacked)
Esempio n. 4
0
    def checkMovingOLS(self, x, y, window_type='rolling', **kwds):
        """Compare a moving OLS fit with static fits over each window.

        For every valid index of the moving fit the data is truncated to
        the matching date range, refit with a plain ``ols`` call, and the
        pair of results is passed to ``self.compare``.
        """
        window = 25  # must be larger than rank of x

        moving = ols(window=window, window_type=window_type, x=x, y=y, **kwds)
        dates = moving._index
        rolling = window_type == 'rolling'

        for result_pos, event_pos in enumerate(moving._valid_indices):
            # A rolling window reaches back ``window`` observations; any
            # other case starts from the first date.
            if rolling and event_pos >= window:
                first_pos = event_pos - window + 1
            else:
                first_pos = 0
            start, stop = dates[first_pos], dates[event_pos]

            x_window = dict((name, column.truncate(before=start, after=stop))
                            for name, column in x.iteritems())
            y_window = y.truncate(before=start, after=stop)

            static = ols(y=y_window, x=x_window, **kwds)

            self.compare(static, moving,
                         event_index=event_pos, result_index=result_pos)

        _check_non_raw_results(moving)
Esempio n. 5
0
    def checkMovingOLS(self, x, y, window_type="rolling", **kwds):
        """Check a moving OLS fit window by window against static fits.

        Each valid index of the moving fit is re-estimated on the
        truncated data and compared through ``self.compare``. All ``ols``
        calls are expected to emit a ``FutureWarning``.
        """
        window = 25  # must be larger than rank of x

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            moving_fit = ols(window=window, window_type=window_type, x=x, y=y, **kwds)

        dates = moving_fit._index
        use_rolling_start = window_type == "rolling"

        for result_pos, event_pos in enumerate(moving_fit._valid_indices):
            # Only a rolling fit past its first full window drops old data.
            if use_rolling_start and event_pos >= window:
                window_start = dates[event_pos - window + 1]
            else:
                window_start = dates[0]
            window_stop = dates[event_pos]

            x_slice = {
                key: values.truncate(before=window_start, after=window_stop)
                for key, values in compat.iteritems(x)
            }
            y_slice = y.truncate(before=window_start, after=window_stop)

            with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
                static_fit = ols(y=y_slice, x=x_slice, **kwds)

            self.compare(static_fit, moving_fit, event_index=event_pos, result_index=result_pos)

        _check_non_raw_results(moving_fit)
Esempio n. 6
0
    def checkMovingOLS(self, window_type, x, y, weights=None, **kwds):
        """Validate a moving OLS fit against sparse input and static refits.

        The window is twice ``rank(x.values)``. The dense fit is compared
        with the fit on sparse inputs, then each valid index is refit on
        the truncated data and checked with ``self.compare``.
        """
        from scikits.statsmodels.tools.tools import rank
        window = rank(x.values) * 2

        dense = ols(y=y, x=x, weights=weights,
                    window=window, window_type=window_type, **kwds)

        # check that sparse version is the same
        sparse = ols(y=y.to_sparse(), x=x.to_sparse(), weights=weights,
                     window=window, window_type=window_type, **kwds)
        _compare_ols_results(dense, sparse)

        dates = dense._index
        rolling = window_type == 'rolling'

        for result_pos, event_pos in enumerate(dense._valid_indices):
            # Rolling fits look back one window; otherwise start at the
            # first date.
            if rolling and event_pos >= window:
                first_pos = event_pos - window + 1
            else:
                first_pos = 0
            start, stop = dates[first_pos], dates[event_pos]

            x_window = dict((name, column.truncate(before=start, after=stop))
                            for name, column in x.iteritems())
            y_window = y.truncate(before=start, after=stop)

            static = ols(y=y_window, x=x_window, weights=weights, **kwds)

            self.compare(static, dense,
                         event_index=event_pos, result_index=result_pos)

        _check_non_raw_results(dense)
Esempio n. 7
0
    def test_wls_panel(self):
        """Weighted panel OLS must reproduce the fit on stacked inputs."""
        y = tm.makeTimeDataFrame()
        x = Panel({'x1': tm.makeTimeDataFrame(),
                   'x2': tm.makeTimeDataFrame()})

        # Two missing observations per column of y.
        holes = (('A', [1, 7]), ('B', [6, 15]), ('C', [3, 20]), ('D', [5, 11]))
        for column, rows in holes:
            y.ix[rows, column] = np.nan

        stack_y = y.stack()
        stack_x = DataFrame(dict((name, frame.stack())
                                 for name, frame in x.iteritems()))

        weights = x.std('items')
        stack_weights = weights.stack()

        # Replace every stacked index with the result of
        # ``get_tuple_index``.
        for stacked in (stack_y, stack_x, stack_weights):
            stacked.index = stacked.index.get_tuple_index()

        panel_fit = ols(y=y, x=x, weights=1/weights)
        stacked_fit = ols(y=stack_y, x=stack_x, weights=1/stack_weights)

        assert_almost_equal(panel_fit.beta, stacked_fit.beta)

        for name in ('resid', 'y_fitted'):
            assert_almost_equal(getattr(panel_fit, name).stack().values,
                                getattr(stacked_fit, name).values)
Esempio n. 8
0
    def checkMovingOLS(self, window_type, x, y, **kwds):
        """Compare a moving OLS fit with static refits over each window.

        The window is twice ``rank(x.values)``; ``rank`` is imported from
        whichever statsmodels location is available. The date axis comes
        from ``moving.y`` (its index for a Series, its major axis for a
        LongPanel).
        """
        try:
            from scikits.statsmodels.tools.tools import rank
        except ImportError:
            from scikits.statsmodels.tools import rank

        window = rank(x.values) * 2

        moving = ols(window=window, window_type=window_type, x=x, y=y, **kwds)

        if isinstance(moving.y, Series):
            dates = moving.y.index
        elif isinstance(moving.y, LongPanel):
            dates = moving.y.major_axis

        rolling = window_type == 'rolling'

        for result_pos, event_pos in enumerate(moving._valid_indices):
            # Rolling fits look back one window; otherwise start at the
            # first date.
            if rolling and event_pos >= window:
                first_pos = event_pos - window + 1
            else:
                first_pos = 0
            start, stop = dates[first_pos], dates[event_pos]

            x_window = dict((name, column.truncate(before=start, after=stop))
                            for name, column in x.iteritems())
            y_window = y.truncate(before=start, after=stop)

            static = ols(y=y_window, x=x_window, **kwds)

            self.compare(static, moving,
                         event_index=event_pos, result_index=result_pos)

        _check_non_raw_results(moving)
Esempio n. 9
0
    def test_wls_panel(self):
        """Weighted OLS on a Panel must match the fit on stacked data.

        Both ``ols`` calls are expected to emit a ``FutureWarning``.
        """
        y = tm.makeTimeDataFrame()
        x = Panel({'x1': tm.makeTimeDataFrame(),
                   'x2': tm.makeTimeDataFrame()})

        # Blank out two rows in each column, addressed positionally.
        for rows, column in (([1, 7], 'A'), ([6, 15], 'B'),
                             ([3, 20], 'C'), ([5, 11], 'D')):
            y.iloc[rows, y.columns.get_loc(column)] = np.nan

        stack_y = y.stack()
        stack_x = DataFrame(dict((name, frame.stack())
                                 for name, frame in x.iteritems()))

        weights = x.std('items')
        stack_weights = weights.stack()

        # Swap each stacked index for its ``_tuple_index`` counterpart.
        for stacked in (stack_y, stack_x, stack_weights):
            stacked.index = stacked.index._tuple_index

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            panel_fit = ols(y=y, x=x, weights=1 / weights)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            stacked_fit = ols(y=stack_y, x=stack_x, weights=1 / stack_weights)

        assert_almost_equal(panel_fit.beta, stacked_fit.beta)
        assert_almost_equal(panel_fit.resid.stack().values,
                            stacked_fit.resid.values)
        assert_almost_equal(panel_fit.y_fitted.stack().values,
                            stacked_fit.y_fitted.values)
Esempio n. 10
0
    def test_plm_attrs(self):
        """Smoke test: ``resid`` is reachable on windowed and plain fits.

        ``y`` is a DataFrame and ``x`` a dict of DataFrames.
        """
        y = tm.makeTimeDataFrame()
        x = {"a": tm.makeTimeDataFrame(), "b": tm.makeTimeDataFrame()}

        windowed = ols(y=y, x=x, window=10)
        plain = ols(y=y, x=x)

        # Only attribute access is exercised; the values are not checked.
        plain.resid
        windowed.resid
Esempio n. 11
0
    def test_auto_rolling_window_type(self):
        """Omitting ``window_type`` must give the same beta as "rolling"."""
        frame = tm.makeTimeDataFrame()
        target = frame.pop("A")
        shared = dict(y=target, x=frame, window=20, min_periods=10)

        implicit = ols(**shared)
        explicit = ols(window_type="rolling", **shared)

        assert_frame_equal(implicit.beta, explicit.beta)
Esempio n. 12
0
    def checkForSeries(self, x, y, series_x, series_y, **kwds):
        """Fit ``(x, y)`` and ``(series_x, series_y)`` and compare the fits.

        Both fits share ``kwds`` and are expected to emit a
        ``FutureWarning``. The series-based fit is passed to
        ``self.compare`` as the reference.
        """
        # Consistency check with simple OLS.
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            fitted = ols(x=x, y=y, **kwds)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            baseline = ols(x=series_x, y=series_y, **kwds)

        self.compare(baseline, fitted)
Esempio n. 13
0
    def test_series_rhs(self):
        """A bare Series regressor must behave like ``{"x": series}``."""
        endog = tm.makeTimeSeries()
        exog = tm.makeTimeSeries()

        from_series = ols(y=endog, x=exog)
        from_dict = ols(y=endog, x={"x": exog})
        assert_series_equal(from_series.beta, from_dict.beta)

        # GH 5233/5250
        predicted = from_series.predict(x=exog)
        assert_series_equal(from_series.y_predict, predicted)
Esempio n. 14
0
    def test_plm_ctor(self):
        """Smoke test: build fits from a dict and from a Panel, read ``summary``."""
        y = tm.makeTimeDataFrame()
        x = {"a": tm.makeTimeDataFrame(), "b": tm.makeTimeDataFrame()}

        # Dict of frames, no intercept.
        from_dict = ols(y=y, x=x, intercept=False)
        from_dict.summary

        # Same regressors wrapped in a Panel.
        from_panel = ols(y=y, x=Panel(x))
        from_panel.summary
Esempio n. 15
0
    def test_plm_attrs(self):
        """Smoke test: ``resid`` is reachable on windowed and plain fits.

        Both ``ols`` calls are expected to emit a ``FutureWarning``.
        """
        y = tm.makeTimeDataFrame()
        x = {"a": tm.makeTimeDataFrame(), "b": tm.makeTimeDataFrame()}

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            windowed = ols(y=y, x=x, window=10)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            plain = ols(y=y, x=x)

        # Only attribute access is exercised; the values are not checked.
        plain.resid
        windowed.resid
Esempio n. 16
0
    def test_auto_rolling_window_type(self):
        """Omitting ``window_type`` must give the same beta as "rolling".

        Both ``ols`` calls are expected to emit a ``FutureWarning``.
        """
        frame = tm.makeTimeDataFrame()
        target = frame.pop("A")

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            implicit = ols(y=target, x=frame, window=20, min_periods=10)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            explicit = ols(
                y=target, x=frame, window=20, min_periods=10, window_type="rolling"
            )

        assert_frame_equal(implicit.beta, explicit.beta)
Esempio n. 17
0
    def test_plm_ctor(self):
        """Smoke test: build fits from a dict and from a Panel, read ``summary``.

        Both ``ols`` calls are expected to emit a ``FutureWarning``.
        """
        y = tm.makeTimeDataFrame()
        x = {"a": tm.makeTimeDataFrame(), "b": tm.makeTimeDataFrame()}

        # Dict of frames, no intercept.
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            from_dict = ols(y=y, x=x, intercept=False)
        from_dict.summary

        # Same regressors wrapped in a Panel.
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            from_panel = ols(y=y, x=Panel(x))
        from_panel.summary
Esempio n. 18
0
    def test_series_rhs(self):
        """A bare Series regressor must behave like ``{'x': series}``.

        Both ``ols`` calls are expected to emit a ``FutureWarning``.
        """
        endog = tm.makeTimeSeries()
        exog = tm.makeTimeSeries()
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            from_series = ols(y=endog, x=exog)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            from_dict = ols(y=endog, x={'x': exog})
        assert_series_equal(from_series.beta, from_dict.beta)

        # GH 5233/5250
        predicted = from_series.predict(x=exog)
        assert_series_equal(from_series.y_predict, predicted)
Esempio n. 19
0
    def testFiltering(self):
        """Check the raw and filtered data held by a panel OLS fit.

        Fits ``self.panel_y2`` on ``self.panel_x2`` (expecting a
        ``FutureWarning``) and verifies the index levels and values of
        ``_x``, ``_x_filtered`` and ``_y``.
        """
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = ols(y=self.panel_y2, x=self.panel_x2)

        # First index level of the regressors actually used in the fit.
        x = result._x
        index = x.index.get_level_values(0)
        index = Index(sorted(set(index)))
        exp_index = Index([datetime(2000, 1, 1), datetime(2000, 1, 3)])
        # This check used to be a bare ``self.assertTrue`` followed by a
        # separate parenthesised expression, so nothing was asserted.
        self.assertTrue(exp_index.equals(index))

        # Second index level of the same regressors.
        index = x.index.get_level_values(1)
        index = Index(sorted(set(index)))
        exp_index = Index(["A", "B"])
        self.assertTrue(exp_index.equals(index))

        # The filtered regressors carry one additional date.
        x = result._x_filtered
        index = x.index.get_level_values(0)
        index = Index(sorted(set(index)))
        exp_index = Index([datetime(2000, 1, 1), datetime(2000, 1, 3), datetime(2000, 1, 4)])
        self.assertTrue(exp_index.equals(index))

        assert_almost_equal(result._y.values.flat, [1, 4, 5])

        exp_x = [[6, 14, 1], [9, 17, 1], [30, 48, 1]]
        assert_almost_equal(exp_x, result._x.values)

        exp_x_filtered = [[6, 14, 1], [9, 17, 1], [30, 48, 1], [11, 20, 1], [12, 21, 1]]
        assert_almost_equal(exp_x_filtered, result._x_filtered.values)

        self.assertTrue(result._x_filtered.index.levels[0].equals(result.y_fitted.index))
Esempio n. 20
0
    def test_plm_lagged_y_predict(self):
        """Smoke test: ``lagged_y_predict(2)`` runs on a windowed fit."""
        y = tm.makeTimeDataFrame()
        x = {'a': tm.makeTimeDataFrame(),
             'b': tm.makeTimeDataFrame()}

        windowed = ols(y=y, x=x, window=10)
        # The return value is not inspected; the call just has to succeed.
        lagged = windowed.lagged_y_predict(2)
Esempio n. 21
0
    def test_plm_lagged_y_predict(self):
        """Smoke test: ``lagged_y_predict(2)`` runs on a windowed fit.

        The ``ols`` call is expected to emit a ``FutureWarning``.
        """
        y = tm.makeTimeDataFrame()
        x = {"a": tm.makeTimeDataFrame(), "b": tm.makeTimeDataFrame()}

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            windowed = ols(y=y, x=x, window=10)
        # The return value is not inspected; the call just has to succeed.
        lagged = windowed.lagged_y_predict(2)
Esempio n. 22
0
 def test_y_predict(self):
     """For a fit without a window, ``y_predict`` must equal ``y_fitted``.

     The ``ols`` call is expected to emit a ``FutureWarning``.
     """
     endog = tm.makeTimeSeries()
     exog = tm.makeTimeDataFrame()
     with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
         fit = ols(y=endog, x=exog)
     # Check both the labelled and the raw representations.
     assert_series_equal(fit.y_predict, fit.y_fitted)
     assert_almost_equal(fit._y_predict_raw, fit._y_fitted_raw)
Esempio n. 23
0
    def ols_results(self):
        """
        Returns the results of the regressions:
        x_1 ~ L(X)
        x_2 ~ L(X)
        ...
        x_k ~ L(X)

        where X = [x_1, x_2, ..., x_k]
        and L(X) represents the columns of X lagged 1, 2, ..., n lags
        (n is the user-provided number of lags).

        Returns
        -------
        dict
            Maps each column of ``self._data`` to the ``ols`` result of
            regressing it on all lagged columns.
        """
        from pandas.stats.api import ols

        # Shared regressors: every column of every lagged frame, keyed by
        # the name built from its lag and column.
        # ``range`` instead of ``xrange`` keeps this working on Python 3.
        lagged = {}
        for lag in range(1, 1 + self._p):
            for col, series in self._lagged_data[lag].iteritems():
                lagged[_make_param_name(lag, col)] = series

        # One regression per column; a generator avoids the intermediate
        # list of pairs.
        return dict((col, ols(y=y, x=lagged, intercept=self._intercept))
                    for col, y in self._data.iteritems())
Esempio n. 24
0
    def test_predict(self):
        """``predict`` must reproduce ``y_predict`` for equivalent inputs.

        The ``ols`` call is expected to emit a ``FutureWarning``.
        """
        y = tm.makeTimeSeries()
        x = tm.makeTimeDataFrame()
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            model = ols(y=y, x=x)

        # Defaults, the training regressors and the fitted beta all give
        # back the stored prediction.
        assert_series_equal(model.predict(), model.y_predict)
        assert_series_equal(model.predict(x=x), model.y_predict)
        assert_series_equal(model.predict(beta=model.beta), model.y_predict)

        # Hand-computed product with an explicit intercept column.
        design = x.copy()
        design['intercept'] = 1.
        manual = Series(np.dot(design.values, model.beta.values), x.index)
        assert_series_equal(model.y_predict, manual)

        # Reversed column order gives the same prediction.
        reversed_x = x.reindex(columns=x.columns[::-1])
        assert_series_equal(model.predict(x=reversed_x), model.y_predict)

        # Shifted regressors: predict first, then rebuild the expected
        # value by hand in beta's column order.
        shifted = reversed_x + 10
        shifted_pred = model.predict(x=shifted)
        shifted['intercept'] = 1.
        shifted = shifted.reindex(columns=model.beta.index)
        expected = Series(np.dot(shifted.values, model.beta.values), shifted.index)
        assert_series_equal(expected, shifted_pred)

        # An all-zero beta predicts zero everywhere.
        zero_beta = Series(0., model.beta.index)
        zero_pred = model.predict(beta=zero_beta)
        assert_series_equal(Series(0., zero_pred.index), zero_pred)
Esempio n. 25
0
    def testWithTimeEffects(self):
        """Check the transformed y and x of a fit with ``time_effects=True``."""
        fit = ols(time_effects=True, x=self.panel_x2, y=self.panel_y2)

        expected_y = [0, -0.5, 0.5]
        assert_almost_equal(fit._y_trans.values.flat, expected_y)

        expected_x = [[0, 0], [-10.5, -15.5], [10.5, 15.5]]
        assert_almost_equal(fit._x_trans.values, expected_x)
Esempio n. 26
0
    def checkNonPooled(self, x, y, **kwds):
        """Smoke test a ``pool=False`` fit by repr-checking it and its attributes."""
        # For now, just check that it doesn't crash
        fit = ols(pool=False, x=x, y=y, **kwds)

        _check_repr(fit)
        for name in NonPooledPanelOLS.ATTRIBUTES:
            attribute = getattr(fit, name)
            _check_repr(attribute)
Esempio n. 27
0
    def test_predict(self):
        """``predict`` must reproduce ``y_predict`` for equivalent inputs."""
        y = tm.makeTimeSeries()
        x = tm.makeTimeDataFrame()
        model = ols(y=y, x=x)

        # Defaults, the training regressors and the fitted beta all give
        # back the stored prediction.
        stored = model.y_predict
        assert_series_equal(model.predict(), stored)
        assert_series_equal(model.predict(x=x), model.y_predict)
        assert_series_equal(model.predict(beta=model.beta), model.y_predict)

        # Hand-computed product with an explicit intercept column.
        design = x.copy()
        design["intercept"] = 1.0
        manual = Series(np.dot(design.values, model.beta.values), x.index)
        assert_series_equal(model.y_predict, manual)

        # Reversed column order gives the same prediction.
        reversed_x = x.reindex(columns=x.columns[::-1])
        assert_series_equal(model.predict(x=reversed_x), model.y_predict)

        # Shifted regressors: predict first, then rebuild the expected
        # value by hand in beta's column order.
        shifted = reversed_x + 10
        shifted_pred = model.predict(x=shifted)
        shifted["intercept"] = 1.0
        shifted = shifted.reindex(columns=model.beta.index)
        expected = Series(np.dot(shifted.values, model.beta.values), shifted.index)
        assert_series_equal(expected, shifted_pred)

        # An all-zero beta predicts zero everywhere.
        zero_beta = Series(0.0, model.beta.index)
        zero_pred = model.predict(beta=zero_beta)
        assert_series_equal(Series(0.0, zero_pred.index), zero_pred)
 def trend_analysis_df(self,trend_dataframe):
   """Summarise each column of ``trend_dataframe`` as means plus a trend coefficient.

   The result has one row per column of the input (index named
   "Events") and four columns: the mean over the slice from 7 days ago
   through today, the mean over the slice from 14 through 8 days ago,
   the mean over the whole frame, and the ``x`` coefficient from
   regressing the column's index-sorted values on their position.

   NOTE(review): the whole-frame mean is labelled "Last 30 days", which
   presumably assumes the input spans 30 days -- confirm with the caller.
   """
   # Date boundaries for the two weekly slices
   today = datetime.date.today()
   week_ago = today - datetime.timedelta(7)
   eight_days_ago = today - datetime.timedelta(8)
   two_weeks_ago = today - datetime.timedelta(14)
   # Label-based slices, addressed by the string form of the dates
   last_week = trend_dataframe.ix[str(week_ago):str(today)]
   prior_week = trend_dataframe.ix[str(two_weeks_ago):str(eight_days_ago)]
   # Column means for each period
   last_week_mean = pd.Series(last_week.mean(), name='Daily Avg (Last week)')
   prior_week_mean = pd.Series(prior_week.mean(), name='Daily Avg (Prior week)')
   overall_mean = pd.Series(trend_dataframe.mean(), name='Daily Avg (Last 30 days)')
   # One regression coefficient per column
   coefficients = pd.Series(name='Regress_coeff (30d)')
   for column in trend_dataframe:
     # Sort by index, then regress the values on their 0..n-1 position
     ordered = pd.Series(trend_dataframe[column],index=trend_dataframe.index).sort_index()
     positional = pd.Series(ordered.values).reset_index()
     fit = ols(x=positional["index"], y=positional[0])
     slope = fit.beta['x']
     coefficients = coefficients.set_value(column, slope)
   # Assemble the summary frame
   summary = pd.concat([last_week_mean, prior_week_mean, overall_mean, coefficients], axis=1)
   summary.index.name = "Events"
   return summary
Esempio n. 29
0
def cointegrate(ticker1,df1,ts1,ticker2,df2,ts2):
    """Regress one series on another and test the residuals.

    Column ``ts2`` of ``df2`` is regressed on column ``ts1`` of ``df1``
    with ``ols``. The fit and its intercept are printed, the residuals of
    the fitted line are plotted with ``plot_residuals``, and
    ``Test_Stationarity`` runs its Dickey-Fuller and Hurst-exponent
    checks on them. Nothing is returned.
    """
    name1 = '{}_{}'.format(ticker1, ts1)
    name2 = '{}_{}'.format(ticker2, ts2)

    # Both series as floats, aligned on the index of df1.
    df = pd.DataFrame(index=df1.index)
    df[name1] = df1[ts1].astype('float')
    df[name2] = df2[ts2].astype('float')

    # Calculate optimal hedge ratio "beta"
    res = ols(y=df[name2], x=df[name1])
    print(res)
    beta_hr = res.beta.x
    print(res.beta.intercept)

    # Residuals of the linear combination: observed minus fitted line.
    df['model'] = res.beta.intercept + beta_hr * df[name1]
    df["res"] = df[name2] - df['model']

    plot_residuals(df)

    # Calculate and output the CADF test on the residuals
    stationarity = Test_Stationarity(df, 'res')
    stationarity.dickey_fuller_test()
    stationarity.test_hurst_exponent()
Esempio n. 30
0
    def testWithWeights(self):
        """Check transformed and filtered data of a weighted panel fit.

        The weights are the integers 0..9 laid out over five dates and
        the columns 'A' and 'B'.
        """
        dates = [datetime(2000, 1, day) for day in range(1, 6)]
        weights = DataFrame(np.arange(10).reshape((5, 2)),
                            index=dates, columns=['A', 'B'])

        result = ols(y=self.panel_y2, x=self.panel_x2, weights=weights)

        assert_almost_equal(result._y_trans.values.flat, [0, 16, 25])

        expected_trans = [[0, 0, 0],
                          [36, 68, 4],
                          [150, 240, 5]]
        assert_almost_equal(result._x_trans.values, expected_trans)

        # The filtered regressors are expected unweighted here; the
        # weighted alternative would have been
        # [[0, 0, 0], [36, 68, 4], [150, 240, 5], [66, 120, 6], [84, 147, 7]].
        expected_filtered = [[6, 14, 1],
                             [9, 17, 1],
                             [30, 48, 1],
                             [11, 20, 1],
                             [12, 21, 1]]
        assert_almost_equal(result._x_filtered.values, expected_filtered)
Esempio n. 31
0
    def checkOLS(self, exog, endog, x, y):
        """Compare an ``ols`` fit with a statsmodels ``OLS`` reference fit.

        The reference is fitted on ``endog`` and ``exog`` with a constant
        appended. The ``ols`` fit on ``x``/``y`` is first compared with
        its sparse-input counterpart, then its raw attributes are checked
        one by one against the reference.
        """
        reference = sm.OLS(endog, sm.add_constant(exog, prepend=False)).fit()
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = ols(y=y, x=x)

        # check that sparse version is the same
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            sparse_result = ols(y=y.to_sparse(), x=x.to_sparse())
        _compare_ols_results(result, sparse_result)

        # (reference attribute, raw result attribute) pairs, kept in the
        # original checking order around the two special cases.
        leading = [("params", "_beta_raw"),
                   ("df_model", "_df_model_raw"),
                   ("df_resid", "_df_resid_raw")]
        for ref_name, raw_name in leading:
            assert_almost_equal(getattr(reference, ref_name), getattr(result, raw_name))

        # Only the first entry of the raw F statistic is compared.
        assert_almost_equal(reference.fvalue, result._f_stat_raw[0])

        trailing = [("pvalues", "_p_value_raw"),
                    ("rsquared", "_r2_raw"),
                    ("rsquared_adj", "_r2_adj_raw"),
                    ("resid", "_resid_raw"),
                    ("bse", "_std_err_raw"),
                    ("tvalues", "_t_stat_raw")]
        for ref_name, raw_name in trailing:
            assert_almost_equal(getattr(reference, ref_name), getattr(result, raw_name))

        # ``cov_params`` is a method on the reference, not an attribute.
        assert_almost_equal(reference.cov_params(), result._var_beta_raw)
        assert_almost_equal(reference.fittedvalues, result._y_fitted_raw)

        _check_non_raw_results(result)
Esempio n. 32
0
    def test_f_test(self):
        """Exercise ``f_test`` with a string, a list, and an invalid hypothesis.

        The ``ols`` call is expected to emit a ``FutureWarning``.
        """
        x = tm.makeTimeDataFrame()
        y = x.pop('A')

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            model = ols(y=y, x=x)

        # A single combined hypothesis only has to run.
        combined = '1*B+1*C+1*D=0'
        result = model.f_test(combined)

        # One hypothesis per regressor must match the model's F statistic.
        separate = ['1*B=0', '1*C=0', '1*D=0']
        result = model.f_test(separate)
        assert_almost_equal(result['f-stat'], model.f_stat['f-stat'])

        # 'A' was popped out of x, so a hypothesis on it must raise.
        self.assertRaises(Exception, model.f_test, '1*A=0')
Esempio n. 33
0
    def test_r2_no_intercept(self):
        """An explicit intercept column gives equal betas but a different r2.

        Checked for a plain fit and for a fit with ``window=20``. Every
        ``ols`` call is expected to emit a ``FutureWarning``.
        """
        y = tm.makeTimeSeries()
        x = tm.makeTimeDataFrame()

        # Same regressors plus a hand-made constant column.
        x_with = x.copy()
        x_with['intercept'] = 1.

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            automatic = ols(y=y, x=x)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            manual = ols(y=y, x=x_with, intercept=False)
        assert_series_equal(automatic.beta, manual.beta)

        # TODO: can we infer whether the intercept is there...
        self.assertNotEqual(automatic.r2, manual.r2)

        # rolling

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            automatic = ols(y=y, x=x, window=20)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            manual = ols(y=y, x=x_with, window=20, intercept=False)
        assert_frame_equal(automatic.beta, manual.beta)
        r2_differs = automatic.r2 != manual.r2
        self.assertTrue(r2_differs.all())
Esempio n. 34
0
    def test_plm_exclude_dummy_corner(self):
        """Dropping entity dummy 'D' works; dropping 'E' must raise.

        NOTE(review): 'E' is presumably not one of the entities produced
        by ``tm.makeTimeDataFrame`` -- confirm.
        """
        y = tm.makeTimeDataFrame()
        x = {'a': tm.makeTimeDataFrame(),
             'b': tm.makeTimeDataFrame()}

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            model = ols(y=y, x=x,
                        entity_effects=True,
                        dropped_dummies={'entity': 'D'})
        model.summary

        def fit_dropping_e():
            # Same fit, but dropping the dummy for entity 'E'.
            with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
                ols(y=y, x=x,
                    entity_effects=True,
                    dropped_dummies={'entity': 'E'})

        self.assertRaises(Exception, fit_dropping_e)
Esempio n. 35
0
    def test_plm_f_test(self):
        """Exercise ``f_test`` on a fit with dict-of-DataFrame regressors.

        The ``ols`` call is expected to emit a ``FutureWarning``.
        """
        y = tm.makeTimeDataFrame()
        x = {'a': tm.makeTimeDataFrame(),
             'b': tm.makeTimeDataFrame()}

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            model = ols(y=y, x=x)

        # A single combined hypothesis only has to run.
        combined = '1*a+1*b=0'
        result = model.f_test(combined)

        # One hypothesis per regressor must match the model's F statistic.
        separate = ['1*a=0', '1*b=0']
        result = model.f_test(separate)
        assert_almost_equal(result['f-stat'], model.f_stat['f-stat'])
Esempio n. 36
0
    def testWithXEffectsAndDroppedDummies(self):
        """Check ``_x`` for ``x_effects`` on 'x1' with the dummy for 30 dropped.

        The ``ols`` call is expected to emit a ``FutureWarning``.
        """
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = ols(x=self.panel_x2,
                         y=self.panel_y2,
                         dropped_dummies={'x1': 30},
                         x_effects=['x1'])

        actual = result._x
        assert_almost_equal(result._y.values.flat, [1, 4, 5])

        expected_rows = [[1., 0., 14., 1.], [0, 1, 17, 1], [0, 0, 48, 1]]
        expected = DataFrame(expected_rows,
                             index=actual.index,
                             columns=['x1_6', 'x1_9', 'x2', 'intercept'],
                             dtype=float)

        # Compare in the column order of the actual frame.
        assert_frame_equal(actual, expected.reindex(columns=actual.columns))
Esempio n. 37
0
    def testWithEntityEffectsAndDroppedDummies(self):
        """Check ``_x`` for entity effects with the dummy for 'B' dropped.

        The ``ols`` call is expected to emit a ``FutureWarning``.
        """
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = ols(x=self.panel_x2,
                         y=self.panel_y2,
                         dropped_dummies={'entity': 'B'},
                         entity_effects=True)

        # .flat is flatiter instance
        assert_almost_equal(result._y.values.flat, [1, 4, 5],
                            check_dtype=False)

        expected_rows = [[1., 6., 14., 1.], [1, 9, 17, 1], [0, 30, 48, 1]]
        expected = DataFrame(expected_rows,
                             columns=['FE_A', 'x1', 'x2', 'intercept'],
                             index=result._x.index,
                             dtype=float)
        # Compare in the column order of the actual frame.
        tm.assert_frame_equal(result._x, expected.loc[:, result._x.columns])
Esempio n. 38
0
    def testWithXEffects(self):
        """Check ``_x`` when 'x1' is expanded through ``x_effects``.

        The ``ols`` call is expected to emit a ``FutureWarning``.
        """
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = ols(x=self.panel_x2, y=self.panel_y2, x_effects=['x1'])

        # .flat is flatiter instance
        assert_almost_equal(result._y.values.flat, [1, 4, 5],
                            check_dtype=False)

        actual = result._x
        dummy_columns = ['x1_30', 'x1_9']
        expected = DataFrame([[0., 0., 14., 1.], [0, 1, 17, 1], [1, 0, 48, 1]],
                             index=actual.index,
                             columns=dummy_columns + ['x2', 'intercept'],
                             dtype=float)
        # The dummy columns are expected as uint8 rather than float.
        expected[dummy_columns] = expected[dummy_columns].astype(np.uint8)
        assert_frame_equal(actual, expected.reindex(columns=actual.columns))
Esempio n. 39
0
    def test_various_attributes(self):
        """Smoke test: several attributes of a windowed fit are Series."""
        # just make sure everything "works". test correctness elsewhere

        x = DataFrame(np.random.randn(100, 5))
        y = np.random.randn(100)
        model = ols(y=y, x=x, window=20)

        series_attrs = ['rank', 'df', 'forecast_mean', 'forecast_vol']

        for attr in series_attrs:
            value = getattr(model, attr)
            # ``assert_`` is a deprecated unittest alias that newer Python
            # versions no longer provide; ``assertTrue`` is the supported
            # spelling.
            self.assertTrue(isinstance(value, Series))

        # works
        model._results
Esempio n. 40
0
    def test_plm_exclude_dummy_corner(self):
        """Dropping entity dummy 'D' works; dropping 'E' must raise.

        NOTE(review): 'E' is presumably not one of the entities produced
        by ``tm.makeTimeDataFrame`` -- confirm.
        """
        y = tm.makeTimeDataFrame()
        x = {'a': tm.makeTimeDataFrame(), 'b': tm.makeTimeDataFrame()}

        model = ols(y=y, x=x,
                    entity_effects=True,
                    dropped_dummies={'entity': 'D'})
        model.summary

        # Same fit, but dropping the dummy for entity 'E'.
        bad_kwargs = dict(y=y,
                          x=x,
                          entity_effects=True,
                          dropped_dummies={'entity': 'E'})
        self.assertRaises(Exception, ols, **bad_kwargs)
Esempio n. 41
0
    def test_f_test(self):
        """Exercise ``f_test`` with a string, a list, and an invalid hypothesis."""
        x = tm.makeTimeDataFrame()
        y = x.pop('A')

        model = ols(y=y, x=x)

        # A single combined hypothesis only has to run.
        combined = '1*B+1*C+1*D=0'
        result = model.f_test(combined)

        # One hypothesis per regressor must match the model's F statistic.
        separate = ['1*B=0', '1*C=0', '1*D=0']
        result = model.f_test(separate)
        assert_almost_equal(result['f-stat'], model.f_stat['f-stat'])

        # 'A' was popped out of x, so a hypothesis on it must raise.
        self.assertRaises(Exception, model.f_test, '1*A=0')
Esempio n. 42
0
    def test_various_attributes(self):
        """Smoke test: several attributes of a windowed fit are Series.

        The ``ols`` call is expected to emit a ``FutureWarning``.
        """
        # just make sure everything "works". test correctness elsewhere

        regressors = DataFrame(np.random.randn(100, 5))
        target = np.random.randn(100)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            model = ols(y=target, x=regressors, window=20)

        for name in ('rank', 'df', 'forecast_mean', 'forecast_vol'):
            tm.assertIsInstance(getattr(model, name), Series)

        # works
        model._results
Esempio n. 43
0
    def testWithXEffectsAndConversionAndDroppedDummies(self):
        """Check ``_x`` for x-effects on 'x1' and 'x2' with 'foo' dropped from 'x2'.

        The ``ols`` call is expected to emit a ``FutureWarning``.
        """
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = ols(x=self.panel_x3,
                         y=self.panel_y3,
                         dropped_dummies={'x2': 'foo'},
                         x_effects=['x1', 'x2'])

        # .flat is flatiter instance
        assert_almost_equal(result._y.values.flat, [1, 2, 3, 4],
                            check_dtype=False)

        expected_rows = [[0, 0, 0, 0, 1],
                         [1, 0, 1, 0, 1],
                         [0, 1, 0, 1, 1],
                         [0, 0, 0, 0, 1]]
        expected_values = np.array(expected_rows, dtype=np.float64)
        assert_almost_equal(result._x.values, expected_values)

        expected_columns = Index(['x1_B', 'x1_C', 'x2_bar', 'x2_baz', 'intercept'])
        self.assert_index_equal(expected_columns, result._x.columns)
Esempio n. 44
0
    def runTest(self, stock1, stock2):
        """Analyse the spread between two tickers and return a ``Pair``.

        Both price histories are read from the "yahoo" source between
        ``self.start`` and ``self.end``.  Adjusted closes are passed through
        ``self.formula``, the second series is regressed on the first, and the
        residual spread is fed to the ADF test, ``self.hurst`` and
        ``self.half_life``.

        Parameters
        ----------
        stock1, stock2 : ticker identifiers handed to ``web.DataReader``;
            ``stock1`` is the regressor and ``stock2`` the response.

        Returns
        -------
        Pair built from both tickers, the regression beta and R^2, the ADF
        test statistic and p-value, the Hurst value and the half life.
        """
        start = self.start
        end = self.end

        first = web.DataReader(stock1, "yahoo", start, end)
        second = web.DataReader(stock2, "yahoo", start, end)

        # Build real lists: on Python 3 ``map`` returns a lazy iterator, which
        # is not a column of values.
        first["Value"] = [self.formula(price)
                          for price in first["Adj Close"].tolist()]
        second["Value"] = [self.formula(price)
                           for price in second["Adj Close"].tolist()]

        df = pd.DataFrame(index=first.index)
        df[stock1] = first["Value"]
        df[stock2] = second["Value"]

        res = ols(y=df[stock2], x=df[stock1])

        beta = res.beta.x
        R2 = res.r2

        # Residual spread of the hedge regression (intercept not removed).
        df["res"] = df[stock2] - beta * df[stock1]

        # Run the ADF test on the spread and keep statistic and p-value.
        cadf = ts.adfuller(df["res"])
        testStat = cadf[0]
        pValue = cadf[1]

        hurst = self.hurst(df["res"])

        # First differences of the spread, paired with the lagged level
        # (i.e. the spread without its final observation).
        spread = df["res"].tolist()
        delta = [current - previous
                 for previous, current in zip(spread, spread[1:])]
        lagged = spread[:-1]

        halfLife = self.half_life(delta, lagged)

        return Pair(stock1, stock2, beta, R2, testStat, pValue, hurst,
                    halfLife)
Esempio n. 45
0
    def checkOLS(self, exog, endog, x, y):
        """Compare a pandas ``ols`` fit with the statsmodels OLS reference.

        ``exog``/``endog`` feed statsmodels (a constant is added to ``exog``);
        ``x``/``y`` feed pandas ``ols``.  Every raw result of the pandas fit
        is then compared with its statsmodels counterpart.
        """
        sm_fit = sm.OLS(endog, sm.add_constant(exog)).fit()
        pd_fit = ols(y=y, x=x)

        same = assert_almost_equal

        # coefficients and degrees of freedom
        same(sm_fit.params, pd_fit._beta_raw)
        same(sm_fit.df_model, pd_fit._df_model_raw)
        same(sm_fit.df_resid, pd_fit._df_resid_raw)

        # goodness of fit; only the first entry of the raw F result is compared
        same(sm_fit.fvalue, pd_fit._f_stat_raw[0])
        same(sm_fit.pvalues, pd_fit._p_value_raw)
        same(sm_fit.rsquared, pd_fit._r2_raw)
        same(sm_fit.rsquared_adj, pd_fit._r2_adj_raw)

        # residuals, standard errors, t statistics, covariance, fitted values
        same(sm_fit.resid, pd_fit._resid_raw)
        same(sm_fit.bse, pd_fit._std_err_raw)
        same(sm_fit.t(), pd_fit._t_stat_raw)
        same(sm_fit.cov_params(), pd_fit._var_beta_raw)
        same(sm_fit.fittedvalues, pd_fit._y_fitted_raw)

        _check_non_raw_results(pd_fit)
Esempio n. 46
0
    def _check_wls(self, x, y, weights):
        """Check weighted ``ols`` against statsmodels WLS, then the moving fits.

        The static fit uses the reciprocal of ``weights``; the statsmodels
        reference is built from the rows that survive ``dropna`` and uses the
        reciprocal of the surviving weights.  Rolling and expanding fits are
        then delegated to ``checkMovingOLS`` with the weights as given.
        """
        fit = ols(y=y, x=x, weights=1 / weights)

        # Put regressors, response and weights in one frame so that rows with
        # any missing value are dropped together.
        frame = x.copy()
        frame['__y__'] = y
        frame['__weights__'] = weights
        frame = frame.dropna()

        response = frame.pop('__y__').values
        raw_weights = frame.pop('__weights__').values
        design = sm.add_constant(frame.values, prepend=False)

        reference = sm.WLS(response, design, weights=1 / raw_weights).fit()

        assert_almost_equal(reference.params, fit._beta_raw)
        assert_almost_equal(reference.resid, fit._resid_raw)

        for window_type in ('rolling', 'expanding'):
            self.checkMovingOLS(window_type, x, y, weights=weights)
Esempio n. 47
0
    def test_predict_longer_exog(self):
        """``y_predict`` must be indexed like the exogenous series.

        The exogenous data covers 1998-2017 while the endogenous data stops at
        2007; the prediction index has to equal the exogenous index.
        """
        exog_by_year = {
            "1998": "4760", "1999": "5904", "2000": "4504", "2001": "9808",
            "2002": "4241", "2003": "4086", "2004": "4687", "2005": "7686",
            "2006": "3740", "2007": "3075", "2008": "3753", "2009": "4679",
            "2010": "5468", "2011": "7154", "2012": "4292", "2013": "4283",
            "2014": "4595", "2015": "9194", "2016": "4221", "2017": "4520",
        }
        endog_by_year = {
            "1998": "691", "1999": "1580", "2000": "80", "2001": "1450",
            "2002": "555", "2003": "956", "2004": "877", "2005": "614",
            "2006": "468", "2007": "191",
        }

        endog = Series(endog_by_year)
        exog = Series(exog_by_year)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            fit = ols(y=endog, x=exog)

        predicted = fit.y_predict
        self.assertTrue(predicted.index.equals(exog.index))
Esempio n. 48
0
    def testFiltering(self):
        """Check which rows of ``panel_x2`` survive filtering in the fit.

        ``_x`` is expected to keep the dates 2000-01-01 and 2000-01-03 with
        second-level labels 'A' and 'B'; ``_x_filtered`` additionally keeps
        2000-01-04.  Response values, both regressor matrices and the index of
        ``y_fitted`` are verified as well.
        """
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = ols(y=self.panel_y2, x=self.panel_x2)

        x = result._x
        index = x.index.get_level_values(0)
        index = Index(sorted(set(index)))
        exp_index = Index([datetime(2000, 1, 1), datetime(2000, 1, 3)])
        # Previously ``self.assertTrue`` and its argument sat on two separate
        # lines, forming two no-op expressions: the check never executed.
        self.assertTrue(exp_index.equals(index))

        index = x.index.get_level_values(1)
        index = Index(sorted(set(index)))
        exp_index = Index(['A', 'B'])
        self.assertTrue(exp_index.equals(index))

        x = result._x_filtered
        index = x.index.get_level_values(0)
        index = Index(sorted(set(index)))
        exp_index = Index([datetime(2000, 1, 1),
                           datetime(2000, 1, 3),
                           datetime(2000, 1, 4)])
        self.assertTrue(exp_index.equals(index))

        assert_almost_equal(result._y.values.flat, [1, 4, 5])

        # Each expected row: two regressors followed by the constant column.
        exp_x = [[6, 14, 1],
                 [9, 17, 1],
                 [30, 48, 1]]
        assert_almost_equal(exp_x, result._x.values)

        exp_x_filtered = [[6, 14, 1],
                          [9, 17, 1],
                          [30, 48, 1],
                          [11, 20, 1],
                          [12, 21, 1]]
        assert_almost_equal(exp_x_filtered, result._x_filtered.values)

        self.assertTrue(result._x_filtered.index.levels[0].equals(
            result.y_fitted.index))
Esempio n. 49
0
    def testFiltering(self):
        """Check which rows of ``panel_x2`` survive filtering in the fit.

        ``_x`` is expected to keep the dates 2000-01-01 and 2000-01-03 with
        second-level labels 'A' and 'B'; ``_x_filtered`` additionally keeps
        2000-01-04.  Response values, both regressor matrices and the index of
        ``y_fitted`` are verified as well.
        """
        def sorted_level(frame, level):
            # Distinct labels of one index level, in sorted order.
            return Index(sorted(set(frame.index.get_level_values(level))))

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = ols(y=self.panel_y2, x=self.panel_x2)

        kept = result._x
        self.assert_index_equal(
            Index([datetime(2000, 1, 1), datetime(2000, 1, 3)]),
            sorted_level(kept, 0))
        self.assert_index_equal(Index(['A', 'B']), sorted_level(kept, 1))

        filtered = result._x_filtered
        self.assert_index_equal(
            Index([datetime(2000, 1, 1),
                   datetime(2000, 1, 3),
                   datetime(2000, 1, 4)]),
            sorted_level(filtered, 0))

        # ``.flat`` is a flatiter instance, hence the relaxed dtype check.
        assert_almost_equal(result._y.values.flat, [1, 4, 5],
                            check_dtype=False)

        expected_kept = np.array(
            [[6, 14, 1],
             [9, 17, 1],
             [30, 48, 1]],
            dtype=np.float64)
        assert_almost_equal(expected_kept, result._x.values)

        expected_filtered = np.array(
            [[6, 14, 1],
             [9, 17, 1],
             [30, 48, 1],
             [11, 20, 1],
             [12, 21, 1]],
            dtype=np.float64)
        assert_almost_equal(expected_filtered, result._x_filtered.values)

        self.assert_index_equal(result._x_filtered.index.levels[0],
                                result.y_fitted.index)
Esempio n. 50
0
def cadf_test(tickdict1, tickdict2, begdate, enddate):
    """Run an ADF test on the residual spread of two instruments.

    Close prices for both instruments are fetched with
    ``tushare.get_k_data`` between ``begdate`` and ``enddate``, the second
    is regressed on the first, and ``adfuller`` is applied to the residuals
    of that linear combination.  The result is pretty-printed and returned.

    Parameters
    ----------
    tickdict1, tickdict2 : mappings providing the keys 'code' (passed to
        tushare) and 'symbo' (used as the column label).
    begdate, enddate : forwarded to ``get_k_data`` as ``start`` / ``end``.

    Returns
    -------
    Whatever ``statsmodels.tsa.stattools.adfuller`` returns for the spread.
    """
    # Only the modules actually used are imported; the previous unused
    # datetime/numpy/matplotlib imports made matplotlib a needless requirement.
    import pprint

    import pandas as pd
    import statsmodels.tsa.stattools as sts
    import tushare as ts
    from pandas.stats.api import ols

    print(begdate, enddate)
    ticker1 = tickdict1['code']
    ticker2 = tickdict2['code']
    # NOTE(review): the key is spelled 'symbo' -- confirm against the callers.
    symbol1 = tickdict1['symbo']
    symbol2 = tickdict2['symbo']
    print(ticker1, ticker2)

    df1 = ts.get_k_data(ticker1, start=begdate, end=enddate)
    df2 = ts.get_k_data(ticker2, start=begdate, end=enddate)
    # Index both frames by date so the column assignments below align on it.
    df1.index = df1['date']
    df2.index = df2['date']
    df = pd.DataFrame(index=df1['date'])
    df[symbol1] = df1["close"]
    df[symbol2] = df2["close"]

    # Calculate the hedge ratio "beta" by regressing symbol2 on symbol1.
    res = ols(y=df[symbol2], x=df[symbol1])
    beta_hr = res.beta.x

    # Residuals of the linear combination.
    df["res"] = df[symbol2] - beta_hr * df[symbol1]

    # Calculate and output the CADF test on the residuals.
    cadf = sts.adfuller(df["res"])
    pprint.pprint(cadf)
    return cadf
Esempio n. 51
0
    def testFiltering(self):
        """Check which rows of ``panel_x2`` survive filtering in the fit.

        ``_x`` is expected to keep the major labels 2000-01-01 and 2000-01-03
        and the minor labels 'A' and 'B'; ``_x_filtered`` additionally keeps
        2000-01-04.  Response values, both regressor matrices and the index of
        ``y_fitted`` are verified as well.
        """
        def used_labels(axis, codes):
            # Distinct axis values referenced by the integer codes, sorted.
            return Index(sorted(set([axis[code] for code in codes])))

        result = ols(y=self.panel_y2, x=self.panel_x2)

        kept = result._x
        expected = Index([datetime(2000, 1, 1), datetime(2000, 1, 3)])
        self.assertTrue(
            expected.equals(used_labels(kept.major_axis, kept.major_labels)))

        expected = Index(['A', 'B'])
        self.assertTrue(
            expected.equals(used_labels(kept.minor_axis, kept.minor_labels)))

        filtered = result._x_filtered
        expected = Index([datetime(2000, 1, 1),
                          datetime(2000, 1, 3),
                          datetime(2000, 1, 4)])
        self.assertTrue(
            expected.equals(
                used_labels(filtered.major_axis, filtered.major_labels)))

        assert_almost_equal(result._y.values.flat, [1, 4, 5])

        expected_kept = [[6, 14, 1], [9, 17, 1], [30, 48, 1]]
        assert_almost_equal(expected_kept, result._x.values)

        expected_filtered = [[6, 14, 1], [9, 17, 1], [30, 48, 1],
                             [11, 20, 1], [12, 21, 1]]
        assert_almost_equal(expected_filtered, result._x_filtered.values)

        fitted_index = result.y_fitted.index
        self.assertTrue(result._x_filtered.major_axis.equals(fitted_index))
Esempio n. 52
0
def fill_regressed_data(S):
    """ Fill missing returns by linear combinations of assets without missing returns.

    Log returns are computed from ``S``.  Every column that has a gap is
    regressed (with intercept) on the columns that have none, the fitted model
    predicts the missing returns, and the predictions are turned back into
    prices that replace the NaNs.

    :param S: DataFrame of prices, one column per asset.
    :return: Copy of ``S`` with missing prices filled; ``S`` itself is untouched.
    """
    S = S.copy()
    R = np.log(S).diff()
    R.iloc[0] = 0

    # Regressors: the assets whose return history is complete.
    X = R.dropna(axis=1)

    for col in set(S.columns) - set(X.columns):
        # Take a private copy and blank its first return on that copy.  The
        # former chained form ``R[col].iloc[0] = nan`` may write to a temporary
        # and leave the data unchanged, depending on the pandas version.
        y = R[col].copy()
        y.iloc[0] = np.nan

        # fit regression
        res = ols(y=y, x=X, intercept=True)
        pred = res.predict(x=X[y.isnull()])

        # get absolute prices: cumulate the log returns and shift them so the
        # last predicted value equals the log of the first observed price
        pred = pred.cumsum()
        pred += np.log(S[col].dropna().iloc[0]) - pred.iloc[-1]

        # fill missing data
        S[col] = S[col].fillna(np.exp(pred))

    return S
Esempio n. 53
0
def calc_positive_negative_dates(data,
                                 pos_x_min=0.005,
                                 pos_x_max=0.01,
                                 neg_y_max=-0.005,
                                 neg_y_min=-0.01):
    """Regress open-to-close returns on overnight gaps within two bands.

    Rows of ``data`` are kept when the gap ``(OPEN - PREV_CLOSE) / PREV_CLOSE``
    lies strictly inside ``(pos_x_min, pos_x_max)`` or strictly inside
    ``(neg_y_min, neg_y_max)``.  For those rows the gap is scattered against
    the open-to-close return, the figure is saved as a ``.jpg`` named after
    the four thresholds, and the ``ols`` fit of the return on the gap is
    printed.

    Parameters
    ----------
    data : frame exposing ``OPEN``, ``PREV_CLOSE`` and ``CLOSE`` columns.
    pos_x_min, pos_x_max : bounds of the positive gap band.
    neg_y_max, neg_y_min : bounds of the negative gap band.

    Returns
    -------
    None; the function only plots, saves the figure and prints.
    """
    # Overnight gap, computed once instead of four times inside the mask.
    gap = (data.OPEN - data.PREV_CLOSE) / data.PREV_CLOSE
    in_positive_band = (gap > pos_x_min) & (gap < pos_x_max)
    in_negative_band = (gap < neg_y_max) & (gap > neg_y_min)
    pdata = data[in_positive_band | in_negative_band]

    # calculate prev close to open return and open to close return and regress
    prev_close_open = (pdata.OPEN - pdata.PREV_CLOSE) / pdata.PREV_CLOSE
    open_close = (pdata.CLOSE - pdata.OPEN) / pdata.OPEN

    # The same label serves as figure title and as file stem.
    label = 'Posb(%03f,%03f)andNeg(%03f,%03f)' % (
        pos_x_min, pos_x_max, neg_y_max, neg_y_min)

    fig = plt.figure()
    plt.scatter(x=prev_close_open, y=open_close)
    fig.suptitle(label, fontsize=20)
    plt.xlabel('prev_close_open', fontsize=10)
    plt.ylabel('open_close', fontsize=10)
    plt.savefig(label + '.jpg')

    res = ols(y=open_close, x=prev_close_open)
    # Function-call form: valid on Python 3, same output on Python 2.
    print(res)
Esempio n. 54
0
def regression_without_ccy_nation(Currency,typ):
    """Regress issuance amounts on market/domicile factors, excluding home-currency rows.

    Reads ``<ROOT_DIR>cleaned data/regression data/<typ>/<Currency>_<typ>.xlsx``,
    keeps the rows whose ``Currency`` column equals the ``NATION_CURRENCY_DICT``
    key mapped to ``Currency``, standardises ``PrincipalAmount($mil)`` against
    its trailing 12- and 24-month mean/std, drops rows where ``Currency``
    equals ``Nation`` and runs six ``ols`` fits.

    Parameters
    ----------
    Currency : value looked up among the values of ``NATION_CURRENCY_DICT``;
        also part of the workbook file name.
    typ : sub-folder and file-name suffix of the workbook (e.g. 'Corp').

    Returns
    -------
    (res1, res2, res3, res4, res5, res6, reg_df, correl_matrix) where res1-3
    use all eight regressors on the raw / 1y-normalised / 2y-normalised amount,
    res4-6 repeat this without the two credit regressors, ``reg_df`` is the
    final frame and ``correl_matrix`` the correlation of the eight regressors.

    Raises
    ------
    ValueError if ``Currency`` is not a value of ``NATION_CURRENCY_DICT``.
    """
    amount_col = 'PrincipalAmount($mil)'
    rate_regressors = ['r_market', 'Butterfly_market', 'Curve_market',
                       'r_domicile', 'Butterfly_domicile', 'Curve_domicile']
    all_regressors = rate_regressors + ['credit_market', 'credit_domicile']

    reg_df = pd.read_excel(ROOT_DIR + 'cleaned data/regression data/' + typ
                           + '/' + Currency + '_' + typ + '.xlsx')
    # Reverse lookup: the key whose value is ``Currency``.
    key_ccy = list(NATION_CURRENCY_DICT.keys())[
        list(NATION_CURRENCY_DICT.values()).index(Currency)]
    # Copy so the columns added below land on an independent frame.
    reg_df = reg_df[reg_df.Currency == key_ccy].copy()

    dates = reg_df['Date']
    amounts = reg_df[amount_col]

    n = len(reg_df.index)
    mu1 = np.zeros(n)
    sigma1 = np.zeros(n)
    mu2 = np.zeros(n)
    sigma2 = np.zeros(n)
    # Iterate positionally: after the filter above the index labels are no
    # longer guaranteed to be 0..n-1, so ``reg_df['Date'][i]`` could raise
    # KeyError or pick a different row.
    for i, date_obs in enumerate(dates):
        not_after = dates <= date_obs
        last_1y = amounts[(dates > date_obs + relativedelta(months=-12)) & not_after]
        last_2y = amounts[(dates > date_obs + relativedelta(months=-24)) & not_after]
        mu1[i] = np.mean(last_1y)
        mu2[i] = np.mean(last_2y)
        sigma1[i] = np.std(last_1y)
        sigma2[i] = np.std(last_2y)

    reg_df['normal_amount_1y'] = (reg_df[amount_col] - mu1) / sigma1
    reg_df['normal_amount_2y'] = (reg_df[amount_col] - mu2) / sigma2

    # Move the last column to the front.
    cols = reg_df.columns.tolist()
    cols = cols[-1:] + cols[:-1]
    reg_df = reg_df[cols]
    reg_df = reg_df[reg_df['Currency'] != reg_df['Nation']]

    res1 = ols(y=reg_df[amount_col], x=reg_df[all_regressors])
    res2 = ols(y=reg_df['normal_amount_1y'], x=reg_df[all_regressors])
    res3 = ols(y=reg_df['normal_amount_2y'], x=reg_df[all_regressors])
    res4 = ols(y=reg_df[amount_col], x=reg_df[rate_regressors])
    res5 = ols(y=reg_df['normal_amount_1y'], x=reg_df[rate_regressors])
    res6 = ols(y=reg_df['normal_amount_2y'], x=reg_df[rate_regressors])

    correl_matrix = reg_df[all_regressors].corr()

    return res1, res2, res3, res4, res5, res6, reg_df, correl_matrix
Esempio n. 55
0
def filter_obs(hob_df, filter_water_table, filter_stresses):
    '''Applies depth and measurement variability criteria to the head observations.

    Observations are grouped by 'site_no'.  For each site the mean of
    'well_depth_va' and the mean and standard deviation of 'lev_va' are
    compared with module-level thresholds.  Returns a dict mapping each
    excluded site to a text describing the criterion it failed.
    '''
    excluded = {}

    for site, site_obs in hob_df.groupby('site_no'):
        mean_well_depth = site_obs['well_depth_va'].mean()
        mean_dtw = site_obs['lev_va'].mean()
        std_dtw = site_obs['lev_va'].std()

        # Water-table screening.  The flag is compared with ``True`` on
        # purpose, so only values equal to True enable the criterion.
        if filter_water_table == True:

            # Sites shallower than the minimum depth are always kept.
            if mean_dtw <= min_dtw:
                continue

            # Mean depth to water beyond the allowed maximum.
            if mean_dtw > max_dtw:
                excluded[site] = 'dtw = %i > max dtw' % (mean_dtw)
                continue

            # Saturated thickness large relative to well depth, for wells
            # deeper than the land-surface buffer.
            sat_thick = mean_well_depth - mean_dtw
            if (sat_thick > (sat_thick_mult * mean_well_depth)
                    and mean_well_depth > land_surface_buffer):
                excluded[site] = 'sat thick %i > %0.2f * dow' % (
                    sat_thick, sat_thick_mult)
                continue

        # Stress screening (trend over time, measurement variability).
        if filter_stresses == True:

            # Sites shallower than the minimum depth are always kept.
            if mean_dtw <= min_dtw:
                continue

            # Days elapsed since the site's first measurement; the level is
            # regressed on it to test both slope and R^2 of the best fit.
            elapsed = site_obs['lev_dt'] - site_obs['lev_dt'].min()
            site_obs['date_delta'] = elapsed / np.timedelta64(1, 'D')
            fit = ols(y=site_obs['lev_va'], x=site_obs['date_delta'])
            trend_per_year = fit.beta['x'] * 365.  # slope per day -> per year
            r_squared = fit.r2

            if trend_per_year > max_trend and r_squared > max_r2:
                excluded[site] = (
                    'Apparent temporal trend = %0.2f m/year > %0.2f & R^2 = %0.2f > %0.2f'
                    % (trend_per_year, max_trend, r_squared, max_r2))
                continue

            if std_dtw > (std_mult * mean_dtw):
                excluded[site] = 'Measurement std = %4.2f > %0.2f * dtw' % (
                    std_dtw, std_mult)
                continue

    return excluded
Esempio n. 56
0
    def checkForSeries(self, x, y, series_x, series_y, **kwds):
        """Check that a fit on ``x``/``y`` agrees with the Series-based fit.

        Both fits receive the same extra keyword arguments; the comparison
        itself is delegated to ``self.compare``.
        """
        # Fit order kept as before: candidate first, then the reference.
        candidate = ols(y=y, x=x, **kwds)
        baseline = ols(y=series_y, x=series_x, **kwds)

        self.compare(baseline, candidate)
Esempio n. 57
0
if __name__ == "__main__":
    # Sample window: 2012-01-01 up to 2013-01-01.
    start, end = datetime.datetime(2012, 1, 1), datetime.datetime(2013, 1, 1)

    arex = web.DataReader("AREX", "yahoo", start, end)
    wll = web.DataReader("WLL", "yahoo", start, end)

    # Adjusted closes of both tickers side by side, on AREX's index.
    df = pd.DataFrame(index=arex.index)
    df["AREX"] = arex["Adj Close"]
    df["WLL"] = wll["Adj Close"]

    # Price series first, then the scatter of one ticker against the other.
    for plot in (plot_price_series, plot_scatter_series):
        plot(df, "AREX", "WLL")

    # Hedge ratio "beta": slope of WLL regressed on AREX.
    res = ols(y=df['WLL'], x=df["AREX"])
    beta_hr = res.beta.x

    # Residuals of the linear combination, then their plot.
    df["res"] = df["WLL"] - beta_hr * df["AREX"]
    plot_residuals(df)

    # CADF test on the residuals, pretty-printed.
    cadf = ts.adfuller(df["res"])
    pprint.pprint(cadf)
Esempio n. 58
0
 def test_summary_many_terms(self):
     """Accessing the summary of a fit with 20 regressors must not raise."""
     wide_x = DataFrame(np.random.randn(100, 20))
     target = np.random.randn(100)
     fit = ols(y=target, x=wide_x)
     # The attribute access itself is the test.
     fit.summary
Esempio n. 59
0
 def test_y_predict(self):
     """Predicted values of a fit must equal its fitted values.

     Both the labelled Series and the raw arrays are compared.
     """
     response = tm.makeTimeSeries()
     regressors = tm.makeTimeDataFrame()
     fit = ols(y=response, x=regressors)

     assert_series_equal(fit.y_predict, fit.y_fitted)
     assert_almost_equal(fit._y_predict_raw, fit._y_fitted_raw)
Esempio n. 60
0
 def test_series_rhs(self):
     """A bare Series regressor must give the same beta as ``{'x': series}``."""
     y = tm.makeTimeSeries()
     x = tm.makeTimeSeries()
     # Fit once with the Series passed directly ...
     model = ols(y=y, x=x)
     # ... and once with the same Series wrapped in a dict under the key 'x'.
     expected = ols(y=y, x={'x' : x})
     assert_series_equal(model.beta, expected.beta)