def test_johansen(self):
    """Check the dict returned by ``cointeg.get_johansen`` on a synthetic frame.

    The frame has three columns built from the pickled series s1, s2, s3:

    * col1 = cumsum(s1) + s2
    * col2 = 0.5 * cumsum(s1) + s3
    * col3 = s3

    The test expects two cointegration vectors, compares them (and the
    third eigenvector) against pinned reference values, and then checks
    that projecting the data onto each vector gives the expected
    ``is_not_stationary`` verdict.
    """
    s1 = self.load_resource('s1.pickle')
    s2 = self.load_resource('s2.pickle')
    s3 = self.load_resource('s3.pickle')
    a = 0.5
    # col1 and col2 share the cumsum(s1) component (scaled by ``a`` in col2).
    x_1t = numpy.cumsum(s1) + s2
    x_2t = a * numpy.cumsum(s1) + s3
    x_3t = s3
    test_date = datetime(2015, 10, 1)
    n = len(s1)
    index = pandas.date_range(test_date - timedelta(10), periods=n, freq='D')
    y = pandas.DataFrame(index=index, data={'col1': x_1t, 'col2': x_2t, 'col3': x_3t})

    jres = cointeg.get_johansen(y, lag=1)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(2, jres['count_cointegration_vectors'],
                     'number of cointegration vectors does not match')

    v1 = jres['cointegration_vectors'][:, 0]
    v2 = jres['cointegration_vectors'][:, 1]
    v3 = jres['eigenvectors'][:, 2]  # v3 is not a cointegration vector

    expected_v1 = numpy.array([1.18712515, -2.37415904, 3.14587243])
    numpy.testing.assert_almost_equal(v1, expected_v1)
    expected_v2 = numpy.array([-0.76082907, 1.52149628, -0.32817785])
    numpy.testing.assert_almost_equal(v2, expected_v2)
    expected_v3 = numpy.array([0.00019993, 0.04721915, -0.04629564])
    numpy.testing.assert_almost_equal(v3, expected_v3)

    # DataFrame.values replaces as_matrix(), which newer pandas no longer has.
    observations = y.values
    self.assertFalse(cointeg.is_not_stationary(numpy.dot(observations, v1), significance='10%'))
    self.assertFalse(cointeg.is_not_stationary(numpy.dot(observations, v2), significance='10%'))
    self.assertTrue(cointeg.is_not_stationary(numpy.dot(observations, v3), significance='5%'))
def test_adf(self):
    """Check ``cointeg.is_not_stationary`` on three synthetic series.

    The two series that contain cumsum(s1) are expected to be reported
    as non-stationary; the scaled copy of s3 is expected not to be.
    """
    increments, noise_a, noise_b = (
        self.load_resource(resource)
        for resource in ('s1.pickle', 's2.pickle', 's3.pickle')
    )
    scale = 0.5

    walk_plus_noise = numpy.cumsum(increments) + noise_a
    scaled_walk_plus_noise = scale * numpy.cumsum(increments) + noise_b
    amplified_noise = 100. * noise_b

    # Same call order as before: the two cumulative-sum series first.
    for series in (walk_plus_noise, scaled_walk_plus_noise):
        self.assertTrue(cointeg.is_not_stationary(series))
    self.assertFalse(cointeg.is_not_stationary(amplified_noise))
def test_johansen(self):
    """Check the vectors returned by ``cointeg.get_johansen`` on a synthetic frame.

    Columns are col1 = cumsum(s1) + s2, col2 = 0.5 * cumsum(s1) + s3 and
    col3 = s3. The first two returned vectors are compared against pinned
    reference values, and the data projected onto each of them must not
    be reported as non-stationary at the '10%' significance setting.
    """
    increments, noise_a, noise_b = (
        self.load_resource(resource)
        for resource in ('s1.pickle', 's2.pickle', 's3.pickle')
    )
    scale = 0.5
    columns = {
        'col1': numpy.cumsum(increments) + noise_a,
        'col2': scale * numpy.cumsum(increments) + noise_b,
        'col3': noise_b,
    }

    # Daily index starting ten days before the reference date.
    first_day = datetime(2015, 10, 1) - timedelta(10)
    daily_index = pandas.date_range(first_day, periods=len(increments), freq='D')
    y = pandas.DataFrame(index=daily_index, data=columns)

    vectors = cointeg.get_johansen(y, lag=1)
    first, second = vectors[0], vectors[1]

    numpy.testing.assert_almost_equal(first, numpy.array([1., -1.9999231, 2.6499922]))
    numpy.testing.assert_almost_equal(second, numpy.array([-2.3183438, 4.6361944, -1.]))

    for vector in (first, second):
        projection = numpy.dot(y.values, vector)
        self.assertFalse(cointeg.is_not_stationary(projection, significance='10%'))
from mktdata import load_prices_quandl

__author__ = 'Christophe'

# Script: loads two Quandl price series, runs cointeg.get_johansen on them,
# prints the returned statistics, plots the signal built from the first
# cointegration vector, then regresses one series on the other and plots
# the resulting hedged portfolio.
#
# Every print below is a single-argument call, which is valid (and prints
# the same text) on both Python 2 and Python 3; the original print
# statements were a syntax error on Python 3.
if __name__ == '__main__':
    logging.basicConfig(format='%(asctime)-15s %(levelname)s %(name)s - %(message)s', level=logging.DEBUG)
    codes = ['GOOG/NYSE_EWA', 'GOOG/NYSE_EWC']
    quandl_data = load_prices_quandl(codes, start_date='2006-04-26', end_date='2012-04-09')
    results = cointeg.get_johansen(quandl_data, lag=1, significance='90%')

    print('')
    print('%s %s' % ('critical_values_trace', results['critical_values_trace']))
    print('%s %s' % ('trace_statistic', results['trace_statistic']))
    print('%s %s' % ('critical_values_max_eigenvalue', results['critical_values_max_eigenvalue']))
    print('%s %s' % ('eigenvalue_statistics', results['eigenvalue_statistics']))

    cointeg_vector = results['cointegration_vectors']
    print('')
    print('%s %s' % ('------', results))
    print('%s %s' % ('------', cointeg_vector))

    # DataFrame.values replaces as_matrix(), which newer pandas no longer has.
    # Column 0 of the product is the data projected onto the first vector.
    signal = numpy.dot(quandl_data.values, cointeg_vector)[:, 0]
    signal_df = pandas.DataFrame({'signal': signal}, index=quandl_data.index)
    signal_df.plot()
    print('%s %s' % ('--- non-stationarity test', cointeg.is_not_stationary(signal, significance='1%')))

    quandl_data.plot(kind='scatter', x='GOOG.NYSE_EWA - Close', y='GOOG.NYSE_EWC - Close')

    # NOTE(review): `ols` is imported outside this view; presumably the legacy
    # pandas OLS helper -- confirm it is available in the target environment.
    regression = ols(y=quandl_data['GOOG.NYSE_EWA - Close'], x=quandl_data[['GOOG.NYSE_EWC - Close']])
    print(regression)

    hedge_ratio = regression.beta[0]
    portfolio = quandl_data['GOOG.NYSE_EWA - Close'] - hedge_ratio * quandl_data['GOOG.NYSE_EWC - Close']
    print(portfolio)
    pandas.DataFrame(portfolio, columns=['signal2']).plot()
    pyplot.show()
__author__ = 'Christophe'

# Script: builds three synthetic series from normally distributed draws,
# prints the cointeg.is_not_stationary verdict for the series (and for the
# first differences of the first two), then runs cointeg.get_johansen and
# prints the first two cointegration vectors, raw and rescaled.
#
# Every print below is a single-argument call, which is valid (and prints
# the same text) on both Python 2 and Python 3; the original print
# statements were a syntax error on Python 3.
if __name__ == '__main__':
    mu, sigma = 0, 1  # mean and standard deviation
    n = 10000
    s1 = np.random.normal(mu, sigma, n)
    s2 = np.random.normal(mu, sigma, n)
    s3 = np.random.normal(mu, sigma, n)

    a = 0.5
    # col1 and col2 share the cumsum(s1) component (scaled by ``a`` in col2).
    x_1t = np.cumsum(s1) + s2
    x_2t = a * np.cumsum(s1) + s3
    x_3t = s3

    todays_date = datetime.datetime.now().date()
    index = pd.date_range(todays_date - datetime.timedelta(10), periods=n, freq='D')
    y = pd.DataFrame(index=index, data={'col1': x_1t, 'col2': x_2t, 'col3': x_3t})

    print(cointeg.is_not_stationary(x_1t))
    print(cointeg.is_not_stationary(np.diff(x_1t)))
    print(cointeg.is_not_stationary(x_2t))
    print(cointeg.is_not_stationary(np.diff(x_2t)))
    print(cointeg.is_not_stationary(x_3t))

    jres = cointeg.get_johansen(y, lag=1)
    # Items are joined with single spaces, exactly as the print statement did.
    print('%s %s %s' % ("There are ", jres['count_cointegration_vectors'], "cointegration vectors"))

    v1 = jres['cointegration_vectors'][:, 0]
    v2 = jres['cointegration_vectors'][:, 1]
    print(v1)
    print(v2)
    v3 = jres['eigenvectors'][:, 2]  # v3 is not a cointegration vector

    # Rescale each vector so that its second component equals -1.
    print(v1 / -v1[1])
    print(v2 / -v2[1])