Example #1
0
    def test_pct_change_periods_freq(self):
        """``pct_change(freq=...)`` must match ``pct_change(periods)``.

        Each case pairs a business-day frequency string with the period
        count it is compared against, plus the fill keywords that are
        forwarded unchanged to both calls.
        """
        # GH 7292
        cases = [
            ('5B', 5, {}),
            ('3B', 3, {'fill_method': None}),
            ('3B', 3, {'fill_method': 'bfill'}),
            ('7B', 7, {'fill_method': 'pad', 'limit': 1}),
            ('7B', 7, {'fill_method': 'bfill', 'limit': 3}),
        ]
        for freq, periods, fill_kwargs in cases:
            by_freq = self.tsframe.pct_change(freq=freq, **fill_kwargs)
            by_periods = self.tsframe.pct_change(periods, **fill_kwargs)
            assert_frame_equal(by_freq, by_periods)

        # Same comparison on a frame that has the fixture's labels but no data.
        empty_ts = DataFrame(index=self.tsframe.index,
                             columns=self.tsframe.columns)
        assert_frame_equal(empty_ts.pct_change(freq='14B'),
                           empty_ts.pct_change(14))
Example #2
0
def get_stocks(tickers, market, start_date, end_date, frequency):
    """Build a price table for a market benchmark plus a list of tickers.

    Parameters
    ----------
    tickers : sequence
        Yahoo symbols; each one becomes a column of the result.
    market : str
        ``'TA100'`` / ``'TA25'`` are loaded through ``get_index_price``;
        ``'SP500'`` is fetched from Yahoo under the symbol ``'VFINX'``;
        any other value is used directly as a Yahoo symbol.
    start_date, end_date : str
        Dates formatted as ``'%d/%m/%Y'``.
    frequency : str
        ``'Weekly'`` or ``'Monthly'`` (the keys of ``FREQ_DICT``).

    Returns
    -------
    tuple
        ``(prices, changes)`` where ``changes`` is ``prices.pct_change()``
        without its first row.
    """
    # Resampling rule for each supported frequency label
    FREQ_DICT = {
        'Weekly': 'W-FRI',
        'Monthly': 'M',
    }

    date_format = '%d/%m/%Y'
    start_yahoo = datetime.datetime.strptime(start_date, date_format)
    end_yahoo = datetime.datetime.strptime(end_date, date_format)

    def yahoo_close(symbol, **resample_options):
        # Download first, then look up the rule and resample to the last
        # observation of every period.
        adj_close = web.get_data_yahoo(symbol, start_yahoo, end_yahoo)['Adj Close']
        return adj_close.resample(FREQ_DICT[frequency], how='last',
                                  **resample_options)

    # Market portfolio
    if market in ('TA100', 'TA25'):
        index_prices = get_index_price(index_id=market,
                                       start_date=start_date,
                                       end_date=end_date,
                                       frequency=frequency)
        prices = index_prices.resample(FREQ_DICT[frequency], how='last')
    elif market == 'SP500':
        prices = DataFrame(yahoo_close('VFINX', fill_method='ffill'),
                           columns=['SP500'])
    else:
        prices = DataFrame(yahoo_close(market, fill_method='ffill'),
                           columns=[market])

    # Stock prices: one extra column per ticker, outer-joined on the index
    for ticker in tickers:
        ticker_prices = DataFrame(yahoo_close(ticker), columns=[ticker])
        prices = pd.concat([prices, ticker_prices], join='outer', axis=1)

    changes = prices.pct_change()

    # The first row of the changes has no predecessor, so it is dropped.
    return prices, changes.iloc[1:]
Example #3
0
    def test_pct_change_shift_over_nas(self):
        """``pct_change`` on a frame whose columns contain an interior NaN.

        The expected result is NaN at the missing position, and the entry
        after it is measured against the last valid value (1.5).
        """
        values = Series([1., 1.5, np.nan, 2.5, 3.])
        frame = DataFrame({'a': values, 'b': values})

        result = frame.pct_change()

        expected_column = Series([np.nan, 0.5, np.nan, 2.5 / 1.5 - 1, .2])
        expected_frame = DataFrame({'a': expected_column,
                                    'b': expected_column})
        assert_frame_equal(result, expected_frame)
Example #4
0
    def test_pct_change_periods_freq(self, freq, periods, fill_method, limit):
        """``pct_change(freq=freq)`` must match ``pct_change(periods)``.

        The comparison is run on ``self.tsframe`` and then on a frame that
        has the same index and columns but no data; ``fill_method`` and
        ``limit`` are forwarded unchanged to both calls.
        """
        # GH 7292
        fill_kwargs = dict(fill_method=fill_method, limit=limit)

        def check(frame):
            by_freq = frame.pct_change(freq=freq, **fill_kwargs)
            by_periods = frame.pct_change(periods, **fill_kwargs)
            assert_frame_equal(by_freq, by_periods)

        check(self.tsframe)
        check(DataFrame(index=self.tsframe.index,
                        columns=self.tsframe.columns))
Example #5
0
def aagr(df: pd.DataFrame, window: int=10):  # TODO: don't include the window
    """Average annual growth rate.

    Takes the period-over-period percent change of ``df`` and averages it
    over a rolling window.

    Parameters
    ----------
    df : `DataFrame`
        the values whose growth is measured
    window : `int`
        the rolling window size

    Returns
    -------
    return : `DataFrame`
        The rolling mean of the percent changes, with rows that contain
        missing values removed
    """
    growth = df.pct_change()
    windowed_mean = growth.rolling(window).apply(np.mean)
    return windowed_mean.dropna()
Example #6
0
obj.describe()
## Correlation and Covariance
import pandas.io.data as web
# Daily history per ticker, keyed by symbol
all_data = {}
for ticker in ['AAPL', 'IBM', 'MSFT', 'GOOG']:
    all_data[ticker] = web.get_data_yahoo(ticker, '1/1/2000', '1/1/2010')

# One column per ticker; .items() works on both Python 2 and Python 3.
price = DataFrame({tic: data['Adj Close']
                   for tic, data in all_data.items()})
price
volume = DataFrame({tic: data['Volume']
                    for tic, data in all_data.items()})
# percent changes of the prices:
returns = price.pct_change()
returns.tail()
returns.MSFT.corr(returns.IBM) # correlation of the overlapping non-NA
returns.MSFT.cov(returns.IBM) # covariance of the overlapping non-NA
# full pairwise matrices
returns.corr()
returns.cov()
# correlation of every column with a single series, then with the
# matching columns of another frame
returns.corrwith(returns.IBM)
returns.corrwith(volume)
## Unique values, Value counts, and membership
obj = Series(['c', 'a', 'd', 'a', 'a', 'b', 'b', 'c', 'c'])
uniques = obj.unique()
uniques
obj.value_counts()
obj.value_counts() # value frequencies
from pandas import value_counts
def main():
    """
    Calculation and aggregation of summary statistics.

    Prints, in order: reductions of a small frame containing NaNs, the
    result of a list of statistical methods looked up by name, the
    correlation/covariance section (skipped while no tickers are
    configured), and unique-value / value-count / membership examples.

    Every ``print`` call takes exactly one argument so that the output is
    the same under Python 2 (where ``print(x)`` is a print statement with a
    parenthesised expression) and Python 3.
    """

    # Summary of statistics
    # return is not ndarray
    df = DataFrame([[1.4, np.nan],
                    [7.1, -4.5],
                    [np.nan, np.nan],
                    [0.75, -1.3]],
                   index=list('abcd'),
                   columns=['one', 'two'])
    print(df)
    print(df.sum())
    print(df.sum(axis=1))
    print(df.mean(axis=1))  # exclude nan
    print(df.mean(axis=1, skipna=False))
    print(df.idxmin())
    print(df.idxmax())
    print(df.cumsum())
    print(df.describe())
    # values are not numbers
    obj = Series(list('aabc') * 4)
    print(obj.describe())

    # NOTE(review): 'mad' is not available in recent pandas releases --
    # confirm the pandas version this script targets.
    methods = ['count', 'min', 'max',  # 'argmin', 'argmax',
               'quantile', 'median', 'mad', 'var', 'std',
               'skew', 'kurt', 'cummin', 'cummax', 'cumprod',
               'diff', 'pct_change']

    # Call each method by name with its default arguments.
    for method in methods:
        print(u'「{0}」'.format(method))
        print(getattr(df, method)())
        print('')

    # Correlation and Covariance
    all_data = {}
    # The ticker list is left empty, so nothing is downloaded and the
    # section guarded by ``if all_data`` below is skipped.
    lst = []  # ['AAPL', 'IBM', 'MSFT'] #, 'GOOG']:
    for ticket in lst:  # , 'GOOG']:
        # IOError: after 3 tries, Yahoo! did not return a 200
        # for url 'http://ichart.finance.yahoo.com/table.csv?s=GOOG&a=0&b=1&c=2000&d=0&e=1&f=2010&g=d&ignore=.csv'
        all_data[ticket] = pd.io.data.get_data_yahoo(ticket, '1/1/2000', '1/1/2010')
    # .items() works on both Python 2 and Python 3.
    price = DataFrame({tic: data['Adj Close'] for tic, data in all_data.items()})
    volume = DataFrame({tic: data['Volume'] for tic, data in all_data.items()})
    if all_data:
        returns = price.pct_change()
        print(returns.tail())
        print('')
        print(returns.MSFT.corr(returns.IBM))
        print(returns.MSFT.cov(returns.IBM))
        print('')
        print(returns.corr())
        print(returns.cov())
        print('')
        print(returns.corrwith(returns.IBM))
        print(returns.corrwith(volume))

    # unique values, value counts, membership
    # The Python 2 statement ``print '',''`` wrote a single space followed
    # by a newline; this call writes exactly the same bytes.
    print(' ')
    obj = Series(list('cadaabbcc'))
    uniques = obj.unique()
    print(uniques)
    print(obj.value_counts())
    print(pd.value_counts(obj.values, sort=False))
    mask = obj.isin(['b', 'c'])
    print(mask)
    print(obj[mask])

    data = DataFrame({
        'Qu1' : [1,3,4,3,4],
        'Qu2' : [2,3,1,2,3],
        'Qu3' : [1,5,2,4,4],
    })
    print(data)
    # Count the occurrences of each value per column; absent values become 0.
    print(data.apply(pd.value_counts).fillna(0))
import matplotlib.pyplot as plt 
from random import randint 
now = datetime.datetime.now()

list = '^GSPC'
# Draw a random start date. The day is picked independently of the month,
# so combinations such as 31 February can occur; datetime rejects those
# with ValueError and another date is drawn.
start = None
while start is None:
    try:
        start = datetime.datetime(randint(1950,2015), randint(1,12), randint(1,31))
    except ValueError:
        # Only invalid calendar dates are retried; anything else (including
        # KeyboardInterrupt) propagates instead of looping forever.
        pass
end = datetime.datetime(now.year, now.month, now.day)

df = pd.io.data.get_data_yahoo(list, start, end)['Adj Close']
df = DataFrame(df)
df['Returns'] = df.pct_change()
df['Date'] = df.index
df['Date'] = [stamp.date() for stamp in df['Date']]
l = df.index.values
# Weekday name of every row, taken straight from the date objects instead
# of writing one cell at a time through a str()/strptime round-trip.
df['DayoftheWeek'] = [day.strftime('%A') for day in df['Date']]

days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']
# Per-weekday accumulators
Monday = 0
MonCount = 0
Mon = []
Tuesday = 0
TueCount = 0
Tue = []
Wednesday = 0
WedCount = 0
Example #9
0
import matplotlib.pyplot as plt
from collections import defaultdict

# Switch matplotlib to interactive mode.
plt.interactive(True)

# Symbols whose price history is downloaded.
names = [
    'AAPL', 'GOOG', 'MSFT', 'DELL',
    'GS', 'MS', 'BAC', 'C',
]


def get_px(stock, start, end):
    """Print a progress line, then return the 'Adj Close' column that
    ``web.get_data_yahoo`` yields for ``stock`` between ``start`` and ``end``.
    """
    print('Get ' + stock)
    history = web.get_data_yahoo(stock, start, end)
    return history['Adj Close']


# One adjusted-close column per symbol in ``names``.
px = DataFrame({n: get_px(n, '1/1/2009', '6/1/2012') for n in names})

# Conform to business-day frequency and carry the last known price forward
# into the gaps. ``.ffill()`` is the same operation as fillna(method='pad')
# without the deprecated ``method`` keyword.
px = px.asfreq('B').ffill()
rets = px.pct_change()
# Cumulative return of every column since the first date.
((1 + rets).cumprod() - 1).plot()
print('block')


def calc_mom(price, lookback, lag):
    """Return row-wise standardized momentum ranks.

    Parameters
    ----------
    price : DataFrame
        One column per asset, one row per date.
    lookback : int
        Number of periods over which the percent change is measured.
    lag : int
        Number of periods the prices are shifted before measuring.

    Returns
    -------
    DataFrame
        Same shape and labels as ``price``. In every row the assets are
        ranked by lagged return (highest return gets rank 1), and the ranks
        are then centered on the row mean and divided by the row standard
        deviation.
    """
    mom_ret = price.shift(lag).pct_change(lookback)
    ranks = mom_ret.rank(axis=1, ascending=False)
    # The row statistics are Series indexed like the rows, so they must be
    # broadcast with axis=0. Plain ``-`` and ``/`` would align them against
    # the column labels and produce an all-NaN frame.
    demeaned = ranks.sub(ranks.mean(axis=1), axis=0)
    return demeaned.div(demeaned.std(axis=1), axis=0)


def compound(x):
    """Return the compounded total of the period returns in ``x``."""
    return (1 + x).prod() - 1


def daily_sr(x):
    """Return the mean of ``x`` divided by its standard deviation."""
    return x.mean() / x.std()