Example #1
0
def get_stock_returns(stocks, start_date, end_date, freq, n_jobs=10):
    """Build a table of forward period returns from per-stock close prices.

    Close-price bars are fetched concurrently (one
    ``csf.get_stock_hist_bar`` call per code), pivoted into a wide frame
    (rows: timestamps, columns: codes), forward-filled, reduced to the last
    available row of every month/week/quarter, and turned into percentage
    changes. The changes are shifted back by one row, so the value stored at
    a period end is the return realised over the *following* period.

    Args:
        stocks: Iterable of stock codes. It is materialised into a list, so
            one-shot iterables such as generators are accepted.
        start_date: Passed through unchanged to ``csf.get_stock_hist_bar``.
        end_date: Passed through unchanged to ``csf.get_stock_hist_bar``.
        freq: One of ``'M'``, ``'W'`` or ``'Q'``. It is forwarded to the data
            API and also selects how rows are bucketed into periods.
        n_jobs: Number of worker threads used for the downloads.

    Returns:
        A DataFrame with a single column ``'ret'`` and a sorted two-level
        index named ``['date', 'code']``; ``date`` values are strings
        produced by ``str(timestamp.date())``.

    Raises:
        KeyError: If ``freq`` is not ``'M'``, ``'W'`` or ``'Q'``.
        ValueError: Raised by ``pd.concat`` when ``stocks`` is empty.

    NOTE(review): assumes each frame returned by ``csf.get_stock_hist_bar``
    is indexed by timestamps and has a ``'close'`` column -- confirm against
    the csf API.
    """
    # Fail fast, before any network traffic, on an unsupported frequency.
    if freq not in ('M', 'W', 'Q'):
        raise KeyError(
            "unsupported freq %r; expected 'M', 'W' or 'Q'" % (freq,))

    # The codes are iterated twice (download + tagging); a list keeps both
    # passes aligned even when the caller hands in a generator.
    codes = list(stocks)
    bars = Parallel(n_jobs=n_jobs, backend='threading', verbose=5)(
        delayed(csf.get_stock_hist_bar)(code, freq,
                                        start_date=start_date,
                                        end_date=end_date,
                                        field=['date', 'close'])
        for code in codes)

    # Tag every frame with its code without mutating the fetched objects.
    tagged = [bar.assign(tick=code) for code, bar in zip(codes, bars)]
    close_price = pd.concat(tagged).dropna()
    # The index name was originally empty.
    close_price.index.name = 'dt'

    # Pivot into one frame: index = dt, columns = tick.
    close_price = (close_price.set_index('tick', append=True)['close']
                   .unstack('tick')
                   .sort_index()
                   .ffill())

    # Keep the last row of each period. Bucketing by Period (rather than by
    # (calendar year, ISO week number)) keeps weeks that straddle New Year in
    # one contiguous group instead of merging them with an unrelated week of
    # the same calendar year.
    period_key = close_price.index.to_period(freq)
    close_price = close_price.groupby(period_key).tail(1)

    # shift(-1): the value at a period end is the return of the next period.
    returns = close_price.pct_change().shift(-1).dropna(axis=1, how='all')
    returns.index = returns.index.map(lambda ts: str(ts.date()))
    returns.index.name = 'date'

    # Wide -> long: one row per (date, code) pair.
    returns = returns.unstack().to_frame()
    returns.columns = ['ret']
    returns = returns.swaplevel(0, 1).sort_index()
    returns.index.names = ['date', 'code']
    return returns