class NormalValidation(object):
    """Test whether a symbol's daily returns are consistent with a normal distribution.

    Runs the Shapiro-Wilk and Anderson-Darling tests on the day-over-day
    percentage changes of the 'Adj Close' column and plots their histogram
    with a fitted normal density on top.
    """

    def __init__(self):
        """Create the StockData accessor used to fetch price history."""
        self.sd = StockData()

    def validate(self, symbol, start_date, end_date):
        """Print normality test results for ``symbol`` and show the histogram.

        :param symbol: symbol passed through to ``StockData.fetch_pd_data``
        :param start_date, end_date: YYYY-MM-DD
        """
        data = self.sd.fetch_pd_data(symbol, start_date, end_date)
        # The first pct_change entry has no predecessor (NaN), so it is dropped.
        daily_changes = data['Adj Close'].astype('float').pct_change(periods=1).tolist()[1:]

        # `density=True` replaces the `normed` keyword that matplotlib removed;
        # only the bin edges are needed later, for drawing the fitted curve.
        _, bins, _ = plt.hist(daily_changes, 50, density=True, facecolor='green', alpha=0.75)
        mu = np.mean(daily_changes)
        sigma = np.std(daily_changes)

        # shapiro() returns (test statistic, p-value).
        t_stat, p_value = shapiro(daily_changes)
        print('Shapiro Test'.center(110, '-'))
        print('Mean: %.2f' % mu)
        print('Std: %.2f' % sigma)
        print('t_stat: %.2f' % t_stat)
        print('p_value: %.2f' % p_value)
        if p_value > 0.05:
            print('At 0.05 significance level, Null hypothsis that points are from normal distribution can NOT be rejected!')
        else:
            print('At 0.05 significance level, Reject Normal distribution!')

        t_stat, critical_v, sig = anderson(daily_changes, 'norm')
        print('Anderson Test'.center(110, '-'))
        print('t_stat: %s' % t_stat)
        print('critial_v: %s' % critical_v)
        print('sig: %s' % sig)
        # Index 2 is used as the 5% critical value (scipy lists the levels for
        # 'norm' as 15%, 10%, 5%, 2.5%, 1%; `sig` is printed above to confirm).
        if t_stat > critical_v[2]:
            print('At 0.05 significance level, Reject Normal distribution!')
        else:
            print('At 0.05 significance level, Null hypothsis that points are from normal distribution can NOT be rejected!')

        # Normal density evaluated at the bin edges, written out with numpy
        # because matplotlib.mlab.normpdf no longer exists in current releases.
        y = np.exp(-0.5 * ((bins - mu) / sigma) ** 2) / (sigma * np.sqrt(2 * np.pi))
        plt.plot(bins, y, 'r--', linewidth=1)
        plt.grid(True)
        plt.show()
def __init__(self):
    """Create the StockData accessor and load the list of symbols from it."""
    # NOTE(review): no enclosing class header is visible next to this
    # definition in this part of the file -- confirm which class owns it.
    self.sd = StockData()
    self.symbols = self.sd.fetch_list_of_symbols()
class AverageReversionAlgorithm(object):
    """Back-test a moving-average reversion strategy on adjusted close prices.

    For every date a ``diff_rate`` is computed as
    ``(adj_close - ma) / ma_std``.  A symbol is bought when ``diff_rate`` drops
    below ``-alpha`` and sold when ``diff_rate`` is at or above zero.  ``ma``
    and ``ma_std`` come from the ``get_ma`` / ``get_std`` helpers (presumably a
    rolling mean and rolling standard deviation over ``window`` rows --
    confirm against those helpers).
    """

    def __init__(self):
        """Create the StockData accessor and load the list of symbols from it."""
        self.sd = StockData()
        self.symbols = self.sd.fetch_list_of_symbols()

    def simulate(self, symbol, start_date, end_date, params):
        """Run one single-symbol back-test and return its summary.

        :param start_date, end_date: YYYY-MM-DD
        :param params: dict with the keys 'window', 'alpha' and
            'transaction_budget'
        :return: the summary dict produced by :meth:`trade`, with ``params``
            stored under the 'params' key
        """
        window = params['window']
        alpha = params['alpha']
        transaction_budget = params['transaction_budget']
        adjusted_df = self.construct(symbol, start_date, end_date, window)
        signal_df = self.find_buy_signals(adjusted_df, alpha=alpha)
        _, summary = self.trade(signal_df, transaction_budget=transaction_budget)
        summary.setdefault('params', params)
        return summary

    @staticmethod
    def combinations(windows, alphas, transaction_budgets):
        """Return every (window, alpha, transaction_budget) combination.

        :return: list of parameter dicts, ordered with ``windows`` varying
            slowest and ``transaction_budgets`` varying fastest
        """
        return [
            {'window': window, 'alpha': alpha, 'transaction_budget': transaction_budget}
            for window in windows
            for alpha in alphas
            for transaction_budget in transaction_budgets
        ]

    def optimize(self, symbol, start_date, end_date, params):
        """Grid-search the parameters for one symbol.

        :param params: dict with the keys 'windows', 'alphas' and
            'transaction_budgets', each an iterable of candidate values
        :return: the three summaries with the highest 'return'
        """
        all_subsets = self.combinations(
            params['windows'], params['alphas'], params['transaction_budgets'])
        performances = [
            self.simulate(symbol, start_date, end_date, subset)
            for subset in all_subsets
        ]
        # Ascending sort followed by a reversal: best return first.
        ranked_performances = sorted(performances, key=lambda x: x['return'])[::-1]
        return ranked_performances[:3]

    def construct(self, symbol, start_date, end_date, window):
        """Build the per-date frame the strategy works on.

        :return: DataFrame indexed by date with the columns 'adj_close', 'ma',
            'ma_std' and 'diff_rate'; the first ``window`` rows are dropped
        """
        data = self.sd.fetch_pd_data(symbol, start_date, end_date)
        adj_close = data['Adj Close'].astype('float')
        ma = get_ma(adj_close, window=window)
        ma_std = get_std(adj_close, window=window)
        new_df = pd.DataFrame({'date': data.index,
                               'adj_close': adj_close,
                               'ma': ma,
                               'ma_std': ma_std})
        # Take an explicit copy of the positional slice so that adding a column
        # below writes to an independent frame instead of a view of `new_df`.
        adjusted_new_df = new_df.set_index('date').iloc[window:].copy()
        adjusted_new_df['diff_rate'] = (
            (adjusted_new_df.adj_close - adjusted_new_df.ma) / adjusted_new_df.ma_std)
        return adjusted_new_df

    def find_buy_signals(self, df, alpha=2):
        """Add a 'signal' column ('Buy', 'Sell' or 'Standby') to ``df``.

        ``df`` is modified in place and also returned.  Rows whose
        ``diff_rate`` is below ``-alpha`` become 'Buy'; rows whose
        ``diff_rate`` is at or above zero become 'Sell' (applied last, so it
        wins if both conditions hold).
        """
        df['signal'] = 'Standby'
        # .loc with a boolean mask replaces the removed DataFrame.set_value.
        df.loc[df.diff_rate < -alpha, 'signal'] = 'Buy'
        df.loc[df.diff_rate >= 0, 'signal'] = 'Sell'
        return df

    @staticmethod
    def print_full(x):
        """Print ``x`` with the pandas row limit raised to ``len(x)``."""
        # option_context restores the previous setting even if print() raises.
        with pd.option_context('display.max_rows', len(x)):
            print(x)

    @staticmethod
    def buy_stock(cash, budget, stock_price):
        """Buy as many whole shares as one transaction allows.

        :param cash: total cash at hand
        :param budget: the budget to spend in this transaction
        :param stock_price: price of one share
        :return: ``(amount, remaining_cash)`` -- the number of whole shares
            bought and the cash left afterwards
        """
        # Never spend more than is actually available.
        spend = min(cash, budget)
        # divmod keeps the share count and the change consistent: the count is
        # floored, so amount * stock_price + change == spend.
        amount, change = divmod(spend, stock_price)
        return amount, cash - spend + change

    @staticmethod
    def cal_asset(cash, stocks, price):
        """Return cash plus the value of all held shares at ``price``.

        :param stocks: list of share amounts, one entry per purchase
        """
        return sum(stocks) * price + cash

    def trade(self, signal_df, initial_asset=100000, transaction_budget=10000):
        """Replay the signals of one symbol row by row.

        Adds the columns 'cash', 'amount', 'asset' and 'gains' to
        ``signal_df`` (in place).  'asset' and 'gains' are valued before the
        row's own action; 'cash' and 'amount' are the state after it.

        :param signal_df: frame with the columns 'signal' and 'adj_close'
        :return: ``(signal_df, summary)`` where ``summary`` has the keys
            'longs', 'shorts', 'return' and 'benchmark'
        :raises ValueError: if ``signal_df`` has no rows
        """
        if len(signal_df) == 0:
            raise ValueError('signal_df must contain at least one row')

        cash = initial_asset
        stocks = []
        long_actions = 0
        short_actions = 0
        cash_history = []
        amount_history = []
        asset_history = []
        gains_history = []

        for signal, adj_close in zip(signal_df['signal'], signal_df['adj_close']):
            asset = self.cal_asset(cash, stocks, adj_close)
            asset_history.append(asset)
            gains_history.append(asset - initial_asset)

            if signal == 'Buy':
                amount, cash = self.buy_stock(cash, transaction_budget, adj_close)
                stocks.append(amount)
                long_actions += 1
            elif signal == 'Sell':
                # Liquidate everything: all value is now held as cash.
                cash = asset
                if sum(stocks) > 0:
                    stocks = []
                    short_actions += 1

            cash_history.append(cash)
            amount_history.append(sum(stocks))

        signal_df['cash'] = cash_history
        signal_df['amount'] = amount_history
        signal_df['asset'] = asset_history
        signal_df['gains'] = gains_history

        return_rate = gains_history[-1] * 1.0 / initial_asset
        start_price = signal_df['adj_close'].iloc[0]
        # Benchmark: price change of the symbol itself over the same period.
        benchmark = (signal_df['adj_close'].iloc[-1] - start_price) / start_price
        summary = {'longs': long_actions,
                   'shorts': short_actions,
                   'return': return_rate,
                   'benchmark': benchmark}
        return signal_df, summary

    @staticmethod
    def cal_multi_assets(previous_cash, stocks, stock_prices):
        """Return cash plus the value of every held symbol.

        :param stocks: dict mapping symbol to a list of share amounts
        :param stock_prices: dict mapping symbol to its current price
        """
        stock_values = [sum(amount) * stock_prices[symbol]
                        for symbol, amount in stocks.items()]
        return previous_cash + sum(stock_values)

    @staticmethod
    def _daily_signals(diff_rates, alpha):
        """Return ``(buy_symbol, sell_symbols)`` for a single date.

        :param diff_rates: Series of diff_rate values indexed by symbol
        :return: the symbol to buy (or None) and the list of symbols to sell
        """
        sell_symbols = list(diff_rates.index[diff_rates >= 0])
        candidates = diff_rates[diff_rates < -alpha]
        buy_symbol = None
        if not candidates.empty:
            # Buy the one with the largest deviation below its average.
            best = candidates.idxmin()
            # A sell signal on the same symbol takes precedence over the buy.
            if best not in sell_symbols:
                buy_symbol = best
        return buy_symbol, sell_symbols

    def select(self, pools, start_date, end_date, params):
        """Back-test the strategy over a pool of symbols sharing one cash account.

        On each date every held symbol with a sell signal is sold, then at
        most one symbol (the qualifying one with the lowest ``diff_rate``) is
        bought.

        :param pools: iterable of symbols
        :param params: dict with the keys 'window', 'alpha',
            'transaction_budget' and optionally 'initial_asset'
            (default 100000)
        :return: dict with the keys 'longs', 'shorts', 'return' and 'params'
        :raises ValueError: if no data is available for the pool
        """
        window = params['window']
        alpha = params['alpha']
        transaction_budget = params['transaction_budget']
        initial_asset = params.get('initial_asset', 100000)

        frames = {}
        for symbol in pools:
            if symbol not in frames:
                frames[symbol] = self.construct(symbol, start_date, end_date, window)

        # One column per symbol, rows aligned on the union of all dates.
        diff_rates = pd.DataFrame(
            {symbol: frame['diff_rate'] for symbol, frame in frames.items()})
        prices = pd.DataFrame(
            {symbol: frame['adj_close'] for symbol, frame in frames.items()})
        if diff_rates.empty:
            raise ValueError('no price data available for the requested pools')

        cash = initial_asset
        gains = 0
        stocks = {}
        longs = 0
        shorts = 0
        for position, (_, day_rates) in enumerate(diff_rates.iterrows()):
            stock_prices = prices.iloc[position].to_dict()
            # Gains are valued before this date's own transactions.
            gains = self.cal_multi_assets(cash, stocks, stock_prices) - initial_asset

            buy_symbol, sell_symbols = self._daily_signals(day_rates, alpha)

            held_to_sell = [symbol for symbol in sell_symbols if symbol in stocks]
            if held_to_sell:
                cash = cash + self.sell_stocks(stocks, stock_prices, held_to_sell)
                for symbol in held_to_sell:
                    del stocks[symbol]
                shorts += 1

            if buy_symbol is not None:
                amount, cash = self.buy_stock(
                    cash, transaction_budget, stock_prices[buy_symbol])
                stocks.setdefault(buy_symbol, []).append(amount)
                longs += 1

        return_rate = gains * 1.0 / initial_asset
        summary = {'longs': longs,
                   'shorts': shorts,
                   'return': return_rate,
                   'params': params}
        return summary

    def select_all(self, pools, start_date, end_date, params):
        """Run :meth:`select` for every parameter combination.

        Each summary is printed as soon as it is computed.

        :param params: dict with the keys 'windows', 'alphas' and
            'transaction_budgets', each an iterable of candidate values
        :return: all summaries, highest 'return' first
        """
        all_subsets = self.combinations(
            params['windows'], params['alphas'], params['transaction_budgets'])
        performances = []
        for subset in all_subsets:
            performance = self.select(pools, start_date, end_date, subset)
            print(performance)
            performances.append(performance)
        ranked_performances = sorted(performances, key=lambda x: x['return'])[::-1]
        return ranked_performances

    @staticmethod
    def sell_stocks(stocks, stock_prices, stocks_to_sell):
        """Return the cash raised by selling ``stocks_to_sell`` at current prices.

        Symbols that are not held contribute nothing.  ``stocks`` itself is
        not modified.
        """
        sold = [sum(stocks.get(symbol, [])) * stock_prices[symbol]
                for symbol in stocks_to_sell]
        return sum(sold)
def __init__(self):
    """Create the StockData accessor used by this object."""
    # NOTE(review): no enclosing class header is visible next to this
    # definition in this part of the file -- confirm which class owns it.
    self.sd = StockData()