def cal_AV(data, start, end, dim='rate'):
    """
    Calculate Annual-volatility

    The variance of `dim` is taken over the rows in data[start:end], scaled by
    365 / num_days and square-rooted.

    :param data: original OHLCV data (sliced with data[start:end])
    :param start: start date
    :param end: end date
    :param dim: daily return dim
    :returns: AV
    :raises: none
    """
    # slice once so that the period length and the variance describe the same rows
    period_data = data[start:end]

    # calculate the period
    start_date = util.time_2_string(period_data.index.min())
    end_date = util.time_2_string(period_data.index.max())

    # NOTE(review): num_days is (days between first and last row) - 1, so a very
    # short window can make it 0 or negative - confirm callers never pass one.
    num_days = util.num_days_between(start_date, end_date) - 1

    AV = (period_data[dim].var() * (365 / num_days)) ** 0.5
    return AV
def cal_APR(data, start, end, dim='value', dividends=0):
    """
    Calculate Annual-Percentile-Rate

    The holding-period return from cal_HPR is divided by the length of the
    period expressed in years (days / 365).

    :param data: original OHLCV data
    :param start: start date
    :param end: end date
    :param dim: price dim to calculate
    :param dividends: dividends to add
    :returns: APR
    :raises: none
    """
    # holding-period return over the requested window
    holding_return = cal_HPR(data, start, end, dim, dividends)

    # length of the window, in years
    window = data[start:end]
    first_day = util.time_2_string(window.index.min())
    last_day = util.time_2_string(window.index.max())
    years = util.num_days_between(first_day, last_day) / 365.0

    return holding_return / years
def cal_EAR(data, start, end, dim='value', dividends=0):
    """
    Calculate Effective-Annual-Rate

    Compounds (1 + HPR) to a one-year horizon: (1 + HPR) ** (1 / years) - 1,
    where years is the window length in days divided by 365.

    :param data: original OHLCV data
    :param start: start date
    :param end: end date
    :param dim: price dim to calculate
    :param dividends: dividends to add
    :returns: EAR
    :raises: none
    """
    # growth factor over the requested window
    growth = cal_HPR(data, start, end, dim, dividends) + 1

    # length of the window, in years
    window = data[start:end]
    first_day = util.time_2_string(window.index.min())
    last_day = util.time_2_string(window.index.max())
    years = util.num_days_between(first_day, last_day) / 365.0

    # annualize the growth factor
    return growth ** (1 / years) - 1
def __init__(self, data, start_date=None, end_date=None, num_days=365, load_local_data=True):
    """
    Store the input data, seed the per-symbol records and fix the date range.

    :param data: mapping holding at least 'ta_data' (keys are split on '_' and
        the first part is used as the symbol)
    :param start_date: start date string; derived from end_date when None
    :param end_date: end date string; derived from start_date, or today, when None
    :param num_days: length of the range used to derive a missing date
    :param load_local_data: forwarded to self.init_record
    """
    # keep a copy of the input container (sec_data, ta_data)
    self.data = data.copy()

    # seed one record per symbol from the ta_data frames
    # NOTE(review): self.record is not created here - presumably defined on the
    # class or elsewhere; verify it is not shared between instances.
    for key, frame in data['ta_data'].items():
        self.record[key.split('_')[0]] = frame.copy()

    # initialize record
    self.init_record(load_local_data=load_local_data)

    # fill in whichever end of the range is missing
    if start_date is None and end_date is None:
        end_date = util.time_2_string(datetime.datetime.today().date())
        start_date = util.string_plus_day(string=end_date, diff_days=-num_days)
    elif end_date is None:
        end_date = util.string_plus_day(string=start_date, diff_days=num_days)
    elif start_date is None:
        start_date = util.string_plus_day(string=end_date, diff_days=-num_days)

    self.start_date = start_date
    self.end_date = end_date
def analyze(self, sort=True):
    """
    Summarize the performance of every symbol in self.sec_list.

    For each symbol with a non-empty record, the first/last 'value', EAR,
    sharp ratio and max drawdown are collected. When more than one
    non-benchmark symbol is present, a 'mean' and a 'total' row are added whose
    EAR / sharp ratio / max drawdown come from self.record['portfolio'].

    :param sort: sort the per-symbol rows by EAR (descending) when True
    :returns: dataframe indexed by symbol with columns start_date, end_date,
        start_money, end_money, profit, HPR, EAR, sharp_ratio, max_drawndown
        (rounded to 2 decimals)
    """
    # get records for self.sec_list
    records = {key: value for key, value in self.record.items() if key in self.sec_list}

    # init dict for storing results
    analysis = {
        'symbol': [], 'start_date': [], 'end_date': [],
        'start_money': [], 'end_money': [],
        'EAR': [], 'sharp_ratio': [], 'max_drawndown': []
    }

    # go through each stock
    for symbol, record_data in records.items():

        # the whole record is analysed (no start_date/end_date slicing)
        if len(record_data) == 0:
            print(f'no record for {symbol}')
            continue

        min_idx = record_data.index.min()
        max_idx = record_data.index.max()

        # analysis profit, hpr, ear, etc.
        analysis['symbol'].append(symbol)
        analysis['start_date'].append(util.time_2_string(min_idx.date()))
        analysis['end_date'].append(util.time_2_string(max_idx.date()))
        analysis['start_money'].append(record_data.loc[min_idx, 'value'])
        analysis['end_money'].append(record_data.loc[max_idx, 'value'])

        EAR = finance_util.cal_EAR(data=record_data, start=min_idx.date(), end=max_idx.date(), dim='value', dividends=0)
        analysis['EAR'].append(EAR)

        sharp_ratio = finance_util.cal_sharp_ratio(data=record_data, start=None, end=None, price_dim='value')
        analysis['sharp_ratio'].append(sharp_ratio)

        max_drawndown = finance_util.cal_max_drawndown(data=record_data)
        analysis['max_drawndown'].append(max_drawndown)

    # transform dict to dataframe
    analysis = pd.DataFrame(analysis).set_index('symbol')
    if sort:
        analysis = analysis.sort_values('EAR', ascending=False)

    # calculate sum and mean for non benchmark stocks
    non_benchmark_list = [x for x in analysis.index.tolist() if x != 'benchmark']
    non_benchmark_analysis = analysis.loc[non_benchmark_list, analysis.columns].copy()
    if len(non_benchmark_analysis) > 1:

        # only the money columns are aggregated: the frame also holds string
        # date columns, which cannot be averaged
        money = non_benchmark_analysis[['start_money', 'end_money']]
        analysis_mean = money.mean()
        analysis_sum = money.sum()

        # calculate metrics of the whole portfolio
        value_sum = self.record['portfolio'].copy()
        value_sum['rate'] = value_sum['value'].pct_change().fillna(0)
        total_ear = finance_util.cal_EAR(data=value_sum, dim='value', start=None, end=None)
        total_max_drawndown = finance_util.cal_max_drawndown(data=value_sum, dim='value')
        total_sharp_ratio = finance_util.cal_sharp_ratio(data=value_sum, price_dim='value', rate_dim='rate', start=None, end=None)

        # resort dataframe so that the benchmark row comes last
        if self.benchmark is not None:
            right_order = [x for x in analysis.index if x != 'benchmark'] + ['benchmark']
            analysis = analysis.loc[right_order].copy()

        # DataFrame.append no longer exists in pandas >= 2.0, use concat instead
        mean_row = pd.DataFrame({
            'start_date': '', 'end_date': '',
            'start_money': analysis_mean['start_money'], 'end_money': analysis_mean['end_money'],
            'EAR': total_ear, 'sharp_ratio': total_sharp_ratio, 'max_drawndown': total_max_drawndown
        }, index=['mean'])
        total_row = pd.DataFrame({
            'start_date': '', 'end_date': '',
            'start_money': analysis_sum['start_money'], 'end_money': analysis_sum['end_money'],
            'EAR': total_ear, 'sharp_ratio': total_sharp_ratio, 'max_drawndown': total_max_drawndown
        }, index=['total'])
        analysis = pd.concat([analysis, mean_row, total_row])

    # post process
    analysis['profit'] = analysis['end_money'] - analysis['start_money']
    analysis['HPR'] = analysis['profit'] / analysis['start_money']
    analysis = analysis[['start_date', 'end_date', 'start_money', 'end_money', 'profit', 'HPR', 'EAR', 'sharp_ratio', 'max_drawndown']].round(2)

    return analysis
def recalculate_data(self, sec_list, mode=None, start_date=None, end_date=None):
    """
    Rebuild self.record for the symbols in sec_list over start_date~end_date.

    Symbols whose record already covers the range are cut (mode None), get
    their signals recalculated (mode 'signal') or are recalculated from raw
    data (mode 'trend'). Symbols whose record does not cover the range, or
    that have no record yet, are always recalculated from raw data. Finally
    self.init_record(load_local_data=False) is called.

    :param sec_list: symbols to process
    :param mode: None, 'signal' or 'trend'
    :param start_date: start date string, defaults to self.start_date
    :param end_date: end date string, defaults to self.end_date
    :returns: None
    """
    # verify value of mode
    if mode not in ['trend', 'signal', None]:
        print(f'Unknown mode: {mode}')
        return None

    # copy sec_data
    sec_data = self.data['sec_data'].copy()

    # set start_date/end_date for recalculation
    start_date = self.start_date if start_date is None else start_date
    end_date = self.end_date if end_date is None else end_date

    # snap start_date to the earliest row at/after it across all raw data;
    # skipped when there is no raw data at all
    global_min_date = None
    for raw_data in sec_data.values():
        min_date = raw_data[start_date:].index.min()
        global_min_date = min_date if global_min_date is None else min(min_date, global_min_date)
    if global_min_date is not None:
        start_date = util.time_2_string(global_min_date)

    # set recalculate mode for each symbol
    cut_data = []
    recalculate_trend = []
    recalculate_signal = []
    for symbol in self.record.keys():

        # skip symbols which not in sec_list
        if symbol not in sec_list:
            continue

        # get data and its range
        tmp_data = self.record[symbol]
        min_idx = util.time_2_string(tmp_data.index.min())
        max_idx = util.time_2_string(tmp_data.index.max())

        # dates are compared as strings
        # NOTE(review): presumably 'YYYY-MM-DD', which sorts chronologically - confirm
        covers_range = (min_idx <= start_date) and (max_idx >= end_date)

        if not covers_range:
            # record does not cover start_date~end_date, recalculate from trend
            recalculate_trend.append(symbol)
        elif mode is None:
            cut_data.append(symbol)
        elif mode == 'signal':
            recalculate_signal.append(symbol)
        else:
            # mode was validated above, so this is 'trend'
            recalculate_trend.append(symbol)

    # for symbols just need to be cut
    for symbol in set(cut_data):
        self.record[symbol] = self.record[symbol][start_date:end_date].copy()

    # for symbols need to recalculate signals
    for symbol in set(recalculate_signal):
        self.record[symbol] = ta_util.calculate_ta_signal(df=self.record[symbol])[start_date:end_date]

    # for symbols need to recalculate trend and signal
    recalculate_trend += [x for x in sec_list if x not in self.record.keys()]
    recalculate_trend = set(recalculate_trend)

    # report symbols without raw data
    for symbol in recalculate_trend:
        if f'{symbol}_day' not in sec_data.keys():
            print(f'Simulator does not have raw data for {symbol}, not able to recalculate trend')

    for symbol_interval in sec_data.keys():
        symbol = symbol_interval.split('_')[0]
        if symbol not in recalculate_trend:
            continue

        raw_slice = sec_data[symbol_interval][start_date:end_date]
        if len(raw_slice) > 0:
            self.record[symbol] = ta_util.calculation(df=raw_slice, symbol=symbol)
        else:
            print(f'{symbol} has no data, remove it from record')
            # the symbol may never have had a record (it came from sec_list),
            # or may already have been removed for another interval
            self.record.pop(symbol, None)

    # reset record
    self.init_record(load_local_data=False)
def download_stock_data_from_tiger(sec_code, time_col='time', quote_client=None, download_limit=1200, start_date=None, end_date=None, file_path='drive/My Drive/stock_data_us/', file_format='.csv', is_return=False, is_print=True):
    """
    Download bars for one symbol through the Tiger quote client and merge them
    into the local data file of that symbol.

    Existing data is read from file_path + sec_code + file_format; when it is
    not empty, downloading starts from its latest date. Errors are caught and
    printed together with the stage in which they occurred.

    :param sec_code: symbol to download
    :param time_col: name of the time column in the raw response; the paging
        and renaming steps additionally expect that column to be called 'time'
    :param quote_client: client object providing get_bars
    :param download_limit: max number of rows requested per get_bars call
    :param start_date: start date string, None means from timestamp 0
    :param end_date: end date string, None means now
    :param file_path: folder of the data file
    :param file_format: suffix of the data file
    :param is_return: return the merged data when True
    :param is_print: print a summary of the change when True
    :returns: merged data when is_return is True, otherwise None
    """
    # name of the date column/index of stored data
    date_col = 'Date'

    # build the file name of the stock data
    filename = file_path + sec_code + file_format

    # start downloading
    stage = 'downloading_started'
    data = pd.DataFrame()
    try:
        # load existing data if the file is already there
        stage = 'loading_existed_data'
        if os.path.exists(filename):
            data = read_stock_data(sec_code, file_path=file_path, file_format=file_format, time_col=date_col)

        # remember the original number of rows, continue from the latest date
        init_len = len(data)
        if init_len > 0:
            start_date = util.time_2_string(data.index.max(), date_format='%Y-%m-%d')

        # download data through the tiger API
        stage = 'downloading_new_data'

        # convert start/end to millisecond timestamps
        if start_date is not None:
            begin_time = round(time.mktime(util.string_2_time(start_date).timetuple()) * 1000)
        else:
            begin_time = 0
        if end_date is not None:
            end_time = round(time.mktime(util.string_2_time(end_date).timetuple()) * 1000)
        else:
            end_time = round(time.time() * 1000)

        # page backwards in time until a batch is smaller than download_limit
        batches = []
        batch_len = download_limit
        while batch_len >= download_limit:
            batch = quote_client.get_bars([sec_code], begin_time=begin_time, end_time=end_time, limit=download_limit)
            batch_len = len(batch)
            if batch_len == 0:
                # nothing left to download; min() of an empty batch is NaN
                break
            # older batches go in front
            batches.insert(0, batch)
            end_time = int(batch.time.min())
        new_data = pd.concat(batches) if batches else pd.DataFrame()

        # process downloaded data
        stage = 'processing_new_data'
        if len(new_data) > 0:
            new_data.drop('symbol', axis=1, inplace=True)
            new_data[time_col] = new_data[time_col].apply(lambda x: util.timestamp_2_time(x).date())
            new_data.rename(columns={
                'open': 'Open', 'high': 'High', 'low': 'Low', 'close': 'Close',
                'volume': 'Volume', 'time': 'Date'
            }, inplace=True)
            new_data['Adj Close'] = new_data['Close']
            new_data = util.df_2_timeseries(df=new_data, time_col=date_col)

            # attach to existing data (DataFrame.append no longer exists in pandas >= 2.0)
            data = pd.concat([data, new_data], sort=False) if init_len > 0 else new_data

        # drop duplicates, sort and save
        stage = 'saving_data'
        data = data.reset_index().drop_duplicates(subset=date_col, keep='last')
        data = data.sort_values(by=date_col)
        data.to_csv(filename, index=False)

        # compare number of rows before/after
        if is_print:
            final_len = len(data)
            diff_len = final_len - init_len
            print(
                '[From Tiger]%(sec_code)s: %(first_date)s - %(latest_date)s, 新增记录 %(diff_len)s/%(final_len)s, '
                % dict(diff_len=diff_len, final_len=final_len,
                       first_date=data[date_col].min().date(),
                       latest_date=data[date_col].max().date(),
                       sec_code=sec_code))
    except Exception as e:
        print(sec_code, stage, e)

    # return data
    if is_return:
        # after a failure the date may still be the index rather than a column
        if date_col in data.columns:
            data = util.df_2_timeseries(data, time_col=date_col)
        return data
def download_stock_data_from_yahoo(sec_code, time_col='Date', start_date=None, end_date=None, file_path='drive/My Drive/stock_data_us/', file_format='.csv', is_return=False, is_print=True):
    """
    Download data for one symbol through web.DataReader (source 'yahoo') and
    merge it into the local data file of that symbol.

    Existing data is read from file_path + sec_code + file_format; when it is
    not empty, downloading starts from its latest date. Errors are caught and
    printed together with the stage in which they occurred.

    :param sec_code: symbol to download
    :param time_col: name of the date column of stored data
    :param start_date: start date string
    :param end_date: end date string
    :param file_path: folder of the data file
    :param file_format: suffix of the data file
    :param is_return: return the merged data when True
    :param is_print: print a summary of the change when True
    :returns: merged data when is_return is True, otherwise None
    """
    # build the file name of the stock data
    filename = file_path + sec_code + file_format

    # start downloading
    stage = 'downloading_started'
    data = pd.DataFrame()
    try:
        # load existing data if the file is already there
        stage = 'loading_existed_data'
        if os.path.exists(filename):
            data = read_stock_data(sec_code, file_path=file_path, file_format=file_format, time_col=time_col)

        # remember the original number of rows, continue from the latest date
        init_len = len(data)
        if init_len > 0:
            start_date = util.time_2_string(data.index.max(), date_format='%Y-%m-%d')

        # download new data and attach it to the existing data
        stage = 'appending_new_data'
        tmp_data = web.DataReader(sec_code, 'yahoo', start=start_date, end=end_date)
        if len(tmp_data) > 0:
            # DataFrame.append no longer exists in pandas >= 2.0
            data = pd.concat([data, tmp_data], sort=False) if init_len > 0 else tmp_data

        # save data
        stage = 'saving_data'
        data = data.reset_index().drop_duplicates(subset=time_col, keep='last')
        data.to_csv(filename, index=False)

        # compare number of rows before/after
        if is_print:
            final_len = len(data)
            diff_len = final_len - init_len
            print(
                '%(sec_code)s: %(first_date)s - %(latest_date)s, 新增记录 %(diff_len)s/%(final_len)s, '
                % dict(diff_len=diff_len, final_len=final_len,
                       first_date=data[time_col].min().date(),
                       latest_date=data[time_col].max().date(),
                       sec_code=sec_code))
    except Exception as e:
        print(sec_code, stage, e)

    # return data
    if is_return:
        # after a failure the date may still be the index rather than a column
        if time_col in data.columns:
            data = util.df_2_timeseries(data, time_col=time_col)
        return data