def testParseDates(self):
    """Check that ``Date.strptime`` parses several date string layouts."""
    # Each case: (input text, format string, expected (year, month, day)).
    cases = [
        ("2006-01-15", "%Y-%m-%d", (2006, 1, 15)),
        ("12/02/2012", "%m/%d/%Y", (2012, 12, 2)),
        # Same text as above, read day-first instead of month-first.
        ("12/02/2012", "%d/%m/%Y", (2012, 2, 12)),
        ("20011002", "%Y%m%d", (2001, 10, 2)),
    ]

    for text, fmt, (year, month, day) in cases:
        parsed = Date.strptime(text, fmt)
        matches = parsed == Date(year, month, day)
        self.assertTrue(
            matches,
            "date parsing failed\n"
            " input date: {0:s}\n"
            " parsed: {1}".format(text, parsed))
def _filter_sec_on_tiaocang_date(self, tiaocang_date, sec_id):
    """
    Flag securities that should be excluded on a rebalance date.

    :param tiaocang_date: rebalance date; its first 10 characters (as a
        string) are parsed with ``Date.strptime`` and it is also used as a
        column key of the price table
    :param sec_id: security ids forwarded to ``get_sec_price``
    :return: the ``'filters'`` column of the price table: 1 when at least
        one of the three filters below is set, 0 otherwise
    """
    calendar = Calendar('China.SSE')

    def business_days_back(tenor):
        # Shift the rebalance date along the SSE calendar by ``tenor``.
        base = Date.strptime(str(tiaocang_date)[:10])
        return calendar.advanceDate(base, Period(tenor)).toDateTime()

    prev_day = business_days_back('-1b')
    prev_day2 = business_days_back('-2b')

    prices = get_sec_price(start_date=prev_day2,
                           end_date=tiaocang_date,
                           sec_ids=sec_id,
                           data_source=self._data_source,
                           csv_path=self._csv_path)
    # After the transpose the three dates are addressed as columns.
    prices = prices.transpose()
    prices.index.name = 'secID'

    px_today = prices[tiaocang_date]
    px_prev = prices[prev_day]
    px_prev2 = prices[prev_day2]

    # Drop securities whose gain is so large they probably cannot be bought.
    max_ratio = 1 + self._filter_return_on_tiaocang_date
    prices['returnFilter'] = px_today / px_prev > max_ratio

    # Drop securities with a NaN price on any of the three days (new listings).
    prices['ipoFilter'] = pd.isnull(px_today * px_prev * px_prev2)

    # Drop suspended securities; the criterion is an identical closing price
    # on three consecutive days.
    prices['tingpaiFilter'] = (px_today == px_prev) & (px_prev == px_prev2)

    # Arithmetic OR of the three boolean columns (result is 0/1).
    pass_return = 1 - prices['returnFilter']
    pass_ipo = 1 - prices['ipoFilter']
    pass_tingpai = 1 - prices['tingpaiFilter']
    prices['filters'] = 1 - pass_return * pass_ipo * pass_tingpai

    return prices['filters']
def testParseDates(self):
    """Verify ``Date.strptime`` against known dates in four string formats."""

    def expect_parse(input_date, date_format, year, month, day):
        # Parse the text and compare it with the explicitly built date.
        parsed = Date.strptime(input_date, date_format)
        is_expected = parsed == Date(year, month, day)
        self.assertTrue(is_expected,
                        "date parsing failed\n"
                        " input date: {0:s}\n"
                        " parsed: {1}".format(input_date, parsed))

    expect_parse("2006-01-15", "%Y-%m-%d", 2006, 1, 15)

    # The same text is read month-first, then day-first.
    ambiguous = "12/02/2012"
    expect_parse(ambiguous, "%m/%d/%Y", 2012, 12, 2)
    expect_parse(ambiguous, "%d/%m/%Y", 2012, 2, 12)

    expect_parse("20011002", "%Y%m%d", 2001, 10, 2)
def get_pos_adj_date(start_date,
                     end_date,
                     formats="%Y-%m-%d",
                     calendar='China.SSE',
                     freq='m',
                     return_biz_day=False):
    """
    Build the list of position adjustment dates between two dates.

    :param start_date: str/datetime.datetime, start date of strategy
    :param end_date: str/datetime.datetime, end date of strategy
    :param formats: optional, format used to parse string dates
    :param calendar: str, optional, name of the calendar to use in dates math
    :param freq: str, optional, the frequency of data (a key of _freqDict)
    :param return_biz_day: bool, optional, if the return dates are biz days
    :return: list of datetime.datetime, pos adjust dates not later than
        end_date
    :raises TypeError: if start_date or end_date is neither a str nor a
        datetime.datetime
    """

    def _to_date(value, label):
        # Convert each bound on its own so that a str start combined with a
        # datetime end (or the reverse) is accepted, and anything else fails
        # with a clear error instead of an unbound local further down.
        if isinstance(value, str):
            return Date.strptime(value, formats)
        if isinstance(value, datetime.datetime):
            return Date.fromDateTime(value)
        raise TypeError(
            "{0} must be a str or datetime.datetime, got {1}".format(
                label, type(value).__name__))

    d_start_date = _to_date(start_date, 'start_date')
    d_end_date = _to_date(end_date, 'end_date')

    cal = Calendar(calendar)
    unit = _freqDict[freq]
    pos_adjust_date = Schedule(d_start_date, d_end_date,
                               Period(length=1, units=unit), cal,
                               BizDayConventions.Unadjusted)

    # It fails if dStartDate is set to the first adjustment date and Schedule
    # is then used to compute the others, so the period dates are generated
    # first and each one is then moved to the last date of its period; that
    # last day is the position adjustment date.
    if unit == TimeUnits.Weeks:
        pos_adjust_date = [
            Date.nextWeekday(date, Weekdays.Friday)
            for date in pos_adjust_date[:-1]
        ]
    elif unit == TimeUnits.Months:
        pos_adjust_date = [
            cal.endOfMonth(date) for date in pos_adjust_date[:-1]
        ]
    elif unit == TimeUnits.Years:
        pos_adjust_date = [
            Date(date.year(), 12, 31) for date in pos_adjust_date[:-1]
        ]

    if return_biz_day:
        pos_adjust_date = [
            cal.adjustDate(date, BizDayConventions.Preceding)
            for date in pos_adjust_date
        ]

    # Convert to datetime and drop anything that falls after the end date.
    end_cutoff = d_end_date.toDateTime()
    as_datetimes = [Date.toDateTime(date) for date in pos_adjust_date]
    return [date for date in as_datetimes if date <= end_cutoff]
def calc_alpha_factor_weight_on_date(self, date):
    """
    Given a rebalance date, compute the weight matrices of the alpha factors.

    :param date: datetime (or a date string parsed with Date.strptime),
        rebalance date; it must be an element of self._tiaoCangDate
    :return: tuple (ret_low, ret_high) of pd.DataFrame,
        index = [layerFactor name], cols = [alpha factor name]
    :raises ValueError: if date is not found in self._tiaoCangDate

    With FactorWeightType.EqualWeight every row is self._alphaFactorSign.
    Otherwise each weight is mean / std of the values from calc_rank_ic()
    over the rebalance dates in the look-back window before ``date``.
    """
    if isinstance(date, basestring):
        date = Date.strptime(date).toDateTime()

    ret_low = pd.DataFrame(columns=self._alphaFactorNames)
    ret_high = pd.DataFrame(columns=self._alphaFactorNames)

    # Look the position up once. The window start is clamped at 0: a negative
    # slice start would wrap around to the end of the list and yield an empty
    # or wrongly sized window when fewer than _tiaoCangDateWindowSize dates
    # precede ``date``.
    date_pos = self._tiaoCangDate.index(date)
    window_start = max(0, date_pos - self._tiaoCangDateWindowSize)
    tiao_cang_date_range = self._tiaoCangDate[window_start:date_pos]

    for layerFactor in self._layerFactor:
        if self._factorWeightType == FactorWeightType.EqualWeight:
            ret_low.loc[layerFactor.name] = self._alphaFactorSign
            ret_high.loc[layerFactor.name] = self._alphaFactorSign
        else:
            # The rank IC table is computed lazily and cached on the instance.
            if self._rank_ic is None:
                self._rank_ic = self.calc_rank_ic()
            low = self._rank_ic[0][layerFactor.name]
            high = self._rank_ic[1][layerFactor.name]
            low_to_use = low.loc[tiao_cang_date_range]
            high_to_use = high.loc[tiao_cang_date_range]
            weight_low = low_to_use.mean(axis=0) / low_to_use.std(axis=0)
            weight_high = high_to_use.mean(axis=0) / high_to_use.std(axis=0)
            ret_low.loc[layerFactor.name] = weight_low.values
            ret_high.loc[layerFactor.name] = weight_high.values

    return ret_low, ret_high
def calc_alpha_factor_rank_on_date(self, date, factor_low_weight,
                                   factor_high_weight):
    """
    Given a rebalance date, rank the alpha factors of the securities.

    :param date: str/datetime, tiaoCangDate (strings are parsed with
        Date.strptime)
    :param factor_low_weight: pd.DataFrame, see
        calc_alpha_factor_weight_on_date
    :param factor_high_weight: pd.DataFrame, counterpart of
        factor_low_weight for the high group
    :return: pd.DataFrame, MultiIndex = [secID, layerFactor, low_high],
        cols = alpha factor names, passed through factor_na_handler
    """
    if isinstance(date, basestring):
        date = Date.strptime(date).toDateTime()

    def rank_columns(factor_values, weights, layer_name):
        # The sort direction is decided by the weight: a non-negative weight
        # ranks from low to high, a negative weight ranks from high to low.
        # (The weighting step is expected to use the absolute weight.)
        ranked = pd.DataFrame()
        for factor_name in self._alphaFactorNames:
            ascending = bool(weights[factor_name][layer_name] >= 0)
            column = factor_values[factor_name].rank(ascending=ascending,
                                                     axis=0)
            ranked = pd.concat([ranked, column], axis=1)
        return ranked

    ret = pd.DataFrame()
    for layer in self._layerFactor:
        layer_name = layer.name

        # Under each layer factor the securities are split into two groups.
        low_group, high_group = DCAMHelper.seperate_sec_group(layer, date)
        low_values = DCAMHelper.get_factor_on_date(self._alphaFactor,
                                                   low_group, date)
        high_values = DCAMHelper.get_factor_on_date(self._alphaFactor,
                                                    high_group, date)

        low_rank = rank_columns(low_values, factor_low_weight, layer_name)
        high_rank = rank_columns(high_values, factor_high_weight, layer_name)

        # Assemble the multi-index frame for this layer factor.
        sec_ids = np.append(low_rank.index, high_rank.index)
        layer_labels = [layer_name] * len(sec_ids)
        group_labels = ['low'] * len(low_rank) + ['high'] * len(high_rank)
        rank_values = pd.concat([low_rank, high_rank], axis=0).values
        multi_index = pd.MultiIndex.from_arrays(
            [sec_ids, layer_labels, group_labels],
            names=['secID', 'layerFactor', 'low_high'])
        layer_frame = pd.DataFrame(rank_values,
                                   index=multi_index,
                                   columns=self._alphaFactorNames)

        # merge
        ret = pd.concat([ret, layer_frame], axis=0)

    # NOTE(review): applied once after all layers are merged - confirm this
    # matches the original indentation of the NaN-handling step.
    ret = factor_na_handler(ret, self._na_handler)
    return ret
def forward_date(date, tenor, date_format='%Y-%m-%d'):
    """
    Step ``date`` back by ``tenor`` on the China.SSE calendar, then move one
    business day forward.

    :param date: str, date to start from, parsed with ``date_format``
    :param tenor: str, period text; it is prefixed with '-' to step backwards
    :param date_format: str, optional, format of ``date``
    :return: str form of the resulting date, or None when one of the date
        utility names is not defined (NameError)
    """
    try:
        # use pyfin instead to get more accurate and flexible date math
        parsed = Date.strptime(date, date_format)
        sse_calendar = Calendar('China.SSE')
        previous_period_end = sse_calendar.advanceDate(parsed,
                                                       Period('-' + tenor),
                                                       endOfMonth=True)
        # The value above is the end date of the previous period; shift it
        # forward by one business day so that date ranges do not overlap.
        shifted = sse_calendar.advanceDate(previous_period_end, Period('1b'))
        return str(shifted)
    except NameError:
        return None
def _filter_sec_on_tiaocang_date(self, tiaocang_date, sec_id):
    """
    Flag securities that should be excluded on a rebalance date.

    :param tiaocang_date: rebalance date; its first 10 characters (as a
        string) are parsed with ``Date.strptime`` and it is also used as a
        column key of the price table
    :param sec_id: security ids forwarded to
        ``WindMarketDataHandler.get_sec_price_on_date``
    :return: the ``'filters'`` column of the price table: 1 when at least
        one of the three filters below is set, 0 otherwise
    """
    sse = Calendar('China.SSE')
    # One and two business days before the rebalance date.
    day_minus_1, day_minus_2 = [
        sse.advanceDate(Date.strptime(str(tiaocang_date)[:10]), tenor).toDateTime()
        for tenor in ('-1b', '-2b')
    ]

    table = WindMarketDataHandler.get_sec_price_on_date(
        start_date=day_minus_2, end_date=tiaocang_date, sec_ids=sec_id)
    # After the transpose the three dates are addressed as columns.
    table = table.transpose()
    table.index.name = 'sec_id'

    # Drop securities whose gain is so large they probably cannot be bought.
    gain_ratio = table[tiaocang_date] / table[day_minus_1]
    table['returnFilter'] = gain_ratio > 1 + self._filterReturnOnTiaoCangDate

    # Drop securities with a NaN price on any of the three days (new listings).
    price_product = table[tiaocang_date] * table[day_minus_1] * table[day_minus_2]
    table['ipoFilter'] = pd.isnull(price_product)

    # Drop suspended securities; the criterion is an identical closing price
    # on three consecutive days.
    same_as_prev = table[tiaocang_date] == table[day_minus_1]
    prev_same_as_prev2 = table[day_minus_1] == table[day_minus_2]
    table['tingpaiFilter'] = same_as_prev & prev_same_as_prev2

    # Arithmetic OR of the three boolean columns (result is 0/1).
    none_set = ((1 - table['returnFilter']) * (1 - table['ipoFilter'])
                * (1 - table['tingpaiFilter']))
    table['filters'] = 1 - none_set
    return table['filters']
def get_report_date(act_date, return_biz_day=True):
    """
    Find the date of the latest quarterly report usable on a given date.

    For example, on Feb 20 the latest usable report is the third-quarter
    report of the previous year (report date 9-30).

    :param act_date: str/datetime.datetime, any date
    :param return_biz_day: bool, whether to move the report date to a
        China.SSE business day (Preceding convention)
    :return: datetime, the report date to use
    """
    if isinstance(act_date, str):
        act_date = Date.strptime(act_date)
    elif isinstance(act_date, datetime.datetime):
        act_date = Date.fromDateTime(act_date)

    act_month = act_date.month()
    act_year = act_date.year()

    # (first month, last month, year offset, report month, report day)
    report_rules = (
        (1, 3, -1, 9, 30),  # Jan-Mar: third-quarter report of last year
        (4, 7, 0, 3, 31),   # Apr-Jul: first-quarter report of this year
        (8, 9, 0, 6, 30),   # Aug-Sep: semi-annual report of this year
    )
    # Any other month: third-quarter report of this year.
    year, month, day = act_year, 9, 30
    for first_month, last_month, year_offset, rule_month, rule_day in report_rules:
        if first_month <= act_month <= last_month:
            year, month, day = act_year + year_offset, rule_month, rule_day
            break

    if not return_biz_day:
        return datetime.datetime(year, month, day)

    date_adj = Calendar('China.SSE').adjustDate(Date(year, month, day),
                                                BizDayConventions.Preceding)
    return date_adj.toDateTime()
def check_date(date):
    """
    Convert ``date`` to a PyFin ``Date``.

    :param date: str in '%Y-%m-%d' format, or a value accepted by
        ``Date.fromDateTime``
    :return: Date
    """
    from PyFin.DateUtilities import Date

    if not isinstance(date, str):
        return Date.fromDateTime(date)
    return Date.strptime(date, dateFormat='%Y-%m-%d')
def dcam_strat_main(factor_loader_params, analyzer_params, selector_params,
                    portfolio_params, update_params):
    """
    Run the DCAM strategy: load factors, score securities, select
    securities, then build the portfolio and evaluate its return.

    :param factor_loader_params: dict with required keys 'startDate',
        'endDate' and 'factorNormDict'
    :param analyzer_params: dict, optional keys 'factorWeightType',
        'tiaoCangDateWindowSize', 'saveSecScore'
    :param selector_params: dict, optional keys 'saveSecSelected',
        'nbSecSelectedPerIndustryMin', 'useIndustryName',
        'nbSecSelectedTotal', 'ignoreZeroWeight'
    :param portfolio_params: dict, optional keys 'benchmarkSecID',
        'rebalanceFreq', 'initialCapital', 'filterReturnOnTiaoCangDate',
        'dataSource'
    :param update_params: dict, optional bool keys 'updateFactor',
        'updateSecScore', 'updateSecSelect'; a stage that is not updated is
        loaded from _secScorePath / _secSelectedPath instead
    :return: None
    :raises ValueError: if 'updateSecScore' or 'updateSecSelect' is set
        while 'updateFactor' is not, since both stages read the factor data
    """
    # FactorLoader params
    start_date = factor_loader_params['startDate']
    end_date = factor_loader_params['endDate']
    factor_norm_dict = factor_loader_params['factorNormDict']

    # dcam analyzer params
    factor_weight_type = analyzer_params.get('factorWeightType',
                                             FactorWeightType.ICWeight)
    tiaocang_date_window_size = analyzer_params.get('tiaoCangDateWindowSize',
                                                    12)
    save_sec_score = analyzer_params.get('saveSecScore', True)

    # selector params
    save_sec_selected = selector_params.get('saveSecSelected', True)
    nb_sec_selected_per_industry_min = selector_params.get(
        'nbSecSelectedPerIndustryMin', 5)
    use_industry_name = selector_params.get('useIndustryName', True)
    nb_sec_selected_total = selector_params.get('nbSecSelectedTotal', 100)
    ignore_zero_weight = selector_params.get('ignoreZeroWeight', False)

    # portfolio params
    benchmark_sec_id = portfolio_params.get('benchmarkSecID', '000905.SH')
    re_balance_freq = portfolio_params.get('rebalanceFreq', FreqType.EOM)
    initial_capital = portfolio_params.get('initialCapital', 1000000000.0)
    filter_return_on_tiaocang_date = portfolio_params.get(
        'filterReturnOnTiaoCangDate', 0.09)
    data_source = portfolio_params.get('dataSource', DataSource.WIND)

    # update params
    update_factor = update_params.get('updateFactor', False)
    update_sec_score = update_params.get('updateSecScore', False)
    update_sec_select = update_params.get('updateSecSelect', False)

    if update_factor:
        factor = FactorLoader(start_date=start_date,
                              end_date=end_date,
                              factor_norm_dict=factor_norm_dict)
        factor_data = factor.get_factor_data()
    else:
        # TODO: load previously saved factor data instead.
        factor = None
        factor_data = None
        # Both later stages subscript factor_data; fail early with a clear
        # message instead of a TypeError on None further down.
        if update_sec_score or update_sec_select:
            raise ValueError(
                "updateSecScore/updateSecSelect require updateFactor=True: "
                "factor data is not available otherwise")

    def _names_of(factor_type):
        # Factor names whose norm-dict entry is tagged with factor_type.
        return [
            name for name in factor_norm_dict
            if factor_norm_dict[name][1] == factor_type
        ]

    if update_sec_score:
        layer_factor = [
            factor_data[name]
            for name in _names_of(DCAMFactorType.layerFactor)
        ]
        alpha_names = _names_of(DCAMFactorType.alphaFactor)
        alpha_factor = [factor_data[name] for name in alpha_names]
        # NOTE(review): the sign is read from factor_data[name][2]; it may be
        # intended to come from factor_norm_dict[name][2] - confirm.
        alpha_factor_sign = [factor_data[name][2] for name in alpha_names]
        analyzer = DCAMAnalyzer(
            layer_factor=layer_factor,
            alpha_factor=alpha_factor,
            sec_return=factor_data['RETURN'],
            tiaocang_date=factor.get_tiaocang_date(),
            tiaocang_date_window_size=tiaocang_date_window_size,
            save_sec_score=save_sec_score,
            factor_weight_type=factor_weight_type,
            alpha_factor_sign=alpha_factor_sign)
        sec_score = analyzer.calc_sec_score()
    else:
        sec_score = load_sec_score(_secScorePath)

    if update_sec_select:
        index_comp = IndexComp(industry_weight=factor_data['IND_WGT'])
        selector = Selector(
            sec_score=sec_score,
            industry=factor_data['INDUSTRY'],
            nb_sec_selected_per_industry_min=nb_sec_selected_per_industry_min,
            index_comp=index_comp,
            save_sec_selected=save_sec_selected,
            use_industry_name=use_industry_name,
            nb_sec_selected_total=nb_sec_selected_total,
            ignore_zero_weight=ignore_zero_weight)
        selector.industry_neutral = True
        selector.sec_selection()
        sec_selected = selector.sec_selected_full_info
        pprint(selector.sec_selected_full_info)
    else:
        sec_selected = load_sec_selected(_secSelectedPath)

    # construct strategy ptf
    # Price data is needed up to one month after the last rebalance date.
    sse_cal = Calendar('China.SSE')
    end_date_for_price_data = str(
        sse_cal.advanceDate(Date.strptime(end_date), '1m'))

    strategy = Portfolio(
        sec_selected=sec_selected,
        end_date=end_date_for_price_data,
        initial_capital=initial_capital,
        filter_return_on_tiaocang_date=filter_return_on_tiaocang_date,
        data_source=data_source,
        benchmark_sec_id=benchmark_sec_id,
        re_balance_freq=re_balance_freq)
    strategy.evaluate_ptf_return()