def create_factor_analysis(ds, **kwargs):
    ref_date = kwargs['next_execution_date']
    if not isBizDay('china.sse', ref_date):
        logger.info("{0} is not a business day".format(ref_date))
        return

    ref_date = advanceDateByCalendar('china.sse', ref_date, '-2b').strftime('%Y-%m-%d')
    factor_name = kwargs['factor_name']
    logger.info("updating '{0}' on {1}".format(factor_name, ref_date))

    # small universe, risk_neutral
    return_table = common_500_analysis(factor_name, ref_date, use_only_index_components=True, risk_neutral=True)
    if return_table is not None:
        upload(ref_date, return_table, destination, 'performance', factor_name, 'risk_neutral')

    # small universe, top_100
    return_table = common_500_analysis(factor_name, ref_date, use_only_index_components=True, risk_neutral=False)
    if return_table is not None:
        upload(ref_date, return_table, destination, 'performance', factor_name, 'top_100')

    # big universe, risk_neutral
    return_table = common_500_analysis(factor_name, ref_date, use_only_index_components=False, risk_neutral=True)
    if return_table is not None:
        upload(ref_date, return_table, destination, 'performance_big_universe', factor_name, 'risk_neutral')

    # big universe, top_100
    return_table = common_500_analysis(factor_name, ref_date, use_only_index_components=False, risk_neutral=False)
    if return_table is not None:
        upload(ref_date, return_table, destination, 'performance_big_universe', factor_name, 'top_100')
def exchange_suspend_info(ref_date, force_update=False):
    start_date = ref_date
    if not force_update:
        start_date = (find_latest_date() + dt.timedelta(days=1))

    end_date = ref_date
    date_range = pd.date_range(start_date, end_date)

    datas = []
    for date in date_range:
        if isBizDay('china.sse', date):
            datas.append(suspend_info(date.strftime('%Y-%m-%d')))
            spyder_logger.info('Scraping finished for date {0}'.format(date))

    if not datas:
        spyder_logger.info('No data is available for {0}'.format(ref_date))
        return

    total_table = pd.concat(datas)
    total_table.drop_duplicates(['停(复)牌时间', '证券代码'], inplace=True)

    if not total_table.empty:
        insert_table(total_table,
                     ['effectiveDate', 'instrumentID', 'instrumentName', 'status', 'reason', 'stopTime'],
                     'suspend_info',
                     exchange_db_settings)
def update_return_data_300(ds, **kwargs):
    ref_date = kwargs['next_execution_date']
    if not isBizDay('china.sse', ref_date):
        logger.info("{0} is not a business day".format(ref_date))
        return 0

    start_date = advanceDateByCalendar('china.sse', ref_date, '-30b')

    for date in bizDatesList('china.sse', start_date, ref_date):
        date = date.strftime('%Y-%m-%d')

        conn1 = create_ms_engine('PortfolioManagements300')
        df = fetch_date('StockReturns', date, conn1)

        conn2 = create_my_engine()
        delete_data('return_300', date, conn2)
        insert_data('return_300', df, conn2)

        conn3 = create_my_engine2()
        delete_data('return_300', date, conn3)
        insert_data('return_300', df, conn3)

    return 0
def check_holiday(this_date):
    flag = isBizDay('china.sse', this_date)

    if not flag:
        alpha_logger.info('Job will be omitted as {0} is a holiday'.format(this_date))

    return flag
def get_nffund_idx_etf_component(date, index):
    date = dt.datetime.strptime(date, '%Y%m%d')
    if not isBizDay('China.SSE', date):
        date = advanceDateByCalendar('China.SSE', date, '-1b')
    pre_trading_date = advanceDateByCalendar('China.SSE', date, '-1b').strftime('%Y%m%d')

    if index == 'zz500':
        date = date.strftime('%Y%m%d')
        url = "http://www.nffund.com/etf/bulletin/ETF500/510500{date}.txt".format(date=date)
        html_text = requests.get(url).content.decode('gbk').split('TAGTAG\r')[1]

        res = []
        col_name = ['Code', 'drop', 'Volume', 'drop', 'drop', 'drop', 'drop']
        for line in html_text.split('\r'):
            res.append(line.replace(' ', '').replace('\n', '').split('|'))

        res = pd.DataFrame(res, columns=col_name)
        res = res.drop('drop', axis=1).iloc[:500]
    elif index == 'hs300':
        url = "http://www.huatai-pb.com/etf-web/etf/index?fundcode=510300&beginDate={date}".format(
            date=date.strftime('%Y-%m-%d'))
        html_text = requests.get(url).content.decode('utf8')
        soup = BeautifulSoup(html_text, "lxml")

        res = []
        for item in soup.find_all('tr', {'align': 'center'})[1:]:
            sub_item = item.find_all('td')
            res.append([sub_item[0].text, sub_item[2].text])

        col_name = ['Code', 'Volume']
        res = pd.DataFrame(res, columns=col_name)
    elif index == 'sz50':
        url = "http://fund.chinaamc.com/product/fundShengoushuhuiqingdan.do"
        html_text = requests.post(url,
                                  data={'querryDate': date.strftime('%Y-%m-%d'),
                                        'fundcode': '510050'}).content.decode('utf8')
        soup = BeautifulSoup(html_text, "lxml")

        res = []
        for item in soup.find_all('tr', '')[17:]:
            sub_item = item.find_all('td')
            res.append([sub_item[0].text, sub_item[2].text])

        col_name = ['Code', 'Volume']
        res = pd.DataFrame(res, columns=col_name)
    else:
        raise KeyError('Do not have source for index %s yet...' % index)

    # convert string code to int code
    res['Code'] = res['Code'].apply(int)

    # fetch eod close price
    engine = create_engine('mssql+pymssql://sa:[email protected]/MultiFactor?charset=utf8')
    sql = 'select [Code], [Close] as PreClose from TradingInfo1 where Date = %s' % pre_trading_date
    close_data = pd.read_sql(sql, engine)
    res = res.merge(close_data, on='Code', how='left')

    # calculate weight
    res['weight'] = res['PreClose'].apply(float) * res['Volume'].apply(float)
    res['weight'] = res['weight'] / res['weight'].sum()
    res = res[['Code', 'weight']]
    return res
def update_factor_performance_big_universe_top_100(ds, **kwargs):
    ref_date = kwargs['next_execution_date']
    if not isBizDay('china.sse', ref_date):
        logger.info("{0} is not a business day".format(ref_date))
        return 0

    ref_date = advanceDateByCalendar('china.sse', ref_date, '-2b')
    ref_date = ref_date.strftime('%Y-%m-%d')
    previous_date = advanceDateByCalendar('china.sse', ref_date, '-1b')

    this_day_pos, total_data = create_ond_day_pos(ref_date, source_db, big_universe=True, risk_neutral=False)
    last_day_pos, _ = create_ond_day_pos(previous_date, source_db, big_universe=True, risk_neutral=False)

    return_table = settlement(ref_date,
                              this_day_pos,
                              total_data['bm'].values,
                              total_data['D1LogReturn'].values,
                              type='top_100')

    pos_diff_dict = {}

    for name in this_day_pos.columns.difference(['industry']):
        # per-industry turnover for this factor portfolio
        for ind in this_day_pos.industry.unique():
            pos_series = this_day_pos.loc[this_day_pos.industry == ind, name]

            if name in last_day_pos:
                last_series = last_day_pos.loc[last_day_pos.industry == ind, name]
                pos_diff = pos_series.sub(last_series, fill_value=0)
            else:
                pos_diff = pos_series

            pos_diff_dict[(name, ind)] = pos_diff.abs().sum()

        # total turnover across all industries
        pos_series = this_day_pos[name]
        if name in last_day_pos:
            last_series = last_day_pos[name]
            pos_diff = pos_series.sub(last_series, fill_value=0)
        else:
            pos_diff = pos_series

        pos_diff_dict[(name, 'total')] = pos_diff.abs().sum()

    pos_diff_series = pd.Series(pos_diff_dict, name='turn_over')
    pos_diff_series.index.names = ['portfolio', 'industry']
    pos_diff_series = pos_diff_series.reset_index()

    return_table = pd.merge(return_table, pos_diff_series, on=['portfolio', 'industry'])
    return_table['source'] = 'tiny'
    return_table['universe'] = 'zz500_expand'
    upload(ref_date, return_table, destination_db, 'performance')
def update_risk_factor_300(ds, **kwargs):
    ref_date = kwargs['next_execution_date']
    if not isBizDay('china.sse', ref_date):
        logger.info("{0} is not a business day".format(ref_date))
        return 0

    ref_date = ref_date.strftime('%Y-%m-%d')

    conn1 = create_ms_engine('PortfolioManagements300')
    df = fetch_date('RiskFactor', ref_date, conn1)

    conn2 = create_my_engine()
    delete_data('risk_factor_300', ref_date, conn2)
    insert_data('risk_factor_300', df, conn2)

    conn3 = create_my_engine2()
    delete_data('risk_factor_300', ref_date, conn3)
    insert_data('risk_factor_300', df, conn3)
    return 0
def update_portfolio_long_top(ds, **kwargs):
    ref_date = kwargs['next_execution_date']
    if not isBizDay('china.sse', ref_date):
        logger.info("{0} is not a business day".format(ref_date))
        return 0

    ref_date = ref_date.strftime('%Y-%m-%d')

    conn1 = create_ms_engine('FactorPerformance')
    df = fetch_date('Portfolio_LongTop_500', ref_date, conn1)

    conn2 = create_my_engine()
    delete_data('portfolio_longtop', ref_date, conn2)
    insert_data('portfolio_longtop', df, conn2)

    conn3 = create_my_engine2()
    delete_data('portfolio_longtop', ref_date, conn3)
    insert_data('portfolio_longtop', df, conn3)
    return 0
def update_trade_data(ds, **kwargs):
    ref_date = kwargs['next_execution_date']
    if not isBizDay('china.sse', ref_date):
        logger.info("{0} is not a business day".format(ref_date))
        return 0

    ref_date = ref_date.strftime('%Y-%m-%d')

    conn1 = create_ms_engine('MultiFactor')
    df = fetch_date('TradingInfo1', ref_date, conn1)

    conn2 = create_my_engine()
    delete_data('trade_data', ref_date, conn2)
    insert_data('trade_data', df, conn2)

    conn3 = create_my_engine2()
    delete_data('trade_data', ref_date, conn3)
    insert_data('trade_data', df, conn3)
    return 0
def update_factor_indicator(ds, **kwargs):
    ref_date = kwargs['next_execution_date']
    if not isBizDay('china.sse', ref_date):
        logger.info("{0} is not a business day".format(ref_date))
        return 0

    ref_date = advanceDateByCalendar('china.sse', ref_date, '-2b')
    ref_date = ref_date.strftime('%Y-%m-%d')

    conn1 = create_ms_engine('FactorPerformance')
    df = fetch_date('FactorIndicator_500', ref_date, conn1)

    conn2 = create_my_engine()
    delete_data('factor_indicator', ref_date, conn2)
    insert_data('factor_indicator', df, conn2)

    conn3 = create_my_engine2()
    delete_data('factor_indicator', ref_date, conn3)
    insert_data('factor_indicator', df, conn3)
    return 0
def update_daily_portfolio(ds, **kwargs):
    execution_date = kwargs['next_execution_date']
    if not isBizDay('china.sse', execution_date):
        logger.info("{0} is not a business day".format(execution_date))
        return 0

    prev_date = advanceDateByCalendar('china.sse', execution_date, '-1b')

    logger.info("factor data is loading for {0}".format(prev_date))
    logger.info("Current running date is {0}".format(execution_date))

    common_factors = ['EPSAfterNonRecurring', 'DivP']
    prod_factors = ['CFinc1', 'BDTO', 'RVOL']
    uqer_factors = ['CoppockCurve', 'EPS']

    factor_weights = np.array([-1.0, 2.0])
    factor_weights = factor_weights / factor_weights.sum()

    engine = sqlalchemy.create_engine('mysql+mysqldb://sa:[email protected]/multifactor?charset=utf8')
    engine2 = sqlalchemy.create_engine('mysql+pymysql://sa:[email protected]:3306/multifactor?charset=utf8')

    common_factors_df = pd.read_sql("select Code, 申万一级行业, {0} from factor_data where Date = '{1}'"
                                    .format(','.join(common_factors), prev_date), engine)
    prod_factors_df = pd.read_sql("select Code, {0} from prod_500 where Date = '{1}'"
                                  .format(','.join(prod_factors), prev_date), engine)
    uqer_factor_df = pd.read_sql("select Code, {0} from factor_uqer where Date = '{1}'"
                                 .format(','.join(uqer_factors), prev_date), engine2)
    risk_factor_df = pd.read_sql("select Code, {0} from risk_factor_500 where Date = '{1}'"
                                 .format(','.join(risk_factors_500), prev_date), engine)

    index_components_df = get_etf_index_weight.get_nffund_idx_etf_component(prev_date.strftime('%Y%m%d'),
                                                                            index='zz500')
    index_industry_weights = get_etf_index_weight.get_sw_industry_weight(index_components_df)
    index_components_df.rename(columns={'weight': 'benchmark'}, inplace=True)

    total_data = pd.merge(common_factors_df, uqer_factor_df, on=['Code'])
    total_data = pd.merge(total_data, risk_factor_df, on=['Code'])
    total_data = pd.merge(total_data, index_components_df, on=['Code'])
    total_data = total_data[total_data['benchmark'] != 0]

    null_flags = np.any(np.isnan(total_data[uqer_factors]), axis=1)
    total_data.fillna(0, inplace=True)

    total_factors = uqer_factors
    risk_factors_names = risk_factors_500 + ['Market']
    total_data['Market'] = 1.

    all_factors = total_data[total_factors]
    risk_factors = total_data[risk_factors_names]

    factor_processed = neutralize(risk_factors.values,
                                  standardize(winsorize_normal(all_factors.values)))

    normed_factor = pd.DataFrame(factor_processed,
                                 columns=total_factors,
                                 index=[prev_date] * len(factor_processed))

    er = normed_factor @ factor_weights

    # portfolio construction
    bm = total_data['benchmark'].values
    lbound = np.zeros(len(total_data))
    ubound = 0.01 + bm
    risk_exposure = total_data[risk_factors_names].values
    ubound[null_flags] = 0.
    if len(bm) != 500:
        total_weight = index_industry_weights['weight'].sum()
        filtered = index_industry_weights[index_industry_weights.industry.isin(risk_factors_500)]
        ind_weights = filtered['weight'].values

        risk_lbound = np.concatenate([ind_weights / total_weight,
                                      [bm @ total_data['Size'].values / total_weight],
                                      [1.]], axis=0)
        risk_ubound = np.concatenate([ind_weights / total_weight,
                                      [bm @ total_data['Size'].values / total_weight],
                                      [1.]], axis=0)
    else:
        risk_lbound = bm @ risk_exposure
        risk_ubound = bm @ risk_exposure

    # get black list 1
    engine = sqlalchemy.create_engine('mssql+pymssql://sa:[email protected]/WindDB')
    black_list = pd.read_sql("select S_INFO_WINDCODE, S_INFO_LISTDATE, sum(S_SHARE_RATIO) as s_ratio from ASHARECOMPRESTRICTED "
                             "where S_INFO_LISTDATE BETWEEN '{0}' and '{1}' "
                             "GROUP BY S_INFO_WINDCODE, S_INFO_LISTDATE ORDER BY s_ratio DESC;"
                             .format((execution_date - dt.timedelta(days=7)).strftime('%Y%m%d'),
                                     (execution_date + dt.timedelta(days=14)).strftime('%Y%m%d')), engine)
    black_list = black_list[black_list['s_ratio'] >= 3.]
    black_list.S_INFO_WINDCODE = black_list.S_INFO_WINDCODE.str.split('.').apply(lambda x: int(x[0]))

    mask_array = total_data.Code.isin(black_list.S_INFO_WINDCODE)
    ubound[mask_array.values] = 0.

    # get black list 2
    black_list2 = pd.read_sql("select S_INFO_WINDCODE, AVG(S_WQ_AMOUNT) as avg_amount from ASHAREWEEKLYYIELD "
                              "where TRADE_DT < {1} and TRADE_DT >= {0} GROUP BY S_INFO_WINDCODE;"
                              .format((execution_date - dt.timedelta(days=30)).strftime('%Y%m%d'),
                                      execution_date.strftime('%Y%m%d')), engine)
    black_list2 = black_list2[black_list2['avg_amount'] <= 15000.]
    black_list2.S_INFO_WINDCODE = black_list2.S_INFO_WINDCODE.str.split('.').apply(lambda x: int(x[0]))

    mask_array2 = total_data.Code.isin(black_list2.S_INFO_WINDCODE)
    ubound[mask_array2.values] = 0.

    # get black list 3
    black_list3 = pd.read_sql("SELECT S_INFO_WINDCODE, S_DQ_SUSPENDDATE FROM ASHARETRADINGSUSPENSION AS a "
                              "WHERE a.S_DQ_SUSPENDDATE = (SELECT top 1 S_DQ_SUSPENDDATE FROM ASHARETRADINGSUSPENSION AS b "
                              "WHERE a.S_INFO_WINDCODE=b.S_INFO_WINDCODE and cast(floor(cast(b.OPDATE as float)) as datetime) <= '{0}' ORDER BY b.S_DQ_SUSPENDDATE DESC) "
                              "AND a.S_INFO_WINDCODE IN (SELECT S_INFO_WINDCODE FROM ASHAREDESCRIPTION AS c "
                              "WHERE c.S_INFO_DELISTDATE IS NULL) AND (a.S_DQ_SUSPENDDATE>='{1}' OR (a.S_DQ_RESUMPDATE IS NULL AND a.S_DQ_SUSPENDTYPE=444003000))"
                              .format(execution_date, execution_date.strftime('%Y%m%d')), engine)
    black_list3.S_INFO_WINDCODE = black_list3.S_INFO_WINDCODE.str.split('.').apply(lambda x: int(x[0]))

    mask_array3 = total_data.Code.isin(black_list3.S_INFO_WINDCODE)
    ubound[mask_array3.values] = 0.

    # manual black list
    try:
        bk_list = pd.read_csv('~/mnt/sharespace/personal/licheng/portfolio/zz500_black_list/{0}.csv'.format(
            prev_date.strftime('%Y-%m-%d')), encoding='gbk', names=['code'])
        logger.info('Manual black list exists for the date: {0}'.format(prev_date.strftime('%Y-%m-%d')))
        for code in bk_list['code']:
            ubound[total_data.Code == int(code)] = 0.
    except FileNotFoundError:
        logger.info('No manual black list exists for the date: {0}'.format(prev_date.strftime('%Y-%m-%d')))

    weights = build_portfolio(er,
                              builder='linear',
                              risk_exposure=risk_exposure,
                              lbound=lbound,
                              ubound=ubound,
                              risk_target=(risk_lbound, risk_ubound),
                              solver='GLPK')

    # use the raw values of er so the frame is indexed by Code rather than by date
    portfolio = pd.DataFrame({'weight': weights,
                              'industry': total_data['申万一级行业'].values,
                              'zz500': total_data['benchmark'].values,
                              'er': er.values},
                             index=total_data.Code)

    client = pymongo.MongoClient('mongodb://10.63.6.176:27017')
    db = client.multifactor
    portfolio_collection = db.portfolio

    detail_info = {}
    for code, w, bm_w, ind, r in zip(total_data.Code.values,
                                     weights,
                                     total_data['benchmark'].values,
                                     total_data['申万一级行业'].values,
                                     er):
        detail_info[str(code)] = {
            'weight': w,
            'industry': ind,
            'zz500': bm_w,
            'er': r
        }

    portfolio_dict = {'Date': prev_date,
                      'portfolio': detail_info}

    portfolio_collection.delete_many({'Date': prev_date})
    portfolio_collection.insert_one(portfolio_dict)

    portfolio.to_csv('~/mnt/sharespace/personal/licheng/portfolio/zz500/{0}.csv'.format(prev_date.strftime('%Y-%m-%d')),
                     encoding='gbk')

    return 0
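
# ---------------------------------------------------------------------------
# Wiring sketch (assumption): the callables above take (ds, **kwargs) and read
# kwargs['next_execution_date'], which matches Airflow python_callable tasks
# run with provide_context=True. The block below is a minimal illustration of
# how one of them could be scheduled; the dag_id, owner, start_date and
# schedule_interval are hypothetical and not taken from the original project.
# ---------------------------------------------------------------------------
from datetime import datetime, timedelta

from airflow import DAG
from airflow.operators.python_operator import PythonOperator

default_args = {
    'owner': 'quant',                        # hypothetical owner
    'start_date': datetime(2017, 1, 1),      # hypothetical start date
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
}

example_dag = DAG('update_daily_portfolio',  # hypothetical dag_id
                  default_args=default_args,
                  schedule_interval='0 18 * * *')  # hypothetical daily schedule

PythonOperator(task_id='update_daily_portfolio',
               python_callable=update_daily_portfolio,
               provide_context=True,         # injects ds / next_execution_date into **kwargs
               dag=example_dag)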