def insert_security_code(self, market, file_name, path):
    """Import one index's daily kline CSV into its own Mongo collection.

    :param market: exchange suffix, e.g. 'SH' or 'SZ'
    :param file_name: CSV file name; its stem is the index code
    :param path: directory containing the CSV
    """
    database = DatabaseName.INDEX_KLINE_DAILY.value
    with MongoConnect(database):
        print(path + file_name + '\n')
        kline_daily_data = pd.read_csv(path + file_name, encoding='unicode_escape')
        code = file_name.split('.')[0]
        # Map legacy SH index codes (999xxx) to their canonical 000xxx codes.
        code_transfer_dict = {'999999': '000001', '999998': '000002', '999997': '000003',
                              '999996': '000004', '999995': '000005', '999994': '000006',
                              '999993': '000007', '999992': '000008', '999991': '000010',
                              '999990': '000011', '999989': '000012', '999988': '000013',
                              '999987': '000016', '999986': '000015', '000300': '000300'}
        if market == 'SH':
            if code in code_transfer_dict:
                code = code_transfer_dict[code]
            else:
                code = '00' + code[2:]
        security_code = code + '.' + market
        # 'volumw'/'turover' are typos that exist in the CSV headers themselves.
        kline_daily_data = kline_daily_data.reindex(
            columns=['date', 'open', 'high', 'low', 'close', 'volumw', 'turover',
                     'match_items', 'interest'])
        kline_daily_data.rename(columns={'volumw': 'volume', 'turover': 'amount'},
                                inplace=True)
        with switch_collection(Kline, security_code) as KlineDaily_security_code:
            doc_list = []
            for index, row in kline_daily_data.iterrows():
                # BUG FIX: the NaN test must run on the raw value. The original
                # did `date_int = int(row['date'])` first, so int(NaN) raised
                # before the check, and `np.isnan(int(...))` could never be True.
                if np.isnan(row['date']):
                    continue
                time_tag = datetime.strptime(str(int(row['date'])), "%Y%m%d")
                doc = KlineDaily_security_code(time_tag=time_tag, pre_close=None,
                                               open=int(row['open']), high=int(row['high']),
                                               low=int(row['low']), close=int(row['close']),
                                               volume=int(row['volume']),
                                               amount=int(row['amount']),
                                               match_items=int(row['match_items']),
                                               interest=int(row['interest']))
                doc_list.append(doc)
            KlineDaily_security_code.objects.insert(doc_list)
def update_a_share_capitalization(self):
    """Return total market value per stock (total_share * 10000 * close).

    Loads share-capitalization history from Mongo, aligns each stock's
    total_share to the market close-price calendar with forward fill, and
    multiplies by close prices.

    :return: DataFrame indexed like the market close data, one column per stock
    """
    with MongoConnect(self.database):
        a_share_capitalization = AShareCapitalization.objects().as_pymongo()
        field_list = ['security_code', 'change_date', 'total_share', 'float_share',
                      'float_a_share', 'float_b_share', 'float_h_share']
        self.a_share_capitalization = pd.DataFrame(
            list(a_share_capitalization)).reindex(columns=field_list)
        kline_object = GetKlineData()
        market_close_data = kline_object.cache_all_stock_data()['close']
        index = list(set(market_close_data.index).union(
            set(self.a_share_capitalization['change_date'])))
        # BUG FIX: a set union has arbitrary order; the forward fill below is
        # only meaningful on a chronologically sorted index (the sibling
        # hdf5-saving variant of this method sorts as well).
        index.sort()
        share_capitalization_grouped = self.a_share_capitalization.groupby('security_code')
        share_capitalization = pd.DataFrame(index=index)
        for security_code, group in share_capitalization_grouped:
            data = group.reindex(['change_date', 'total_share'], axis=1) \
                .sort_values('change_date').set_index('change_date')
            try:
                share_capitalization[security_code] = data
            except ValueError:
                # Four stocks have duplicated change_date rows; they need
                # manual cleaning upstream — keep only the duplicated labels.
                share_capitalization[security_code] = data[data.index.duplicated()]
        share_capitalization = share_capitalization.fillna(method='ffill').reindex(
            market_close_data.index)
        # Shares presumably stored in units of 10,000 — TODO confirm upstream.
        return share_capitalization.multiply(10000) * market_close_data
def update_index_class(self, industry_class_name, industry_class_dict):
    """Build a per-stock industry classification table and save it to hdf5.

    :param industry_class_name: hdf5 data-set name for this classification scheme
    :param industry_class_dict: mapping whose keys are the index codes defining
        the classification; only ``.keys()`` is used to filter membership records
    """
    with MongoConnect(self.database):
        index_members_data = AShareIndexMembers.objects(
            index_code__in=industry_class_dict.keys()).as_pymongo()
        field_list = ['index_code', 'security_code', 'in_date', 'out_date']
        self.index_members_df = pd.DataFrame(
            list(index_members_data)).reindex(columns=field_list)
        # A missing out_date means "still a member": treat the interval as
        # extending to the current moment.
        self.index_members_df = self.index_members_df.fillna(
            datetime.now()).reset_index(drop=True)
        get_collection_list = GetCollectionList()
        a_share_list = get_collection_list.get_a_share_list()
        calendar_obj = GetCalendar()
        calendar_SH = calendar_obj.get_calendar('SH')
        # Rows: SH trading days; columns: all A-share codes; cell values will be
        # the index_code of the industry the stock belonged to on that day.
        self.index_class = pd.DataFrame(columns=a_share_list, index=calendar_SH)

        def industry_history(x, index_members_df):
            # x is one stock's column (x.name == security_code); stamp each
            # membership interval [in_date, out_date] with its index_code.
            industry_in_out_date = index_members_df[
                index_members_df.security_code == x.name]
            for index, row in industry_in_out_date.iterrows():
                x[row['in_date']:row['out_date']] = row['index_code']
            return x

        # axis=0: apply column-by-column, i.e. once per security.
        self.index_class = self.index_class.apply(
            industry_history, args=(self.index_members_df, ), axis=0)
        # Fill interior gaps forward, then leading NaNs backward.
        self.index_class = self.index_class.fillna(method='pad').fillna(
            method='backfill')
        folder_name = LocalDataFolderName.INDUSTRY_CLASS.value
        path = LocalDataPath.path + folder_name + '/'
        data_name = industry_class_name
        save_data_to_hdf5(path, data_name, self.index_class)
def save_factor_data(self, factor_name, data_source=None):
    """Persist the pre-processed factor data to hdf5 and/or Mongo.

    :param factor_name: name under which the factor is stored
    :param data_source: subset of ['hdf5', 'mongo']; defaults to both
    """
    if data_source is None:
        data_source = ['hdf5', 'mongo']
    if 'hdf5' in data_source:
        # Local hdf5 copy, used by single-factor analysis.
        path = LocalDataPath.path + LocalDataFolderName.FACTOR.value + '/'
        save_data_to_hdf5(path, factor_name, self.raw_data)
    if 'mongo' in data_source:
        with MongoConnect(DatabaseName.MULTI_FACTOR_DATA.value):
            # BUG FIX: the rename comprehension iterated `extreme_data.columns`,
            # but no `extreme_data` exists in this scope (NameError at runtime);
            # the columns being renamed are those of self.raw_data.
            raw_data = self.raw_data.rename(columns={
                i: code_market_to_market_code(i)
                for i in self.raw_data.columns
            })
            doc_list = []
            for index, row in raw_data.iterrows():
                doc_list.append(FactorPreProcessingData(factor_name=factor_name,
                                                        time_tag=index,
                                                        factor_data=row))
                # Flush in batches of ~1000 to bound request size.
                if len(doc_list) > 999:
                    FactorPreProcessingData.objects.insert(doc_list)
                    doc_list = []
            # Flush the remainder; guard so an empty list is never inserted.
            if doc_list:
                FactorPreProcessingData.objects.insert(doc_list)
def save_a_share_ex_right_dividend(self):
    """Batch-insert ex-right/dividend rows from self.data_df into Mongo.

    Wind column names are lower-cased to match the document schema; date
    columns (DateTimeField) are parsed from yyyymmdd numbers, with NaN/None
    becoming ``None``.
    """
    database = 'stock_base_data'
    with MongoConnect(database):
        doc_list = []
        for index, row in self.data_df.iterrows():
            row_dict = dict(row)
            row_dict['security_code'] = row_dict['S_INFO_WINDCODE']
            row_dict.pop('OBJECT_ID')
            row_dict.pop('S_INFO_WINDCODE')
            doc = AShareExRightDividend()
            for key, value in row_dict.items():
                if key.lower() in self.collection_property_list:
                    property_name = AShareExRightDividend.__dict__[key.lower()]
                    if isinstance(property_name, StringField):
                        setattr(doc, key.lower(), str(value))
                    elif isinstance(property_name, DateTimeField):
                        # ROBUSTNESS: pd.isna also handles None/NaT/strings,
                        # where np.isnan(value) raised TypeError on non-floats.
                        if pd.isna(value):
                            setattr(doc, key.lower(), None)
                        else:
                            setattr(doc, key.lower(),
                                    datetime.strptime(str(int(value)), "%Y%m%d"))
                    else:
                        setattr(doc, key.lower(), value)
            doc_list.append(doc)
            # Flush in batches of ~1000 to bound request size.
            if len(doc_list) > 999:
                AShareExRightDividend.objects.insert(doc_list)
                doc_list = []
        # BUG FIX: only insert the remainder when non-empty — inserting an
        # empty list raises when the row count is a multiple of the batch size.
        if doc_list:
            AShareExRightDividend.objects.insert(doc_list)
def update_index_members(self):
    """Dump all index-membership records from Mongo into one local hdf5 file."""
    columns = ['index_code', 'security_code', 'in_date', 'out_date']
    with MongoConnect(self.database):
        records = list(AShareIndexMembers.objects().as_pymongo())
        self.index_members_df = pd.DataFrame(records).reindex(columns=columns)
        # File and data-set name both follow the folder name.
        folder_name = LocalDataFolderName.INDEX_MEMBER.value
        target_path = LocalDataPath.path + folder_name + '/'
        save_data_to_hdf5(target_path, folder_name, self.index_members_df)
def save_a_share_adj_factor_right(self):
    """
    Compute per-event adjustment factors and save backward/forward factors to hdf5.

    Takes the close price on each event's ex-date as the pre-event price, writes
    the single-event adjustment factor back to AShareExRightDividend data, where
        ratio = bonus_share_ratio + conversed_ratio + consolidate_split_ratio
        single-event factor =
            close * (1 + ratio + rightsissue_ratio + seo_ratio)
            / (close - cash_dividend_ratio + close * ratio
               + rightsissue_price * rightsissue_ratio + seo_price * seo_ratio)
    :return:
    """
    kline_object = GetKlineData()
    all_market_data = kline_object.cache_all_stock_data()
    with MongoConnect(self.database):
        self.data = pd.DataFrame(
            AShareExRightDividend.objects.as_pymongo())
        # Close price on each event's ex-date (NaN when no bar exists that day).
        self.data['close'] = self.data.apply(
            lambda x: self.get_adj_day_close(x['security_code'], x[
                'ex_date'], all_market_data), axis=1)
        self.data = self.data.fillna(0)
        ratio = self.data['bonus_share_ratio'] + self.data[
            'conversed_ratio'] + self.data['consolidate_split_ratio']
        self.data['adj_factor'] = self.data['close'] * (
            1 + ratio + self.data['rightsissue_ratio'] + self.data['seo_ratio']
        ) / (self.data['close'] - self.data['cash_dividend_ratio'] +
             self.data['close'] * ratio + self.data['rightsissue_price'] *
             self.data['rightsissue_ratio'] + self.data['seo_price'] *
             self.data['seo_ratio'])
        folder_name = LocalDataFolderName.ADJ_FACTOR.value
        path = LocalDataPath.path + folder_name + '/'
        self.data = self.data.reindex(
            columns=['security_code', 'ex_date', 'adj_factor'])
        self.data.set_index(["ex_date"], inplace=True)
        self.data.sort_index(inplace=True)
        calendar_obj = GetCalendar()
        calendar = calendar_obj.get_calendar('SZ')
        backward_factor = pd.DataFrame(index=calendar)
        adj_factor = pd.DataFrame(index=calendar)
        data_dict = dict(
            list(self.data.groupby(self.data['security_code'])))
        for security_code, adj_data in data_dict.items():
            # Cumulative backward-adjustment series per security.
            backward_factor[security_code] = self.cal_backward_factor(
                adj_data['adj_factor'])
            adj_factor[security_code] = adj_data['adj_factor']
        # inf / 0 arise from zero denominators or missing closes; carry the last
        # valid factor forward, defaulting to 1 where nothing is known yet.
        backward_factor.replace([np.inf, 0], np.nan, inplace=True)
        backward_factor.fillna(method='ffill', inplace=True)
        backward_factor.fillna(1, inplace=True)
        # Securities with no ex-right events get a constant factor of 1.
        backward_factor = backward_factor.reindex(
            columns=all_market_data['close'].columns, fill_value=1)
        save_data_to_hdf5(path,
                          AdjustmentFactor.BACKWARD_ADJ_FACTOR.value,
                          backward_factor)
        # Forward factor = backward factor normalised by its latest value.
        # NOTE(review): 'FROWARD' is the enum member's actual spelling elsewhere
        # in the project — do not "fix" it here without renaming the enum.
        save_data_to_hdf5(path, AdjustmentFactor.FROWARD_ADJ_FACTOR.value,
                          backward_factor.div(backward_factor.iloc[-1]))
def update_calendar_hdf5(self):
    """Persist each market's trade-day list from Mongo to its own hdf5 file."""
    with MongoConnect(self.database):
        calendar_df = pd.DataFrame(AShareCalendar.objects().as_pymongo())
        calendar_df.set_index('market', inplace=True)
        # Keep only the trade_days payload.
        calendar_df = calendar_df.drop(['_id', 'update_date'], axis=1)
        folder_name = LocalDataFolderName.CALENDAR.value
        target_path = LocalDataPath.path + folder_name + '/'
        for market, row in calendar_df.iterrows():
            # One file per market, e.g. calendar_SH.
            save_data_to_hdf5(target_path, folder_name + '_' + str(market),
                              pd.DataFrame(row['trade_days']))
def insert_security_code(self, market, file_name, path):
    """Import one stock's daily kline CSV into its per-code Mongo collection,
    filling pre_close — and all pre-2002 history — from the cached reference
    table in self.data_dict.

    :param market: exchange suffix, e.g. 'SH' or 'SZ'
    :param file_name: CSV file name; its stem is the security code
    :param path: directory containing the CSV
    """
    with MongoConnect(self.database):
        print(path + file_name + '\n')
        kline_daily_data = pd.read_csv(path + file_name, encoding='unicode_escape')
        security_code = file_name.split('.')[0] + '.' + market
        # Only ordinary A-share codes are imported here.
        if is_security_type(security_code, 'EXTRA_STOCK_A'):
            # 'volumw'/'turover' are typos present in the CSV headers themselves.
            kline_daily_data = kline_daily_data.reindex(
                columns=['date', 'open', 'high', 'low', 'close', 'volumw',
                         'turover', 'match_items', 'interest'])
            kline_daily_data.rename(columns={'volumw': 'volume',
                                             'turover': 'amount'}, inplace=True)
            # CSV bars are only used from 2002-01-04 onward; earlier history is
            # backfilled from the reference table below.
            kline_daily_data = kline_daily_data[kline_daily_data.date >= 20020104]
            with switch_collection(Kline, security_code) as KlineDaily_security_code:
                doc_list = []
                security_code_data = pd.DataFrame()
                if security_code in self.data_dict.keys():
                    security_code_data = self.data_dict[security_code].set_index(["TRADE_DT"])
                    security_code_data = security_code_data.fillna(0)
                for index, row in kline_daily_data.iterrows():
                    date_int = int(row['date'])
                    # NOTE(review): int(...) never yields NaN — if row['date']
                    # were NaN, the int() above would already raise, so this
                    # guard is effectively a no-op.
                    if not np.isnan(date_int):
                        try:
                            # Prices are stored as int(price * 10000);
                            # missing reference row -> pre_close unknown.
                            pre_close = int(10000 * security_code_data.loc[date_int, 'S_DQ_PRECLOSE'])
                        except KeyError:
                            pre_close = None
                        date_int = str(date_int)
                        time_tag = datetime.strptime(date_int, "%Y%m%d")
                        doc = KlineDaily_security_code(time_tag=time_tag,
                                                       pre_close=pre_close,
                                                       open=int(row['open']),
                                                       high=int(row['high']),
                                                       low=int(row['low']),
                                                       close=int(row['close']),
                                                       volume=int(row['volume']),
                                                       amount=int(row['amount']),
                                                       match_items=int(row['match_items']),
                                                       interest=int(row['interest']))
                        doc_list.append(doc)
                # Backfill daily bars before 2002-01-04 from the full reference
                # table; those rows carry no match_items data, so it is set to 0.
                security_code_data = security_code_data[security_code_data.index < 20020104]
                for index, row in security_code_data.iterrows():
                    # Skip days with no turnover.
                    if row['S_DQ_AMOUNT'] > 0:
                        date_int = int(index)
                        date_int = str(date_int)
                        time_tag = datetime.strptime(date_int, "%Y%m%d")
                        try:
                            pre_close = int(row['S_DQ_PRECLOSE'] * 10000)
                        except KeyError:
                            pre_close = None
                        doc = KlineDaily_security_code(time_tag=time_tag,
                                                       pre_close=pre_close,
                                                       open=int(row['S_DQ_OPEN'] * 10000),
                                                       high=int(row['S_DQ_HIGH'] * 10000),
                                                       low=int(row['S_DQ_LOW'] * 10000),
                                                       close=int(row['S_DQ_CLOSE'] * 10000),
                                                       volume=int(row['S_DQ_VOLUME'] * 100),
                                                       amount=int(row['S_DQ_AMOUNT'] * 1000),
                                                       match_items=0, interest=0)
                        doc_list.append(doc)
                KlineDaily_security_code.objects.insert(doc_list)
def update_a_share_capitalization(self):
    """
    Save four hdf5 data sets: total share, total market value,
    float A share, and float A share market value.
    :return:
    """
    with MongoConnect(self.database):
        a_share_capitalization = AShareCapitalization.objects().as_pymongo(
        )
        field_list = [
            'security_code', 'change_date', 'total_share', 'float_share',
            'float_a_share', 'float_b_share', 'float_h_share'
        ]
        self.a_share_capitalization = pd.DataFrame(
            list(a_share_capitalization)).reindex(columns=field_list)
        kline_object = GetKlineData()
        market_close_data = kline_object.cache_all_stock_data()['close']
        # Union of kline dates and capitalization change dates, sorted so
        # the forward fill below runs chronologically.
        index = list(
            set(market_close_data.index).union(
                set(self.a_share_capitalization['change_date'])))
        index.sort()
        share_capitalization_grouped = self.a_share_capitalization.groupby(
            'security_code')
        total_share = pd.DataFrame({})
        float_a_share = pd.DataFrame({})
        for i in share_capitalization_grouped:
            # i = (security_code, group DataFrame)
            data = i[1].sort_values('change_date').set_index('change_date')
            try:
                total_share[i[0]] = data['total_share'].reindex(index)
                float_a_share[i[0]] = data['float_a_share'].reindex(index)
            except ValueError:
                # Four stocks have duplicated change_date rows that need
                # manual cleaning; keep only the duplicated labels.
                total_share[i[0]] = data[
                    data.index.duplicated()]['total_share'].reindex(index)
                float_a_share[i[0]] = data[data.index.duplicated(
                )]['float_a_share'].reindex(index)
        # Carry the last known share counts forward, then align to kline dates.
        total_share = total_share.fillna(method='ffill').reindex(
            market_close_data.index)
        float_a_share = float_a_share.fillna(method='ffill').reindex(
            market_close_data.index)
        # Shares presumably stored in units of 10,000 — TODO confirm upstream;
        # market value = shares * close.
        total_share_value = total_share.multiply(10000) * market_close_data
        float_a_share_value = float_a_share.multiply(
            10000) * market_close_data
        folder_name = LocalDataFolderName.INDICATOR_EVERYDAY.value
        path = LocalDataPath.path + folder_name + '/'
        save_data_to_hdf5(path, 'total_share', total_share)
        save_data_to_hdf5(path, 'float_a_share', float_a_share)
        save_data_to_hdf5(path, 'total_share_value', total_share_value)
        save_data_to_hdf5(path, 'float_a_share_value', float_a_share_value)
def _get_data_with_process_pool(self, database, security_list, process_manager_dict, security_list_i):
    """Worker: load daily klines for one slice of securities into the shared dict.

    :param database: Mongo database holding the per-security kline collections
    :param security_list: securities this worker is responsible for
    :param process_manager_dict: multiprocessing manager dict collecting results
    :param security_list_i: key under which this worker stores its result
    """
    with MongoConnect(database):
        result = {}
        for security_code in security_list:
            with switch_collection(Kline, security_code) as kline_collection:
                raw_docs = kline_collection.objects(time_tag__lte=self.end).as_pymongo()
                frame = pd.DataFrame(list(raw_docs)).reindex(columns=self.field)
                frame.set_index(["time_tag"], inplace=True)
                # Align to the SZ calendar, forward-filling missing days.
                result[security_code] = frame.reindex(self.calendar_SZ).fillna(method='ffill')
        process_manager_dict[security_list_i] = result
def save_a_share_calendar(self):
    """Group raw trade-day rows by exchange and insert one calendar doc per market."""
    database = DatabaseName.STOCK_BASE_DATA.value
    # Wind exchange codes -> local market suffixes.
    alias = {'SSE': 'SH', 'SZSE': 'SZ'}
    with MongoConnect(database):
        doc_list = []
        for market, group in self.data_df.groupby("S_INFO_EXCHMARKET"):
            trade_days = [
                date_to_datetime(str(day))
                for day in sorted(list(group['TRADE_DAYS']))
            ]
            doc_list.append(
                AShareCalendar(market=alias.get(market, market),
                               trade_days=trade_days))
        AShareCalendar.objects.insert(doc_list)
def update_index_data(self, end=None):
    """Dump index daily klines from Mongo to per-field hdf5 files.

    :param end: last time_tag to include; defaults to "now" at call time.
    """
    # BUG FIX: the original default `end=datetime.now()` was evaluated once at
    # import time, so long-running processes silently used a stale cutoff.
    # `None` sentinel keeps the call signature backward-compatible.
    if end is None:
        end = datetime.now()
    get_collection_list = GetCollectionList()
    index_list = get_collection_list.get_index_list()
    self.end = end
    database = DatabaseName.INDEX_KLINE_DAILY.value
    with MongoConnect(database):
        index_data_dict = {}
        for index_code in index_list:
            with switch_collection(Kline, index_code) as KlineDaily_index_code:
                security_code_data = KlineDaily_index_code.objects(
                    time_tag__lte=self.end).as_pymongo()
                security_code_data_df = pd.DataFrame(
                    list(security_code_data)).reindex(columns=self.field)
                security_code_data_df.set_index(["time_tag"], inplace=True)
                # The database holds one spurious extra day (2016-01-01,
                # not a trading day); drop it as a special case.
                if pd.Timestamp(datetime(2016, 1, 1)) in security_code_data_df.index:
                    security_code_data_df = security_code_data_df.drop(
                        labels=datetime(2016, 1, 1), axis=0)
                index_data_dict[index_code] = security_code_data_df
        field_data_dict = {}
        for i in self.field:
            if i != 'time_tag':
                field_data_pd = pd.DataFrame(
                    {key: value[i] for key, value in index_data_dict.items()})
                # Raw OHLC values are stored * 10000; scale back to prices.
                if i in ['open', 'high', 'low', 'close']:
                    field_data_dict[i] = field_data_pd.div(10000)
                else:
                    field_data_dict[i] = field_data_pd
        folder_name = LocalDataFolderName.MARKET_DATA.value
        sub_folder_name = LocalDataFolderName.KLINE_DAILY.value
        sub_sub_folder_name = LocalDataFolderName.INDEX.value
        for field in self.field:
            if field not in ['time_tag', 'interest']:
                path = (LocalDataPath.path + folder_name + '/' + sub_folder_name
                        + '/' + sub_sub_folder_name + '/')
                save_data_to_hdf5(path, field,
                                  pd.DataFrame(field_data_dict[field]))
def save_ic_analysis_result(self, factor_name):
    """Persist one factor's IC analysis result to Mongo.

    :param factor_name: factor identifier used as the document key
    """
    with MongoConnect(DatabaseName.MULTI_FACTOR_DATA.value):
        ic_frame = self.ic_df.copy()
        p_frame = self.p_value_df.copy()
        # Stringify the DatetimeIndex so it serialises into the document.
        ic_frame.index = ic_frame.index.format()
        p_frame.index = p_frame.index.format()
        FactorIcAnalysisResult(
            factor_name=factor_name,
            begin_date=self.factor.index[0],  # first day of factor data
            end_date=self.factor.index[-1],   # last day of factor data
            # IC decay: rows are dates, columns are decay periods [1, ic_decay]
            ic=ic_frame,
            # p-value decay, same layout as `ic`
            p_value=p_frame,
            ic_result=self.ic_result).save()
def save_a_sw_index(self):
    """Batch-insert SWS index kline rows into Mongo with prices as scaled ints.

    Price fields are stored as int(price * 10000), volume as int(vol * 100),
    amount as int(amount * 1000); NaNs pass through unscaled.
    """
    database = 'stock_base_data'
    price_fields = ['S_DQ_PRECLOSE', 'S_DQ_OPEN', 'S_DQ_HIGH', 'S_DQ_LOW',
                    'S_DQ_CLOSE']
    with MongoConnect(database):
        doc_list = []
        for index, row in self.data_df.iterrows():
            row = dict(row)
            for field, value in row.items():
                if field in price_fields:
                    if not np.isnan(value):
                        row[field] = int(row[field] * 10000)
                elif field == 'S_DQ_VOLUME':
                    if not np.isnan(value):
                        row[field] = int(row[field] * 100)
                elif field == 'S_DQ_AMOUNT':
                    if not np.isnan(value):
                        row[field] = int(row[field] * 1000)
            doc = ASwsIndex(
                sw_index_code=row['S_INFO_WINDCODE'],
                time_tag=date_to_datetime(str(row['TRADE_DT'])),
                pre_close=row['S_DQ_PRECLOSE'],
                open=row['S_DQ_OPEN'],
                high=row['S_DQ_HIGH'],
                low=row['S_DQ_LOW'],
                close=row['S_DQ_CLOSE'],
                volume=row['S_DQ_VOLUME'],
                amount=row['S_DQ_AMOUNT'],
                index_pe=row['S_VAL_PE'],
                index_pb=row['S_VAL_PB'],
                index_free_float_market_capitalisation=row['S_DQ_MV'],
                index_total_market_capitalisation=row['S_VAL_MV'])
            doc_list.append(doc)
            # Flush in batches of ~1000 to bound request size.
            if len(doc_list) > 999:
                ASwsIndex.objects.insert(doc_list)
                doc_list = []
        # BUG FIX: only insert the remainder when non-empty — inserting an
        # empty list raises when the row count is a multiple of the batch size.
        if doc_list:
            ASwsIndex.objects.insert(doc_list)
def save_share_capitalization(self):
    """Insert all share-capitalization rows from self.data_df into Mongo in one batch."""
    database = DatabaseName.STOCK_BASE_DATA.value
    with MongoConnect(database):
        documents = []
        for _, row in self.data_df.iterrows():
            # CHANGE_DT arrives as a yyyymmdd number; parse to datetime.
            change_date = datetime.strptime(str(int(row['CHANGE_DT'])), "%Y%m%d")
            documents.append(
                AShareCapitalization(security_code=row['S_INFO_WINDCODE'],
                                     change_date=change_date,
                                     total_share=row['TOT_SHR'],
                                     float_share=row['FLOAT_SHR'],
                                     float_a_share=row['FLOAT_A_SHR'],
                                     float_b_share=row['FLOAT_B_SHR'],
                                     float_h_share=row['FLOAT_H_SHR']))
        AShareCapitalization.objects.insert(documents)
def update_a_sws_index(self):
    """Dump SWS industry index klines from Mongo to hdf5, restoring price scale."""
    price_cols = ['pre_close', 'open', 'high', 'low', 'close']
    columns = ['sw_index_code', 'time_tag'] + price_cols + [
        'volume', 'amount', 'index_pe', 'index_pb',
        'index_free_float_market_capitalisation',
        'index_total_market_capitalisation'
    ]
    database = DatabaseName.STOCK_BASE_DATA.value
    with MongoConnect(database):
        raw_docs = ASwsIndex.objects().as_pymongo()
        self.a_sws_index_df = pd.DataFrame(raw_docs).reindex(columns=columns)
        # Prices are stored as int(price * 10000); scale back to real prices.
        self.a_sws_index_df[price_cols] = self.a_sws_index_df[price_cols].div(10000)
        folder_name = LocalDataFolderName.SWS_INDEX.value
        target_path = LocalDataPath.path + folder_name + '/'
        save_data_to_hdf5(target_path, folder_name, self.a_sws_index_df)
def insert_security_code_list(self):
    """Import every market's daily kline CSVs via a process pool, then write
    securities that exist only in the cached reference table (i.e. delisted,
    no CSV present) straight from that table.
    """
    stock_code_list = []
    for market in self.market_list:
        path = self.data_path + market + '/MultDate/'
        file_list = os.listdir(path)
        # Collect every code seen as a CSV file, suffixed with its market.
        stock_code_list += [i.split('.')[0] + '.' + market for i in file_list]
        file_num = 0
        # One pool per market; each CSV is imported in a worker process.
        p = Pool(8)
        for file_name in file_list:
            file_num += 1
            print('完成数量:', file_num)
            p.apply_async(self.insert_security_code, args=(market, file_name, path))
        p.close()
        p.join()
    # Codes present in the reference data but with no CSV: delisted stocks.
    delist = list(set(self.data_dict.keys()).difference(set(stock_code_list)))
    with MongoConnect(self.database):
        for security_code in delist:
            with switch_collection(Kline, security_code) as KlineDaily_security_code:
                doc_list = []
                security_code_data = self.data_dict[security_code].set_index(["TRADE_DT"])
                for index, row in security_code_data.iterrows():
                    # Skip days with no turnover.
                    if row['S_DQ_AMOUNT'] > 0:
                        date_int = int(index)
                        date_int = str(date_int)
                        time_tag = datetime.strptime(date_int, "%Y%m%d")
                        try:
                            # Prices are stored as int(price * 10000).
                            pre_close = int(row['S_DQ_PRECLOSE'] * 10000)
                        except KeyError:
                            pre_close = None
                        doc = KlineDaily_security_code(time_tag=time_tag,
                                                       pre_close=pre_close,
                                                       open=int(row['S_DQ_OPEN'] * 10000),
                                                       high=int(row['S_DQ_HIGH'] * 10000),
                                                       low=int(row['S_DQ_LOW'] * 10000),
                                                       close=int(row['S_DQ_CLOSE'] * 10000),
                                                       volume=int(row['S_DQ_VOLUME'] * 100),
                                                       amount=int(row['S_DQ_AMOUNT'] * 1000),
                                                       match_items=0, interest=0)
                        doc_list.append(doc)
                KlineDaily_security_code.objects.insert(doc_list)
def save_ic_analysis_result(self, factor_name):
    """Store one factor's IC analysis (decay series, p-values, summary) in Mongo.

    :param factor_name: factor identifier used as the document key
    """
    with MongoConnect(DatabaseName.MULTI_FACTOR_DATA.value):
        ic_copy, p_value_copy = self.ic_df.copy(), self.p_value_df.copy()
        # Document fields want string row labels, not Timestamps.
        ic_copy.index = ic_copy.index.format()
        p_value_copy.index = p_value_copy.index.format()
        document = FactorIcAnalysisResult(
            factor_name=factor_name,
            # span of the underlying factor data
            begin_date=self.factor.index[0],
            end_date=self.factor.index[-1],
            # IC / p-value decay: rows are dates, columns are decay
            # periods over the closed interval [1, ic_decay]
            ic=ic_copy,
            p_value=p_value_copy,
            # summary statistics, e.g. ic_mean, ic_std, ic_ir, sign
            # ratios, skewness, kurtosis, significance ratios
            ic_result=self.ic_result)
        document.save()
def save_regression_analysis_result(self, factor_name):
    """Persist one factor's regression-analysis outputs to Mongo.

    :param factor_name: factor identifier used as the document key
    """
    with MongoConnect(DatabaseName.MULTI_FACTOR_DATA.value):
        factor_return = self.factor_return.copy()
        factor_t_value = self.factor_t_value.copy()
        # NOTE(review): not a copy — the .index.format() calls below mutate
        # self.net_analysis_result in place; confirm that is intended.
        net_analysis_result = self.net_analysis_result
        # Stringify every DatetimeIndex so it serialises into the document.
        factor_return.index = factor_return.index.format()
        factor_t_value.index = factor_t_value.index.format()
        net_analysis_result['cumsum'][
            'net_value_df'].index = net_analysis_result['cumsum'][
                'net_value_df'].index.format()
        net_analysis_result['cumprod'][
            'net_value_df'].index = net_analysis_result['cumprod'][
                'net_value_df'].index.format()
        net_analysis_result['cumsum'][
            'benchmark_df'].index = net_analysis_result['cumsum'][
                'benchmark_df'].index.format()
        net_analysis_result['cumprod'][
            'benchmark_df'].index = net_analysis_result['cumprod'][
                'benchmark_df'].index.format()
        doc = FactorRegressionAnalysisResult(
            factor_name=factor_name,
            # start date of the factor data
            begin_date=self.factor.index[0],
            # end date of the factor data
            end_date=self.factor.index[-1],
            # autocorrelation (acf) and partial autocorrelation (pacf) of the
            # factor return, orders 1-10 (list of length 11, entries 1-10 used)
            acf_result=self.acf_result,
            # factor returns: simple, compound, daily
            factor_return=factor_return,
            # t-values of the single-factor test, Series indexed by date
            factor_t_value=factor_t_value,
            # t-value statistics: 't_value_mean' = mean of absolute values,
            # 't_value_greater_two' = share of |t| > 2
            factor_t_value_statistics=self.factor_t_value_statistics,
            # net-value analysis results
            net_analysis_result=self.net_analysis_result)
        doc.save()
result.columns = factor_ic.keys() return result.div(result.sum(1), axis=0) if __name__ == '__main__': factor_list = ['factor_ma5', 'factor_ma10'] path = LocalDataPath.path + LocalDataFolderName.FACTOR.value + '/' factor_data = {} for factor_name in factor_list: factor_single_data = get_local_data(path, factor_name + '.h5') # 指数数据不全,需要删一部分因子数据 factor_data[factor_name] = factor_single_data[ factor_single_data.index < datetime(2020, 1, 1)] with MongoConnect(DatabaseName.MULTI_FACTOR_DATA.value): factor_ic = {} factor_return = {} for factor_name in factor_list: factor_regression_analysis_result = FactorRegressionAnalysisResult.objects(factor_name=factor_name) \ .only('factor_name') \ .only('begin_date') \ .only('end_date') \ .only('factor_return') \ .as_pymongo() factor_return[factor_name] = pd.DataFrame( factor_regression_analysis_result[0]['factor_return']) factor_return[factor_name].index = pd.DatetimeIndex( factor_return[factor_name].index) factor_ic_result = FactorIcAnalysisResult.objects(factor_name=factor_name) \
# -*- coding: utf-8 -*- # ------------------------------ # @Time : 2019/11/21 # @Author : gao # @File : update_finance_data.py # @Project : AmazingQuant # ------------------------------ from AmazingQuant.utils.mongo_connection_me import MongoConnect from apps.server.database_server.database_field.field_a_share_finance_data import AShareIncome from AmazingQuant.utils.performance_test import Timer if __name__ == '__main__': database = 'stock_base_data' with MongoConnect(database): with Timer(True): security_code_list = AShareIncome.objects.distinct('security_code') data = AShareIncome.objects(security_code__in=security_code_list, statement_type=408009000) for i in data: print(i.security_code)