def update_index_data(self, end=None):
    """
    Export daily index k-line data from MongoDB into one HDF5 dataset per field.

    Every index collection is read up to ``end``, the per-index frames are
    regrouped into one frame per field (columns are index codes, rows are
    ``time_tag``), and each field except ``time_tag`` and ``interest`` is
    written with ``save_data_to_hdf5``.

    :param end: inclusive upper bound for ``time_tag``; when ``None`` the
        current time at call time is used.
    :return: None
    """
    if end is None:
        # Resolve "now" per call: a ``datetime.now()`` default in the signature
        # is evaluated only once, when the function is defined.
        end = datetime.now()
    get_collection_list = GetCollectionList()
    index_list = get_collection_list.get_index_list()
    self.end = end
    database = DatabaseName.INDEX_KLINE_DAILY.value
    with MongoConnect(database):
        index_data_dict = {}
        for index_code in index_list:
            with switch_collection(Kline, index_code) as KlineDaily_index_code:
                security_code_data = KlineDaily_index_code.objects(time_tag__lte=self.end).as_pymongo()
                security_code_data_df = pd.DataFrame(list(security_code_data)).reindex(columns=self.field)
                security_code_data_df.set_index(["time_tag"], inplace=True)
                index_data_dict[index_code] = security_code_data_df

        price_fields = ('open', 'high', 'low', 'close')
        field_data_dict = {}
        for field in self.field:
            if field == 'time_tag':
                continue
            field_data_pd = pd.DataFrame({key: value[field] for key, value in index_data_dict.items()})
            # Raw open/high/low/close values are divided by 10000 before export.
            if field in price_fields:
                field_data_pd = field_data_pd.div(10000)
            field_data_dict[field] = field_data_pd

        # The target folder does not depend on the field, so build it once.
        path = (LocalDataPath.path
                + LocalDataFolderName.MARKET_DATA.value + '/'
                + LocalDataFolderName.KLINE_DAILY.value + '/'
                + LocalDataFolderName.INDEX.value + '/')
        for field in self.field:
            if field not in ['time_tag', 'interest']:
                save_data_to_hdf5(path, field, pd.DataFrame(field_data_dict[field]))
def save_a_share_cash_flow(self):
    """
    Convert every row of ``self.data_df`` into an ``AShareCashFlow`` document
    and bulk-insert the documents in batches of 1000.

    For each row, ``S_INFO_WINDCODE`` is copied to ``security_code`` and the
    ``WIND_CODE``, ``OBJECT_ID`` and ``S_INFO_WINDCODE`` keys are dropped.
    Remaining keys are lower-cased and, when listed in
    ``self.collection_property_list``, assigned to the document according to
    the declared field type: string fields get ``str(value)``, date-time
    fields are parsed from ``YYYYMMDD`` numbers (NaN becomes ``None``), and
    any other field receives the raw value.

    :return: None
    """
    database = 'stock_base_data'
    with MongoConnect(database):
        doc_list = []
        for _, row in self.data_df.iterrows():
            row_dict = dict(row)
            row_dict['security_code'] = row_dict['S_INFO_WINDCODE']
            for source_only_key in ('WIND_CODE', 'OBJECT_ID', 'S_INFO_WINDCODE'):
                row_dict.pop(source_only_key)

            doc = AShareCashFlow()
            for key, value in row_dict.items():
                field_name = key.lower()
                if field_name not in self.collection_property_list:
                    continue
                field_type = AShareCashFlow.__dict__[field_name]
                if isinstance(field_type, StringField):
                    setattr(doc, field_name, str(value))
                elif isinstance(field_type, DateTimeField):
                    if np.isnan(value):
                        setattr(doc, field_name, None)
                    else:
                        setattr(doc, field_name,
                                datetime.strptime(str(int(value)), "%Y%m%d"))
                else:
                    setattr(doc, field_name, value)

            doc_list.append(doc)
            if len(doc_list) > 999:
                AShareCashFlow.objects.insert(doc_list)
                doc_list = []

        # Flush the remainder only when there is one: a bulk insert of an
        # empty list is rejected, which previously happened whenever the row
        # count was an exact multiple of 1000 (or the frame was empty).
        if doc_list:
            AShareCashFlow.objects.insert(doc_list)
def _get_data_with_process_pool(self, database, security_list, process_manager_dict, security_list_i):
    """
    Load k-line data for one batch of securities and publish it under the
    batch key of a shared mapping.

    Each security's collection is read up to ``self.end``, restricted to
    ``self.field``, indexed by ``time_tag``, aligned to ``self.calendar_SZ``
    and forward-filled.

    :param database: database name passed to ``MongoConnect``
    :param security_list: security codes (collection names) to load
    :param process_manager_dict: mapping that receives the batch result
    :param security_list_i: key under which the batch result is stored
    :return: None
    """
    with MongoConnect(database):
        thread_data_dict = {}
        for stock in security_list:
            with switch_collection(Kline, stock) as KlineDaily_security_code:
                security_code_data = KlineDaily_security_code.objects(time_tag__lte=self.end).as_pymongo()
                security_code_data_df = pd.DataFrame(list(security_code_data)).reindex(columns=self.field)
                security_code_data_df.set_index(["time_tag"], inplace=True)
                # ``.ffill()`` replaces the deprecated ``fillna(method='ffill')``.
                thread_data_dict[stock] = security_code_data_df.reindex(self.calendar_SZ).ffill()
        process_manager_dict[security_list_i] = thread_data_dict
def update_index_members(self):
    """
    Read all ``AShareIndexMembers`` records, keep the membership columns in
    ``self.index_members_df`` and save that frame with ``save_data_to_hdf5``.

    :return: None
    """
    with MongoConnect(DatabaseName.STOCK_BASE_DATA.value):
        records = list(AShareIndexMembers.objects().as_pymongo())
        wanted_columns = ['index_code', 'security_code', 'in_date', 'out_date']
        self.index_members_df = pd.DataFrame(records).reindex(columns=wanted_columns)

        # The folder name doubles as the dataset name.
        member_folder = LocalDataFolderName.INDEX_MEMBER.value
        target_dir = LocalDataPath.path + member_folder + '/'
        save_data_to_hdf5(target_dir, member_folder, self.index_members_df)
def save_a_share_adj_factor_right(self):
    """
    Compute ex-right/ex-dividend adjustment factors and save the backward and
    forward factor tables to HDF5.

    For every ``AShareExRightDividend`` record the close is looked up with
    ``self.get_adj_day_close(security_code, ex_date, all_market_data)`` and
    missing values are replaced by 0. Then::

        ratio = bonus_share_ratio + conversed_ratio + consolidate_split_ratio

        adj_factor = close * (1 + ratio + rightsissue_ratio + seo_ratio)
                     / (close - cash_dividend_ratio + close * ratio
                        + rightsissue_price * rightsissue_ratio
                        + seo_price * seo_ratio)

    Per security, ``self.cal_backward_factor`` turns the single-event factors
    into a backward factor series on the 'SZ' calendar; ``inf`` and 0 are
    treated as missing, gaps are forward-filled and leading gaps become 1.
    The forward table is the backward table divided by its last row.

    :return: None
    """
    kline_object = GetKlineData()
    all_market_data = kline_object.cache_all_stock_data()
    with MongoConnect(self.database):
        self.data = pd.DataFrame(AShareExRightDividend.objects.as_pymongo())
        self.data['close'] = self.data.apply(
            lambda x: self.get_adj_day_close(x['security_code'], x['ex_date'], all_market_data),
            axis=1)
        self.data = self.data.fillna(0)

        ratio = (self.data['bonus_share_ratio']
                 + self.data['conversed_ratio']
                 + self.data['consolidate_split_ratio'])
        numerator = self.data['close'] * (
            1 + ratio + self.data['rightsissue_ratio'] + self.data['seo_ratio'])
        denominator = (self.data['close'] - self.data['cash_dividend_ratio']
                       + self.data['close'] * ratio
                       + self.data['rightsissue_price'] * self.data['rightsissue_ratio']
                       + self.data['seo_price'] * self.data['seo_ratio'])
        self.data['adj_factor'] = numerator / denominator

        folder_name = LocalDataFolderName.ADJ_FACTOR.value
        path = LocalDataPath.path + folder_name + '/'

        self.data = self.data.reindex(columns=['security_code', 'ex_date', 'adj_factor'])
        self.data.set_index(["ex_date"], inplace=True)
        self.data.sort_index(inplace=True)

        calendar_obj = GetCalendar()
        calendar = calendar_obj.get_calendar('SZ')
        backward_factor = pd.DataFrame(index=calendar)
        # NOTE(review): ``adj_factor`` is filled below but never saved or
        # returned; kept so the column assignment behaves as before.
        adj_factor = pd.DataFrame(index=calendar)
        data_dict = dict(list(self.data.groupby(self.data['security_code'])))
        for security_code, adj_data in data_dict.items():
            backward_factor[security_code] = self.cal_backward_factor(adj_data['adj_factor'])
            adj_factor[security_code] = adj_data['adj_factor']

        backward_factor.replace([np.inf, 0], np.nan, inplace=True)
        # ``ffill`` replaces the deprecated ``fillna(method='ffill')``.
        backward_factor.ffill(inplace=True)
        backward_factor.fillna(1, inplace=True)

        save_data_to_hdf5(path, AdjustmentFactor.BACKWARD_ADJ_FACTOR.value, backward_factor)
        # Forward factors: normalise every row by the most recent row.
        save_data_to_hdf5(path, AdjustmentFactor.FROWARD_ADJ_FACTOR.value,
                          backward_factor.div(backward_factor.iloc[-1]))
def update_calendar_hdf5(self):
    """
    Save the ``trade_days`` of every market in ``AShareCalendar`` as its own
    HDF5 dataset, named ``<calendar folder>_<market>``.

    :return: None
    """
    with MongoConnect(self.database):
        calendar_df = pd.DataFrame(AShareCalendar.objects().as_pymongo())
        calendar_df.set_index('market', inplace=True)
        calendar_df = calendar_df.drop(['_id', 'update_date'], axis=1)

        calendar_folder = LocalDataFolderName.CALENDAR.value
        target_dir = LocalDataPath.path + calendar_folder + '/'
        for market, _ in calendar_df.iterrows():
            trade_days = pd.DataFrame(calendar_df.loc[market, 'trade_days'])
            save_data_to_hdf5(target_dir, calendar_folder + '_' + str(market), trade_days)
def insert_security_code_list(self):
    """
    Import daily k-lines for every market, then backfill delisted securities.

    For each market in ``self.market_list`` the files under
    ``<data_path><market>/MultDate/`` are handed to
    ``self.insert_security_code`` through a pool of 8 worker processes.
    Securities present in ``self.data_dict`` but without a file in any market
    folder are then inserted directly from ``self.data_dict``; only rows with
    a positive ``S_DQ_AMOUNT`` are kept.

    :return: None
    """
    stock_code_list = []
    for market in self.market_list:
        path = self.data_path + market + '/MultDate/'
        file_list = os.listdir(path)
        stock_code_list += [i.split('.')[0] + '.' + market for i in file_list]

        p = Pool(8)
        for file_num, file_name in enumerate(file_list, start=1):
            # The counter is printed when a task is submitted.
            print('完成数量:', file_num)
            p.apply_async(self.insert_security_code, args=(market, file_name, path))
        p.close()
        p.join()

    delist = list(set(self.data_dict.keys()).difference(set(stock_code_list)))
    with MongoConnect(self.database):
        for security_code in delist:
            with switch_collection(Kline, security_code) as KlineDaily_security_code:
                doc_list = []
                security_code_data = self.data_dict[security_code].set_index(["TRADE_DT"])
                for index, row in security_code_data.iterrows():
                    if not row['S_DQ_AMOUNT'] > 0:
                        continue
                    time_tag = datetime.strptime(str(int(index)), "%Y%m%d")
                    try:
                        pre_close = int(row['S_DQ_PRECLOSE'] * 10000)
                    except KeyError:
                        # Source table has no previous-close column.
                        pre_close = None
                    doc_list.append(KlineDaily_security_code(
                        time_tag=time_tag,
                        pre_close=pre_close,
                        open=int(row['S_DQ_OPEN'] * 10000),
                        high=int(row['S_DQ_HIGH'] * 10000),
                        low=int(row['S_DQ_LOW'] * 10000),
                        close=int(row['S_DQ_CLOSE'] * 10000),
                        volume=int(row['S_DQ_VOLUME'] * 100),
                        amount=int(row['S_DQ_AMOUNT'] * 1000),
                        match_items=0,
                        interest=0))
                # A security with no positive-amount rows yields no documents,
                # and a bulk insert of an empty list is rejected, so skip it.
                if doc_list:
                    KlineDaily_security_code.objects.insert(doc_list)
def save_a_share_calendar(self):
    """
    Group ``self.data_df`` by ``S_INFO_EXCHMARKET`` and insert one
    ``AShareCalendar`` document per market with its sorted trade days.

    Market codes 'SSE' and 'SZSE' are stored as 'SH' and 'SZ'; any other
    code is stored unchanged.

    :return: None
    """
    market_alias = {'SSE': 'SH', 'SZSE': 'SZ'}
    with MongoConnect(DatabaseName.STOCK_BASE_DATA.value):
        days_by_market = {
            exchange: list(group['TRADE_DAYS'])
            for exchange, group in self.data_df.groupby("S_INFO_EXCHMARKET")
        }
        calendar_docs = []
        for exchange, raw_days in days_by_market.items():
            converted_days = [date_to_datetime(str(day)) for day in sorted(raw_days)]
            calendar_docs.append(
                AShareCalendar(market=market_alias.get(exchange, exchange),
                               trade_days=converted_days))
        AShareCalendar.objects.insert(calendar_docs)
def insert_security_code(self, market, file_name, path):
    """
    Import one security's daily k-line CSV into its own ``Kline`` collection.

    Files whose code is not of type 'EXTRA_STOCK_A' are ignored. CSV rows
    dated 2002-01-04 or later are inserted as-is, with ``pre_close`` looked
    up in ``self.data_dict`` (``None`` when no entry exists). Rows before
    that date are taken from ``self.data_dict`` instead, keeping only those
    with a positive ``S_DQ_AMOUNT``.

    :param market: market suffix appended to the code (``<code>.<market>``)
    :param file_name: CSV file name; the part before the first '.' is the code
    :param path: directory prefix that is concatenated with ``file_name``
    :return: None
    """
    with MongoConnect(self.database):
        print(path + file_name + '\n')
        kline_daily_data = pd.read_csv(path + file_name, encoding='unicode_escape')
        security_code = file_name.split('.')[0] + '.' + market
        if not is_security_type(security_code, 'EXTRA_STOCK_A'):
            return

        # 'volumw' and 'turover' are the column names used by the CSV files.
        kline_daily_data = kline_daily_data.reindex(columns=[
            'date', 'open', 'high', 'low', 'close', 'volumw', 'turover',
            'match_items', 'interest'
        ])
        kline_daily_data.rename(columns={
            'volumw': 'volume',
            'turover': 'amount'
        }, inplace=True)
        kline_daily_data = kline_daily_data[kline_daily_data.date >= 20020104]

        with switch_collection(Kline, security_code) as KlineDaily_security_code:
            doc_list = []
            security_code_data = pd.DataFrame()
            if security_code in self.data_dict:
                security_code_data = self.data_dict[security_code].set_index(["TRADE_DT"])
                security_code_data = security_code_data.fillna(0)

            for _, row in kline_daily_data.iterrows():
                # Check for a missing date before converting: the former
                # ``np.isnan(int(...))`` test could never be true.
                if pd.isna(row['date']):
                    continue
                date_int = int(row['date'])
                try:
                    pre_close = int(10000 * security_code_data.loc[date_int, 'S_DQ_PRECLOSE'])
                except KeyError:
                    # No reference row (or column) for this date.
                    pre_close = None
                time_tag = datetime.strptime(str(date_int), "%Y%m%d")
                doc_list.append(KlineDaily_security_code(
                    time_tag=time_tag,
                    pre_close=pre_close,
                    open=int(row['open']),
                    high=int(row['high']),
                    low=int(row['low']),
                    close=int(row['close']),
                    volume=int(row['volume']),
                    amount=int(row['amount']),
                    match_items=int(row['match_items']),
                    interest=int(row['interest'])))

            # Backfill daily bars before 2002-01-04 from the full table in
            # self.data_dict; match_items is set to 0 for these rows.
            security_code_data = security_code_data[security_code_data.index < 20020104]
            for index, row in security_code_data.iterrows():
                if not row['S_DQ_AMOUNT'] > 0:
                    continue
                time_tag = datetime.strptime(str(int(index)), "%Y%m%d")
                try:
                    pre_close = int(row['S_DQ_PRECLOSE'] * 10000)
                except KeyError:
                    pre_close = None
                doc_list.append(KlineDaily_security_code(
                    time_tag=time_tag,
                    pre_close=pre_close,
                    open=int(row['S_DQ_OPEN'] * 10000),
                    high=int(row['S_DQ_HIGH'] * 10000),
                    low=int(row['S_DQ_LOW'] * 10000),
                    close=int(row['S_DQ_CLOSE'] * 10000),
                    volume=int(row['S_DQ_VOLUME'] * 100),
                    amount=int(row['S_DQ_AMOUNT'] * 1000),
                    match_items=0,
                    interest=0))

            # A bulk insert of an empty list is rejected; skip it when the
            # file produced no documents.
            if doc_list:
                KlineDaily_security_code.objects.insert(doc_list)
# -*- coding: utf-8 -*-

# ------------------------------
# @Time    : 2019/11/21
# @Author  : gao
# @File    : update_finance_data.py
# @Project : AmazingQuant
# ------------------------------

from AmazingQuant.data_center.mongo_connection_me import MongoConnect
from AmazingQuant.data_center.database_field.field_a_share_finance_data import AShareIncome, AShareCashFlow
from AmazingQuant.utils.performance_test import Timer

if __name__ == '__main__':
    # Timed query: fetch every AShareIncome record with the given
    # statement_type across all distinct security codes and print each code.
    db_name = 'stock_base_data'
    with MongoConnect(db_name), Timer(True):
        all_codes = AShareIncome.objects.distinct('security_code')
        matching_records = AShareIncome.objects(security_code__in=all_codes,
                                                statement_type=408009000)
        for record in matching_records:
            print(record.security_code)
def insert_security_code(self, market, file_name, path):
    """
    Import one index's daily k-line CSV into its own ``Kline`` collection in
    the index k-line database.

    For market 'SH' the file's code is translated first: codes listed in the
    transfer table are replaced by their mapped value, any other code has its
    first two characters replaced by '00'. Rows without a date are skipped
    and ``pre_close`` is always stored as ``None``.

    :param market: market suffix appended to the code (``<code>.<market>``)
    :param file_name: CSV file name; the part before the first '.' is the code
    :param path: directory prefix that is concatenated with ``file_name``
    :return: None
    """
    database = DatabaseName.INDEX_KLINE_DAILY.value
    with MongoConnect(database):
        print(path + file_name + '\n')
        kline_daily_data = pd.read_csv(path + file_name, encoding='unicode_escape')
        code = file_name.split('.')[0]
        code_transfer_dict = {
            '999999': '000001', '999998': '000002', '999997': '000003',
            '999996': '000004', '999995': '000005', '999994': '000006',
            '999993': '000007', '999992': '000008', '999991': '000010',
            '999990': '000011', '999989': '000012', '999988': '000013',
            '999987': '000016', '999986': '000015', '000300': '000300'
        }
        if market == 'SH':
            code = code_transfer_dict.get(code, '00' + code[2:])
        security_code = code + '.' + market

        # 'volumw' and 'turover' are the column names used by the CSV files.
        kline_daily_data = kline_daily_data.reindex(columns=[
            'date', 'open', 'high', 'low', 'close', 'volumw', 'turover',
            'match_items', 'interest'
        ])
        kline_daily_data.rename(columns={
            'volumw': 'volume',
            'turover': 'amount'
        }, inplace=True)

        with switch_collection(Kline, security_code) as KlineDaily_security_code:
            doc_list = []
            for _, row in kline_daily_data.iterrows():
                # Skip rows without a date *before* converting: ``int(NaN)``
                # raises, so the former post-conversion check never helped.
                if pd.isna(row['date']):
                    continue
                time_tag = datetime.strptime(str(int(row['date'])), "%Y%m%d")
                doc_list.append(KlineDaily_security_code(
                    time_tag=time_tag,
                    pre_close=None,
                    open=int(row['open']),
                    high=int(row['high']),
                    low=int(row['low']),
                    close=int(row['close']),
                    volume=int(row['volume']),
                    amount=int(row['amount']),
                    match_items=int(row['match_items']),
                    interest=int(row['interest'])))

            # A bulk insert of an empty list is rejected; skip it when the
            # file produced no documents.
            if doc_list:
                KlineDaily_security_code.objects.insert(doc_list)