def update_index_class(self, industry_class_name, industry_class_dict):
    """
    Build the per-day industry classification of every A share and save it.

    Index membership records whose ``index_code`` is one of the keys of
    ``industry_class_dict`` are read from ``AShareIndexMembers``. A table
    indexed by the 'SH' calendar, with one column per A share, is then
    filled: for each membership record of a security, the rows from
    ``in_date`` to ``out_date`` of that security's column are set to the
    record's ``index_code``. Remaining gaps are forward-filled and then
    back-filled. The table is kept on ``self.index_class`` and written with
    ``save_data_to_hdf5``.

    :param industry_class_name: data name passed to ``save_data_to_hdf5``.
    :param industry_class_dict: mapping whose keys are the index codes to
        query; only the keys are used here.
    :return: None
    """
    # NOTE(review): the connection scope is limited to the query and its
    # materialisation; confirm that no later helper relies on it being open.
    with MongoConnect(self.database):
        index_members_data = AShareIndexMembers.objects(
            index_code__in=industry_class_dict.keys()).as_pymongo()
        field_list = ['index_code', 'security_code', 'in_date', 'out_date']
        # list() forces the query to run while the connection is open.
        self.index_members_df = pd.DataFrame(
            list(index_members_data)).reindex(columns=field_list)

    # Every missing value in these four columns becomes the current time.
    # Presumably this targets the open-ended ``out_date`` of current
    # members -- TODO confirm.
    self.index_members_df = self.index_members_df.fillna(
        datetime.now()).reset_index(drop=True)

    a_share_list = GetCollectionList().get_a_share_list()
    calendar_SH = GetCalendar().get_calendar('SH')
    self.index_class = pd.DataFrame(columns=a_share_list, index=calendar_SH)

    def industry_history(x, index_members_df):
        # ``x`` is one column of the table; ``x.name`` is its security code.
        memberships = index_members_df[
            index_members_df.security_code == x.name]
        for _, row in memberships.iterrows():
            # Label-based slice over the calendar index.
            x[row['in_date']:row['out_date']] = row['index_code']
        return x

    self.index_class = self.index_class.apply(
        industry_history, args=(self.index_members_df,), axis=0)
    # ffill()/bfill() replace fillna(method='pad'/'backfill'), whose
    # ``method`` argument is deprecated in recent pandas releases.
    self.index_class = self.index_class.ffill().bfill()

    folder_name = LocalDataFolderName.INDUSTRY_CLASS.value
    path = LocalDataPath.path + folder_name + '/'
    save_data_to_hdf5(path, industry_class_name, self.index_class)
def save_a_share_adj_factor_right(self):
    """
    Compute ex-right/dividend adjustment factors and save them to HDF5.

    The closing price looked up for each record's ``ex_date`` is used as the
    share price for bonus and converted shares, and the adjustment factor
    ``adj_factor`` is derived from it:

        ratio = bonus share ratio + conversion ratio
                + consolidation/split ratio
        single adjustment factor =
            close * (1 + ratio + rights issue ratio + SEO ratio)
            / (close - cash dividend ratio + close * ratio
               + rights issue price * rights issue ratio
               + SEO price * SEO ratio)

    Per security, the factors are passed to ``self.cal_backward_factor`` and
    laid out on the 'SZ' calendar. ``inf`` and ``0`` are treated as missing,
    gaps are forward-filled, and anything still missing (including
    securities without any record) is set to 1. Two data sets are written:
    the backward factors, and the forward factors obtained by dividing the
    backward factors by their last row.

    :return: None
    """
    kline_object = GetKlineData()
    all_market_data = kline_object.cache_all_stock_data()

    # NOTE(review): the connection scope covers the Mongo read and the
    # close-price lookup; confirm that no later helper needs it open.
    with MongoConnect(self.database):
        self.data = pd.DataFrame(
            AShareExRightDividend.objects.as_pymongo())
        self.data['close'] = self.data.apply(
            lambda x: self.get_adj_day_close(
                x['security_code'], x['ex_date'], all_market_data),
            axis=1)

    # Missing ratios, prices and closes all count as 0 in the formula below.
    self.data = self.data.fillna(0)
    close = self.data['close']
    ratio = (self.data['bonus_share_ratio']
             + self.data['conversed_ratio']
             + self.data['consolidate_split_ratio'])
    numerator = close * (
        1 + ratio + self.data['rightsissue_ratio'] + self.data['seo_ratio'])
    denominator = (
        close - self.data['cash_dividend_ratio'] + close * ratio
        + self.data['rightsissue_price'] * self.data['rightsissue_ratio']
        + self.data['seo_price'] * self.data['seo_ratio'])
    self.data['adj_factor'] = numerator / denominator

    folder_name = LocalDataFolderName.ADJ_FACTOR.value
    path = LocalDataPath.path + folder_name + '/'

    self.data = self.data.reindex(
        columns=['security_code', 'ex_date', 'adj_factor'])
    self.data = self.data.set_index(['ex_date']).sort_index()

    calendar = GetCalendar().get_calendar('SZ')
    backward_factor = pd.DataFrame(index=calendar)
    for security_code, adj_data in self.data.groupby('security_code'):
        # Column assignment aligns the result on the calendar index.
        backward_factor[security_code] = self.cal_backward_factor(
            adj_data['adj_factor'])

    # inf (division by zero above) and 0 are not usable factors.
    backward_factor.replace([np.inf, 0], np.nan, inplace=True)
    # ffill() replaces fillna(method='ffill'), whose ``method`` argument is
    # deprecated in recent pandas releases.
    backward_factor.ffill(inplace=True)
    backward_factor.fillna(1, inplace=True)
    # Securities with market data but no ex-right record get a factor of 1.
    backward_factor = backward_factor.reindex(
        columns=all_market_data['close'].columns, fill_value=1)

    save_data_to_hdf5(path,
                      AdjustmentFactor.BACKWARD_ADJ_FACTOR.value,
                      backward_factor)
    save_data_to_hdf5(path,
                      AdjustmentFactor.FROWARD_ADJ_FACTOR.value,
                      backward_factor.div(backward_factor.iloc[-1]))
def get_all_market_data(self, security_list, end=None):
    """
    Load market data for many securities in parallel, grouped by field.

    ``security_list`` is split into chunks that are handed to
    ``self._get_data_with_process_pool`` through a process pool. Each worker
    receives the database name, its chunk, a manager dict shared between
    processes and the chunk's index. The dicts found in the shared dict
    afterwards are merged, and one DataFrame per field is built from them.

    :param security_list: sliceable sequence of security codes to load.
    :param end: value stored on ``self.end`` (presumably the last time
        point to load -- confirm against the worker). Defaults to the
        current time at the moment of the call.
    :return: dict mapping every field of ``self.field`` except
        ``'time_tag'`` to a DataFrame with one column per security;
        'open', 'high', 'low' and 'close' are divided by 10000.
    """
    # Resolve the default per call: a ``datetime.now()`` default in the
    # signature is evaluated only once, when the function is defined.
    if end is None:
        end = datetime.now()

    calendar_obj = GetCalendar()
    self.calendar_SZ = calendar_obj.get_calendar('SZ')
    self.end = end
    database = DatabaseName.A_SHARE_KLINE_DAILY.value

    process_num = cpu_count() + 2
    process_stock_num = len(security_list) // process_num + 1
    # Full-size chunks first; the last chunk also takes the remainder.
    chunk_count = len(security_list) // process_stock_num
    security_list_split = [
        security_list[i * process_stock_num:(i + 1) * process_stock_num]
        for i in range(chunk_count - 1)
    ]
    if chunk_count:
        security_list_split.append(
            security_list[(chunk_count - 1) * process_stock_num:])

    with Manager() as manager:
        # The context manager releases the pool even if submitting fails.
        with Pool(process_num) as process_pool:
            process_manager_dict = manager.dict()
            for chunk_index, chunk in enumerate(security_list_split):
                # NOTE: the async results are not collected, so an
                # exception raised inside a worker is not re-raised here.
                process_pool.apply_async(
                    self._get_data_with_process_pool,
                    args=(database, chunk, process_manager_dict,
                          chunk_index))
            process_pool.close()
            process_pool.join()
        # Copy out of the proxy before the manager shuts down.
        process_dict = dict(process_manager_dict)

    stock_data_dict = {}
    for single_stock_data in process_dict.values():
        stock_data_dict.update(single_stock_data)

    price_fields = ('open', 'high', 'low', 'close')
    field_data_dict = {}
    for field in self.field:
        if field == 'time_tag':
            continue
        field_data_pd = pd.DataFrame({
            key: value[field] for key, value in stock_data_dict.items()
        })
        # Raw open/high/low/close values are divided by 10000.
        if field in price_fields:
            field_data_pd = field_data_pd.div(10000)
        field_data_dict[field] = field_data_pd
    return field_data_dict