Example 1
    def save_factor_data(self, factor_name, data_source=None):
        if data_source is None:
            data_source = ['hdf5', 'mongo']
        if 'hdf5' in data_source:
            # Save the preprocessed data to local HDF5 for single-factor testing
            path = LocalDataPath.path + LocalDataFolderName.FACTOR.value + '/'
            save_data_to_hdf5(path, factor_name, self.raw_data)

        if 'mongo' in data_source:
            # Save the preprocessed data to MongoDB
            with MongoConnect(DatabaseName.MULTI_FACTOR_DATA.value):
                doc_list = []
                raw_data = self.raw_data.rename(columns={
                    i: code_market_to_market_code(i)
                    for i in self.raw_data.columns
                })
                for index, row in raw_data.iterrows():
                    doc = FactorPreProcessingData(factor_name=factor_name,
                                                  time_tag=index,
                                                  factor_data=row)
                    doc_list.append(doc)
                    # Write to MongoDB in batches of 1000 documents
                    if len(doc_list) > 999:
                        FactorPreProcessingData.objects.insert(doc_list)
                        doc_list = []
                # Insert the remainder after the loop; guard against an
                # empty list, which mongoengine's insert() would reject
                if doc_list:
                    FactorPreProcessingData.objects.insert(doc_list)
Example 2
    def update_index_class(self, industry_class_name, industry_class_dict):
        with MongoConnect(self.database):
            index_members_data = AShareIndexMembers.objects(
                index_code__in=industry_class_dict.keys()).as_pymongo()
            field_list = ['index_code', 'security_code', 'in_date', 'out_date']
            self.index_members_df = pd.DataFrame(
                list(index_members_data)).reindex(columns=field_list)
            # Members with no out_date are still in the index; treat them as
            # leaving "now" so the date-range assignment below covers today
            self.index_members_df = self.index_members_df.fillna(
                datetime.now()).reset_index(drop=True)

            get_collection_list = GetCollectionList()
            a_share_list = get_collection_list.get_a_share_list()
            calendar_obj = GetCalendar()
            calendar_SH = calendar_obj.get_calendar('SH')
            # One column per stock, one row per trading day
            self.index_class = pd.DataFrame(columns=a_share_list,
                                            index=calendar_SH)

            def industry_history(x, index_members_df):
                # x is one stock's column; stamp the industry index code
                # over each [in_date, out_date] membership interval
                industry_in_out_date = index_members_df[
                    index_members_df.security_code == x.name]
                for index, row in industry_in_out_date.iterrows():
                    x[row['in_date']:row['out_date']] = row['index_code']
                return x

            self.index_class = self.index_class.apply(
                industry_history, args=(self.index_members_df, ), axis=0)
            self.index_class = self.index_class.ffill().bfill()
            folder_name = LocalDataFolderName.INDUSTRY_CLASS.value
            path = LocalDataPath.path + folder_name + '/'
            data_name = industry_class_name
            save_data_to_hdf5(path, data_name, self.index_class)
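The slice assignment inside industry_history relies on pandas label-based slicing over a DatetimeIndex, which is inclusive on both endpoints. A minimal standalone demo of that pattern, using made-up dates and codes (not data from this project):

    import pandas as pd

    # One stock's column: trading-day index, industry code values
    calendar = pd.date_range('2020-01-01', '2020-01-10')
    col = pd.Series(index=calendar, dtype=object, name='000001.SZ')

    in_date, out_date = pd.Timestamp('2020-01-03'), pd.Timestamp('2020-01-07')
    col[in_date:out_date] = '801780.SI'  # label slice includes both endpoints

    # 2020-01-03 through 2020-01-07 now hold '801780.SI'; other days stay NaN,
    # to be filled by the ffill/bfill step above
    print(col)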
Example 3
    def update_index_data(self, end=None):
        """
        Update index daily kline data up to `end` and save one HDF5 file per field.

        :param end: last time_tag to include; defaults to now
        :return:
        """
        # Avoid the mutable-default pitfall: datetime.now() as a default
        # argument would be evaluated once at definition time, not per call
        if end is None:
            end = datetime.now()
        get_collection_list = GetCollectionList()
        index_list = get_collection_list.get_index_list()
        self.end = end
        database = DatabaseName.INDEX_KLINE_DAILY.value
        with MongoConnect(database):
            index_data_dict = {}
            for index_code in index_list:
                with switch_collection(Kline, index_code) as KlineDaily_index_code:
                    security_code_data = KlineDaily_index_code.objects(time_tag__lte=self.end).as_pymongo()
                    security_code_data_df = pd.DataFrame(list(security_code_data)).reindex(columns=self.field)
                    security_code_data_df.set_index(["time_tag"], inplace=True)
                    index_data_dict[index_code] = security_code_data_df
        field_data_dict = {}
        for i in self.field:
            if i != 'time_tag':
                field_data_pd = pd.DataFrame({key: value[i] for key, value in index_data_dict.items()})
                # Raw OHLC prices are stored scaled by 10000; divide to get yuan
                if i in ['open', 'high', 'low', 'close']:
                    field_data_dict[i] = field_data_pd.div(10000)
                else:
                    field_data_dict[i] = field_data_pd
        folder_name = LocalDataFolderName.MARKET_DATA.value
        sub_folder_name = LocalDataFolderName.KLINE_DAILY.value
        sub_sub_folder_name = LocalDataFolderName.INDEX.value
        for field in self.field:
            if field not in ['time_tag', 'interest']:
                path = LocalDataPath.path + folder_name + '/' + sub_folder_name + '/' + sub_sub_folder_name + '/'
                data_name = field
                save_data_to_hdf5(path, data_name, pd.DataFrame(field_data_dict[field]))
Example 4

    def update_index_members(self):
        with MongoConnect(self.database):
            index_members_data = AShareIndexMembers.objects().as_pymongo()
            field_list = ['index_code', 'security_code', 'in_date', 'out_date']
            self.index_members_df = pd.DataFrame(
                list(index_members_data)).reindex(columns=field_list)
            folder_name = LocalDataFolderName.INDEX_MEMBER.value
            path = LocalDataPath.path + folder_name + '/'
            data_name = folder_name
            save_data_to_hdf5(path, data_name, self.index_members_df)
Example 5

    def save_a_share_adj_factor_right(self):
        """
        Use the close price on the ex-right day as the price for bonus and
        conversion shares, then compute the adjustment factor and update it
        to AShareExRightDividend as adj_factor.
        ratio = bonus share ratio + conversion ratio + consolidation/split ratio
        single adjustment factor = record-day close * (1 + ratio + rights issue ratio + SEO ratio) /
        (record-day close - cash dividend ratio + record-day close * ratio +
         rights issue price * rights issue ratio + SEO price * SEO ratio)
        :return:
        """
        kline_object = GetKlineData()
        all_market_data = kline_object.cache_all_stock_data()

        with MongoConnect(self.database):
            self.data = pd.DataFrame(
                AShareExRightDividend.objects.as_pymongo())
            self.data['close'] = self.data.apply(
                lambda x: self.get_adj_day_close(x['security_code'], x[
                    'ex_date'], all_market_data),
                axis=1)
            self.data = self.data.fillna(0)
            ratio = self.data['bonus_share_ratio'] + self.data[
                'conversed_ratio'] + self.data['consolidate_split_ratio']
            self.data['adj_factor'] = self.data['close'] * (
                1 + ratio + self.data['rightsissue_ratio'] +
                self.data['seo_ratio']
            ) / (self.data['close'] - self.data['cash_dividend_ratio'] +
                 self.data['close'] * ratio + self.data['rightsissue_price'] *
                 self.data['rightsissue_ratio'] +
                 self.data['seo_price'] * self.data['seo_ratio'])

            folder_name = LocalDataFolderName.ADJ_FACTOR.value
            path = LocalDataPath.path + folder_name + '/'
            self.data = self.data.reindex(
                columns=['security_code', 'ex_date', 'adj_factor'])
            self.data.set_index(["ex_date"], inplace=True)
            self.data.sort_index(inplace=True)
            calendar_obj = GetCalendar()
            calendar = calendar_obj.get_calendar('SZ')
            backward_factor = pd.DataFrame(index=calendar)
            adj_factor = pd.DataFrame(index=calendar)
            data_dict = dict(
                list(self.data.groupby(self.data['security_code'])))
            for security_code, adj_data in data_dict.items():
                backward_factor[security_code] = self.cal_backward_factor(
                    adj_data['adj_factor'])
                adj_factor[security_code] = adj_data['adj_factor']
            # Zeros and infs come from events with a missing close; carry the
            # last valid factor forward and default untouched days to 1
            backward_factor.replace([np.inf, 0], np.nan, inplace=True)
            backward_factor.ffill(inplace=True)
            backward_factor.fillna(1, inplace=True)
            backward_factor = backward_factor.reindex(
                columns=all_market_data['close'].columns, fill_value=1)
            save_data_to_hdf5(path, AdjustmentFactor.BACKWARD_ADJ_FACTOR.value,
                              backward_factor)
            # Forward factor rescales the backward factor by its latest value
            save_data_to_hdf5(path, AdjustmentFactor.FROWARD_ADJ_FACTOR.value,
                              backward_factor.div(backward_factor.iloc[-1]))
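To make the docstring's formula concrete, here is a worked example with made-up numbers: a 10-for-3 bonus issue plus a 0.5 yuan per-share cash dividend, with no conversion, rights issue, or SEO (all values hypothetical):

    # Record-day close 10.0; ratio = bonus share ratio only
    close, ratio, cash_dividend = 10.0, 0.3, 0.5
    rightsissue_ratio = seo_ratio = rightsissue_price = seo_price = 0.0

    adj_factor = close * (1 + ratio + rightsissue_ratio + seo_ratio) / (
        close - cash_dividend + close * ratio +
        rightsissue_price * rightsissue_ratio + seo_price * seo_ratio)
    print(adj_factor)  # 13.0 / 12.5 = 1.04

cal_backward_factor itself is not shown in this excerpt; backward adjustment factors are conventionally the running cumulative product of these single-event factors along the calendar.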
Example 6
    def update_calendar_hdf5(self):
        with MongoConnect(self.database):
            data = AShareCalendar.objects().as_pymongo()
            data_df = pd.DataFrame(data)
            data_df.set_index('market', inplace=True)
            data_df = data_df.drop(['_id', 'update_date'], axis=1)
            folder_name = LocalDataFolderName.CALENDAR.value
            path = LocalDataPath.path + folder_name + '/'
            # One HDF5 file of trade days per market
            for index, row in data_df.iterrows():
                data_name = folder_name + '_' + str(index)
                save_data_to_hdf5(path, data_name, pd.DataFrame(data_df.loc[index, 'trade_days']))
Example 7
    def update_all_market_data(self, end=None):
        # Same default-argument pitfall as update_index_data: bind now() per call
        if end is None:
            end = datetime.now()
        get_collection_list = GetCollectionList()
        a_share_list = get_collection_list.get_a_share_list()
        a_share_list = [i for i in a_share_list if is_security_type(i, 'EXTRA_STOCK_A')]
        all_market_data = self.get_all_market_data(security_list=a_share_list, end=end)
        folder_name = LocalDataFolderName.MARKET_DATA.value
        sub_folder_name = LocalDataFolderName.KLINE_DAILY.value
        sub_sub_folder_name = LocalDataFolderName.A_SHARE.value
        for field in self.field:
            if field not in ['time_tag', 'interest']:
                path = LocalDataPath.path + folder_name + '/' + sub_folder_name + '/' + sub_sub_folder_name + '/'
                data_name = field
                save_data_to_hdf5(path, data_name, pd.DataFrame(all_market_data[field]))
Example 8

    def update_a_share_capitalization(self):
        """
        Save four HDF5 files: total shares, total market value,
        float A shares, float A-share market value
        :return:
        """
        with MongoConnect(self.database):
            a_share_capitalization = AShareCapitalization.objects().as_pymongo(
            )
            field_list = [
                'security_code', 'change_date', 'total_share', 'float_share',
                'float_a_share', 'float_b_share', 'float_h_share'
            ]
            self.a_share_capitalization = pd.DataFrame(
                list(a_share_capitalization)).reindex(columns=field_list)
            kline_object = GetKlineData()
            market_close_data = kline_object.cache_all_stock_data()['close']
            # Union of kline dates and share-change dates, sorted
            index = list(
                set(market_close_data.index).union(
                    set(self.a_share_capitalization['change_date'])))
            index.sort()
            share_capitalization_grouped = self.a_share_capitalization.groupby(
                'security_code')

            total_share = pd.DataFrame({})
            float_a_share = pd.DataFrame({})
            for security_code, group in share_capitalization_grouped:
                data = group.sort_values('change_date').set_index('change_date')
                try:
                    total_share[security_code] = data['total_share'].reindex(index)
                    float_a_share[security_code] = data['float_a_share'].reindex(index)
                except ValueError:
                    # A few stocks carry duplicated change_date records, which
                    # makes reindex raise; keep only the latest row per date
                    data = data[~data.index.duplicated(keep='last')]
                    total_share[security_code] = data['total_share'].reindex(index)
                    float_a_share[security_code] = data['float_a_share'].reindex(index)
            total_share = total_share.ffill().reindex(market_close_data.index)
            float_a_share = float_a_share.ffill().reindex(market_close_data.index)
            # Share counts are stored in units of 10,000 shares
            total_share_value = total_share.multiply(10000) * market_close_data
            float_a_share_value = float_a_share.multiply(
                10000) * market_close_data

            folder_name = LocalDataFolderName.INDICATOR_EVERYDAY.value
            path = LocalDataPath.path + folder_name + '/'
            save_data_to_hdf5(path, 'total_share', total_share)
            save_data_to_hdf5(path, 'float_a_share', float_a_share)
            save_data_to_hdf5(path, 'total_share_value', total_share_value)
            save_data_to_hdf5(path, 'float_a_share_value', float_a_share_value)
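The multiply(10000) above suggests share counts are stored in units of 10,000 shares (万股); assuming that convention, a quick sanity check of the market-value arithmetic with made-up numbers:

    total_share_wan = 120_000   # 120,000 * 10,000 = 1.2 billion shares
    close = 8.5                 # yuan
    total_share_value = total_share_wan * 10000 * close
    print(f'{total_share_value:,.0f}')  # 10,200,000,000 yuan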
Example 9
    def update_a_sws_index(self):
        database = DatabaseName.STOCK_BASE_DATA.value
        with MongoConnect(database):
            a_sws_index = ASwsIndex.objects().as_pymongo()
            field_list = [
                'sw_index_code', 'time_tag', 'pre_close', 'open', 'high',
                'low', 'close', 'volume', 'amount', 'index_pe', 'index_pb',
                'index_free_float_market_capitalisation',
                'index_total_market_capitalisation'
            ]
            self.a_sws_index_df = pd.DataFrame(a_sws_index).reindex(
                columns=field_list)
            # Prices are stored scaled by 10000; convert back to yuan
            price_fields = ['pre_close', 'open', 'high', 'low', 'close']
            self.a_sws_index_df[price_fields] = self.a_sws_index_df[
                price_fields].div(10000)
            folder_name = LocalDataFolderName.SWS_INDEX.value
            path = LocalDataPath.path + folder_name + '/'
            data_name = folder_name
            save_data_to_hdf5(path, data_name, self.a_sws_index_df)
Example 10

    factor_pre_obj = FactorPreProcessing(indicator_data)
    # Data can be filtered by date range and stock list
    data_filter = factor_pre_obj.data_filter()
    # Outlier trimming: four methods available
    extreme_data = factor_pre_obj.extreme_processing(
        dict(std={'sigma_multiple': 3}))
    # extreme_data = factor_pre_obj.extreme_processing(dict(mad={'median_multiple': 1.483}))
    # extreme_data = factor_pre_obj.extreme_processing(dict(quantile={'quantile_min': 0.025, 'quantile_max': 0.975}))
    # extreme_data = factor_pre_obj.extreme_processing(dict(box_plot={'median_multiple': 3}))

    # Neutralization: industry and/or float market value neutrality
    neutralize_data = factor_pre_obj.neutralize_processing(
        dict(neutralize_method=[
            NeutralizeMethod.INDUSTRY.value,
            NeutralizeMethod.MARKET_VALUE.value
        ]))

    # Normalization: three methods available
    # scale_data = factor_pre_obj.scale_processing(ScaleMethod.MIN_MAX.value)
    scale_data = factor_pre_obj.scale_processing(ScaleMethod.Z_SCORE.value)
    # scale_data = factor_pre_obj.scale_processing(ScaleMethod.RANK.value)

    # NaN filling: two methods implemented
    fill_nan_data = factor_pre_obj.fill_nan_processing(
        FillNanMethod.MEAN.value)

    # Save the preprocessed factor data for single-factor testing
    path = LocalDataPath.path + LocalDataFolderName.FACTOR.value + '/'
    save_data_to_hdf5(path, 'factor_ma10', fill_nan_data)
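FactorPreProcessing's internals are not part of this excerpt. As a rough guide, here is a minimal pandas sketch of what 3-sigma clipping and z-score scaling conventionally do per daily cross-section (rows are dates, columns are stocks; the function names are illustrative, not this library's API):

    import pandas as pd

    def clip_3_sigma(factor: pd.DataFrame, sigma_multiple: float = 3.0) -> pd.DataFrame:
        # Clip each date's cross-section to mean +/- sigma_multiple * std
        mean, std = factor.mean(axis=1), factor.std(axis=1)
        return factor.clip(mean - sigma_multiple * std,
                           mean + sigma_multiple * std, axis=0)

    def z_score(factor: pd.DataFrame) -> pd.DataFrame:
        # Standardize each date's cross-section to zero mean and unit std
        return factor.sub(factor.mean(axis=1), axis=0).div(
            factor.std(axis=1), axis=0)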