Example #1
    def insert_security_code(self, market, file_name, path):
        database = DatabaseName.INDEX_KLINE_DAILY.value
        with MongoConnect(database):
            print(path + file_name + '\n')
            kline_daily_data = pd.read_csv(path + file_name, encoding='unicode_escape')
            code = file_name.split('.')[0]
            code_transfer_dict = {'999999': '000001', '999998': '000002', '999997': '000003', '999996': '000004',
                                  '999995': '000005', '999994': '000006', '999993': '000007', '999992': '000008',
                                  '999991': '000010', '999990': '000011', '999989': '000012', '999988': '000013',
                                  '999987': '000016', '999986': '000015', '000300': '000300'}
            if market == 'SH':
                code = code_transfer_dict.get(code, '00' + code[2:])
            security_code = code + '.' + market
            # The source CSV headers are misspelled ('volumw', 'turover'); select them
            # as-is, then rename to the canonical column names.
            kline_daily_data = kline_daily_data.reindex(columns=['date', 'open', 'high', 'low', 'close', 'volumw',
                                                                 'turover', 'match_items', 'interest'])
            kline_daily_data.rename(columns={'volumw': 'volume', 'turover': 'amount'}, inplace=True)

            with switch_collection(Kline, security_code) as KlineDaily_security_code:
                doc_list = []
                for index, row in kline_daily_data.iterrows():
                    # int() raises on NaN, so test the raw value before converting.
                    if not np.isnan(row['date']):
                        time_tag = datetime.strptime(str(int(row['date'])), "%Y%m%d")
                        doc = KlineDaily_security_code(time_tag=time_tag, pre_close=None,
                                                       open=int(row['open']), high=int(row['high']),
                                                       low=int(row['low']), close=int(row['close']),
                                                       volume=int(row['volume']), amount=int(row['amount']),
                                                       match_items=int(row['match_items']), interest=int(row['interest']))
                        doc_list.append(doc)

                if doc_list:
                    KlineDaily_security_code.objects.insert(doc_list)
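
The 'SH' branch above folds the quote-vendor index codes (999999 and friends) back to exchange index codes. A standalone sketch of that mapping; the helper name and the trimmed dictionary here are mine, not the project's:

SH_INDEX_CODE_MAP = {'999999': '000001', '999998': '000002', '000300': '000300'}

def transfer_sh_index_code(code, market):
    """Map a vendor index code to its exchange code and append the market suffix."""
    if market == 'SH':
        # Known special cases first; otherwise rewrite the leading digits.
        code = SH_INDEX_CODE_MAP.get(code, '00' + code[2:])
    return code + '.' + market

assert transfer_sh_index_code('999999', 'SH') == '000001.SH'
assert transfer_sh_index_code('999990', 'SH') == '009990.SH'
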
Example #2
    def update_a_share_capitalization(self):
        """
        Save four HDF5 files: total share capital, total market value,
        float share capital and float market value.
        :return:
        """
        with MongoConnect(self.database):
            a_share_capitalization = AShareCapitalization.objects().as_pymongo()
            field_list = ['security_code', 'change_date', 'total_share', 'float_share', 'float_a_share',
                          'float_b_share', 'float_h_share']
            self.a_share_capitalization = pd.DataFrame(list(a_share_capitalization)).reindex(columns=field_list)
            kline_object = GetKlineData()
            market_close_data = kline_object.cache_all_stock_data()['close']
            index = list(set(market_close_data.index).union(set(self.a_share_capitalization['change_date'])))
            # A set union has no order; sort before using it as an index.
            index.sort()

            share_capitalization_grouped = self.a_share_capitalization.groupby('security_code')

            share_capitalization = pd.DataFrame(index=index)
            for security_code, group in share_capitalization_grouped:
                data = group.reindex(['change_date', 'total_share'], axis=1).sort_values('change_date').set_index(
                    'change_date')
                try:
                    share_capitalization[security_code] = data
                except ValueError:
                    # Four securities have duplicated change dates that need manual cleaning;
                    # keep the first record per date so the assignment succeeds.
                    # print(data)
                    share_capitalization[security_code] = data[~data.index.duplicated()]
            share_capitalization = share_capitalization.fillna(method='ffill').reindex(market_close_data.index)
            return share_capitalization.multiply(10000) * market_close_data
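
The method works by forward-filling sparse change-date records onto the daily close index before multiplying into a market-value series. A toy illustration with made-up numbers (not project data):

import pandas as pd

close = pd.DataFrame({'000001.SZ': [10.0, 10.5, 11.0]},
                     index=pd.date_range('2020-01-01', periods=3))
# Share capital (in units of 10,000 shares) is known only on its change date.
total_share = pd.Series({pd.Timestamp('2020-01-01'): 5.0}, name='000001.SZ')
aligned = total_share.reindex(close.index).fillna(method='ffill')
market_value = aligned.multiply(10000) * close['000001.SZ']  # 500,000 ... 550,000
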
Example #3
    def update_index_class(self, industry_class_name, industry_class_dict):
        with MongoConnect(self.database):
            index_members_data = AShareIndexMembers.objects(
                index_code__in=list(industry_class_dict.keys())).as_pymongo()
            field_list = ['index_code', 'security_code', 'in_date', 'out_date']
            self.index_members_df = pd.DataFrame(
                list(index_members_data)).reindex(columns=field_list)
            self.index_members_df = self.index_members_df.fillna(
                datetime.now()).reset_index(drop=True)

            get_collection_list = GetCollectionList()
            a_share_list = get_collection_list.get_a_share_list()
            calendar_obj = GetCalendar()
            calendar_SH = calendar_obj.get_calendar('SH')
            self.index_class = pd.DataFrame(columns=a_share_list,
                                            index=calendar_SH)

            def industry_history(x, index_members_df):
                industry_in_out_date = index_members_df[
                    index_members_df.security_code == x.name]
                for index, row in industry_in_out_date.iterrows():
                    x[row['in_date']:row['out_date']] = row['index_code']
                return x

            self.index_class = self.index_class.apply(
                industry_history, args=(self.index_members_df, ), axis=0)
            self.index_class = self.index_class.fillna(method='pad').fillna(
                method='backfill')
            folder_name = LocalDataFolderName.INDUSTRY_CLASS.value
            path = LocalDataPath.path + folder_name + '/'
            data_name = industry_class_name
            save_data_to_hdf5(path, data_name, self.index_class)
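
The column-wise apply above stamps each (in_date, out_date) membership interval onto a daily calendar. A self-contained toy version with hypothetical data:

import pandas as pd

calendar = pd.date_range('2020-01-01', '2020-01-10')
index_class = pd.DataFrame(index=calendar, columns=['000001.SZ'])
members = pd.DataFrame([{'security_code': '000001.SZ', 'index_code': '801780.SI',
                         'in_date': pd.Timestamp('2020-01-03'),
                         'out_date': pd.Timestamp('2020-01-07')}])

def industry_history(x, index_members_df):
    for _, row in index_members_df[index_members_df.security_code == x.name].iterrows():
        x[row['in_date']:row['out_date']] = row['index_code']  # label the interval
    return x

index_class = index_class.apply(industry_history, args=(members,), axis=0)
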
Example #4
    def save_factor_data(self, factor_name, data_source=None):
        if data_source is None:
            data_source = ['hdf5', 'mongo']
        if 'hdf5' in data_source:
            # Save the pre-processed data to local HDF5 for single-factor testing.
            path = LocalDataPath.path + LocalDataFolderName.FACTOR.value + '/'
            save_data_to_hdf5(path, factor_name, self.raw_data)

        if 'mongo' in data_source:
            # Save the pre-processed data to mongo.
            with MongoConnect(DatabaseName.MULTI_FACTOR_DATA.value):
                doc_list = []
                raw_data = self.raw_data.rename(columns={
                    i: code_market_to_market_code(i)
                    for i in self.raw_data.columns
                })
                for index, row in raw_data.iterrows():
                    doc = FactorPreProcessingData(factor_name=factor_name,
                                                  time_tag=index,
                                                  factor_data=row)
                    doc_list.append(doc)
                    if len(doc_list) > 999:
                        FactorPreProcessingData.objects.insert(doc_list)
                        doc_list = []
                if doc_list:
                    FactorPreProcessingData.objects.insert(doc_list)
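
The 1,000-document batching above recurs in several of these snippets. A hedged generic helper with the same shape (batch_insert is my name, not the project's):

def batch_insert(doc_cls, docs, batch_size=1000):
    """Insert mongoengine documents in fixed-size batches."""
    buffer = []
    for doc in docs:
        buffer.append(doc)
        if len(buffer) >= batch_size:
            doc_cls.objects.insert(buffer)
            buffer = []
    if buffer:  # flush the tail; pymongo rejects an empty insert_many
        doc_cls.objects.insert(buffer)
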
Example #5
    def save_a_share_ex_right_dividend(self):
        database = 'stock_base_data'
        with MongoConnect(database):
            doc_list = []
            for index, row in self.data_df.iterrows():
                row_dict = dict(row)

                row_dict['security_code'] = row_dict['S_INFO_WINDCODE']
                row_dict.pop('OBJECT_ID')
                row_dict.pop('S_INFO_WINDCODE')

                doc = AShareExRightDividend()

                for key, value in row_dict.items():
                    if key.lower() in self.collection_property_list:
                        property_name = AShareExRightDividend.__dict__[key.lower()]
                        if isinstance(property_name, StringField):
                            setattr(doc, key.lower(), str(value))
                        elif isinstance(property_name, DateTimeField):
                            if np.isnan(value):
                                setattr(doc, key.lower(), None)
                            else:
                                setattr(doc, key.lower(), datetime.strptime(str(int(value)), "%Y%m%d"))
                        else:
                            setattr(doc, key.lower(), value)
                doc_list.append(doc)
                if len(doc_list) > 999:
                    AShareExRightDividend.objects.insert(doc_list)
                    doc_list = []
            if doc_list:
                AShareExRightDividend.objects.insert(doc_list)
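
The isinstance dispatch above works because mongoengine keeps field descriptors in the document class __dict__. A reduced sketch with a hypothetical document class:

from datetime import datetime
from mongoengine import Document, StringField, DateTimeField, FloatField

class ExRightDemo(Document):
    security_code = StringField()
    ex_date = DateTimeField()
    ratio = FloatField()

def set_typed(doc, key, value):
    field = type(doc).__dict__[key]
    if isinstance(field, StringField):
        setattr(doc, key, str(value))
    elif isinstance(field, DateTimeField):
        # WIND-style integer dates such as 20200102.0
        setattr(doc, key, datetime.strptime(str(int(value)), "%Y%m%d"))
    else:
        setattr(doc, key, value)
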
Example #6
    def update_index_members(self):
        with MongoConnect(self.database):
            index_members_data = AShareIndexMembers.objects().as_pymongo()
            field_list = ['index_code', 'security_code', 'in_date', 'out_date']
            self.index_members_df = pd.DataFrame(
                list(index_members_data)).reindex(columns=field_list)
            folder_name = LocalDataFolderName.INDEX_MEMBER.value
            path = LocalDataPath.path + folder_name + '/'
            data_name = folder_name
            save_data_to_hdf5(path, data_name, self.index_members_df)
Example #7
    def save_a_share_adj_factor_right(self):
        """
        Take the close price on the record date as the price for bonus and converted shares,
        then compute the adjustment factor (adj_factor) and update AShareExRightDividend.
        ratio = bonus-share ratio + conversion ratio + split/consolidation ratio
        single-step adjustment factor = record-date close * (1 + ratio + rights-issue ratio + SEO ratio) /
        (record-date close - cash dividend + record-date close * ratio
         + rights-issue price * rights-issue ratio + SEO price * SEO ratio)
        :return:
        """
        kline_object = GetKlineData()
        all_market_data = kline_object.cache_all_stock_data()

        with MongoConnect(self.database):
            self.data = pd.DataFrame(
                AShareExRightDividend.objects.as_pymongo())
            self.data['close'] = self.data.apply(
                lambda x: self.get_adj_day_close(x['security_code'], x['ex_date'], all_market_data),
                axis=1)
            self.data = self.data.fillna(0)
            ratio = (self.data['bonus_share_ratio'] + self.data['conversed_ratio']
                     + self.data['consolidate_split_ratio'])
            self.data['adj_factor'] = self.data['close'] * (
                1 + ratio + self.data['rightsissue_ratio'] + self.data['seo_ratio']
            ) / (self.data['close'] - self.data['cash_dividend_ratio']
                 + self.data['close'] * ratio
                 + self.data['rightsissue_price'] * self.data['rightsissue_ratio']
                 + self.data['seo_price'] * self.data['seo_ratio'])

            folder_name = LocalDataFolderName.ADJ_FACTOR.value
            path = LocalDataPath.path + folder_name + '/'
            self.data = self.data.reindex(
                columns=['security_code', 'ex_date', 'adj_factor'])
            self.data.set_index(["ex_date"], inplace=True)
            self.data.sort_index(inplace=True)
            calendar_obj = GetCalendar()
            calendar = calendar_obj.get_calendar('SZ')
            backward_factor = pd.DataFrame(index=calendar)
            adj_factor = pd.DataFrame(index=calendar)
            data_dict = dict(list(self.data.groupby('security_code')))
            for security_code, adj_data in data_dict.items():
                backward_factor[security_code] = self.cal_backward_factor(
                    adj_data['adj_factor'])
                adj_factor[security_code] = adj_data['adj_factor']
            backward_factor.replace([np.inf, 0], np.nan, inplace=True)
            backward_factor.fillna(method='ffill', inplace=True)
            backward_factor.fillna(1, inplace=True)
            backward_factor = backward_factor.reindex(
                columns=all_market_data['close'].columns, fill_value=1)
            save_data_to_hdf5(path, AdjustmentFactor.BACKWARD_ADJ_FACTOR.value,
                              backward_factor)
            save_data_to_hdf5(path, AdjustmentFactor.FROWARD_ADJ_FACTOR.value,
                              backward_factor.div(backward_factor.iloc[-1]))
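
cal_backward_factor is not shown in this listing. A common construction (an assumption, not necessarily this project's implementation) chains the single-step factors by cumulative product; the forward series then follows by dividing by the last value, exactly as the final save call above does:

import pandas as pd

def cal_backward_factor(adj_factor):
    # Chain single-step ex-right factors into a cumulative backward-adjustment series.
    return adj_factor.cumprod()

single_step = pd.Series([1.0, 1.25, 1.1])
backward = cal_backward_factor(single_step)   # 1.0, 1.25, 1.375
forward = backward.div(backward.iloc[-1])     # 0.727..., 0.909..., 1.0
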
Example #8
    def update_calendar_hdf5(self):
        with MongoConnect(self.database):
            data = AShareCalendar.objects().as_pymongo()
            data_df = pd.DataFrame(data)
            data_df.set_index('market', inplace=True)
            data_df = data_df.drop(['_id', 'update_date'], axis=1)
            folder_name = LocalDataFolderName.CALENDAR.value
            path = LocalDataPath.path + folder_name + '/'
            for index, row in data_df.iterrows():
                data_name = folder_name + '_' + str(index)
                save_data_to_hdf5(path, data_name, pd.DataFrame(row['trade_days']))
Example #9
    def insert_security_code(self, market, file_name, path):
        with MongoConnect(self.database):
            print(path + file_name + '\n')
            kline_daily_data = pd.read_csv(path + file_name, encoding='unicode_escape')
            security_code = file_name.split('.')[0] + '.' + market
            if is_security_type(security_code, 'EXTRA_STOCK_A'):
                # The source CSV headers are misspelled ('volumw', 'turover'); select them
                # as-is, then rename to the canonical column names.
                kline_daily_data = kline_daily_data.reindex(columns=['date', 'open', 'high', 'low', 'close', 'volumw',
                                                                     'turover', 'match_items', 'interest'])
                kline_daily_data.rename(columns={'volumw': 'volume', 'turover': 'amount'}, inplace=True)
                kline_daily_data = kline_daily_data[kline_daily_data.date >= 20020104]
                with switch_collection(Kline, security_code) as KlineDaily_security_code:
                    doc_list = []
                    security_code_data = pd.DataFrame()
                    if security_code in self.data_dict:
                        security_code_data = self.data_dict[security_code].set_index(["TRADE_DT"])
                        security_code_data = security_code_data.fillna(0)
                    for index, row in kline_daily_data.iterrows():
                        # int() raises on NaN, so test the raw value before converting.
                        if not np.isnan(row['date']):
                            date_int = int(row['date'])
                            try:
                                pre_close = int(10000 * security_code_data.loc[date_int, 'S_DQ_PRECLOSE'])
                            except KeyError:
                                pre_close = None
                            date_int = str(date_int)
                            time_tag = datetime.strptime(date_int, "%Y%m%d")
                            doc = KlineDaily_security_code(time_tag=time_tag, pre_close=pre_close,
                                                           open=int(row['open']), high=int(row['high']),
                                                           low=int(row['low']), close=int(row['close']),
                                                           volume=int(row['volume']), amount=int(row['amount']),
                                                           match_items=int(row['match_items']), interest=int(row['interest']))
                            doc_list.append(doc)

                    # Supplement daily bars before 20020104 from the full CSV table; match_items is 0.
                    security_code_data = security_code_data[security_code_data.index < 20020104]
                    for index, row in security_code_data.iterrows():
                        if row['S_DQ_AMOUNT'] > 0:
                            date_int = int(index)
                            date_int = str(date_int)
                            time_tag = datetime.strptime(date_int, "%Y%m%d")
                            try:
                                pre_close = int(row['S_DQ_PRECLOSE'] * 10000)
                            except KeyError:
                                pre_close = None
                            doc = KlineDaily_security_code(time_tag=time_tag, pre_close=pre_close,
                                                           open=int(row['S_DQ_OPEN'] * 10000),
                                                           high=int(row['S_DQ_HIGH'] * 10000),
                                                           low=int(row['S_DQ_LOW'] * 10000),
                                                           close=int(row['S_DQ_CLOSE'] * 10000),
                                                           volume=int(row['S_DQ_VOLUME'] * 100),
                                                           amount=int(row['S_DQ_AMOUNT'] * 1000),
                                                           match_items=0, interest=0)
                            doc_list.append(doc)
                    if doc_list:
                        KlineDaily_security_code.objects.insert(doc_list)
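
These K-line snippets store prices as integers scaled by 10,000 (volume by 100, amount by 1,000) to avoid float rounding in the database. A small sketch of the convention; the helper names are mine:

PRICE_SCALE, VOLUME_SCALE, AMOUNT_SCALE = 10000, 100, 1000

def encode_price(price):
    return int(price * PRICE_SCALE)

def decode_price(raw):
    return raw / PRICE_SCALE

assert encode_price(12.5) == 125000
assert decode_price(125000) == 12.5
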
Example #10
    def update_a_share_capitalization(self):
        """
        Save four HDF5 files: total share capital, total market value,
        float share capital and float market value.
        :return:
        """
        with MongoConnect(self.database):
            a_share_capitalization = AShareCapitalization.objects().as_pymongo()
            field_list = [
                'security_code', 'change_date', 'total_share', 'float_share',
                'float_a_share', 'float_b_share', 'float_h_share'
            ]
            self.a_share_capitalization = pd.DataFrame(
                list(a_share_capitalization)).reindex(columns=field_list)
            kline_object = GetKlineData()
            market_close_data = kline_object.cache_all_stock_data()['close']
            index = list(
                set(market_close_data.index).union(
                    set(self.a_share_capitalization['change_date'])))
            index.sort()
            share_capitalization_grouped = self.a_share_capitalization.groupby(
                'security_code')

            total_share = pd.DataFrame({})
            float_a_share = pd.DataFrame({})
            for security_code, group in share_capitalization_grouped:
                data = group.sort_values('change_date').set_index('change_date')
                try:
                    total_share[security_code] = data['total_share'].reindex(index)
                    float_a_share[security_code] = data['float_a_share'].reindex(index)
                except ValueError:
                    # Four securities have duplicated change dates that need manual cleaning;
                    # keep the first record per date so the reindex succeeds.
                    # print(data[data.index.duplicated()])
                    deduplicated = data[~data.index.duplicated()]
                    total_share[security_code] = deduplicated['total_share'].reindex(index)
                    float_a_share[security_code] = deduplicated['float_a_share'].reindex(index)
            total_share = total_share.fillna(method='ffill').reindex(
                market_close_data.index)
            float_a_share = float_a_share.fillna(method='ffill').reindex(
                market_close_data.index)
            total_share_value = total_share.multiply(10000) * market_close_data
            float_a_share_value = float_a_share.multiply(10000) * market_close_data

            folder_name = LocalDataFolderName.INDICATOR_EVERYDAY.value
            path = LocalDataPath.path + folder_name + '/'
            save_data_to_hdf5(path, 'total_share', total_share)
            save_data_to_hdf5(path, 'float_a_share', float_a_share)
            save_data_to_hdf5(path, 'total_share_value', total_share_value)
            save_data_to_hdf5(path, 'float_a_share_value', float_a_share_value)
Example #11
    def _get_data_with_process_pool(self, database, security_list,
                                    process_manager_dict, security_list_i):
        with MongoConnect(database):
            thread_data_dict = {}
            for stock in security_list:
                with switch_collection(Kline, stock) as KlineDaily_security_code:
                    security_code_data = KlineDaily_security_code.objects(
                        time_tag__lte=self.end).as_pymongo()
                    security_code_data_df = pd.DataFrame(
                        list(security_code_data)).reindex(columns=self.field)
                    security_code_data_df.set_index(["time_tag"], inplace=True)
                    thread_data_dict[stock] = security_code_data_df.reindex(
                        self.calendar_SZ).fillna(method='ffill')
            process_manager_dict[security_list_i] = thread_data_dict
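
A sketch of how such a worker is typically driven (the chunking and the names here are assumptions, not the project's code): the security list is split into chunks, each dispatched through a Pool and written into a Manager dict under its chunk index:

from multiprocessing import Manager, Pool

def load_all(loader, database, security_list, n_process=4):
    process_manager_dict = Manager().dict()
    chunks = [security_list[i::n_process] for i in range(n_process)]
    p = Pool(n_process)
    for i, chunk in enumerate(chunks):
        p.apply_async(loader._get_data_with_process_pool,
                      args=(database, chunk, process_manager_dict, i))
    p.close()
    p.join()
    # Merge the per-chunk dicts back into one {security: DataFrame} map.
    return {k: v for d in process_manager_dict.values() for k, v in d.items()}
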
Example #12
    def save_a_share_calendar(self):
        database = DatabaseName.STOCK_BASE_DATA.value
        with MongoConnect(database):
            doc_list = []
            data_grouped = self.data_df.groupby("S_INFO_EXCHMARKET")
            data_dict = {i[0]: list(i[1]['TRADE_DAYS']) for i in data_grouped}
            for market, trade_days in data_dict.items():
                if market == 'SSE':
                    market = 'SH'
                elif market == 'SZSE':
                    market = 'SZ'
                trade_days = [date_to_datetime(str(i)) for i in sorted(trade_days)]
                doc = AShareCalendar(market=market, trade_days=trade_days)
                doc_list.append(doc)
            AShareCalendar.objects.insert(doc_list)
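
A toy run of the grouping above with made-up rows, showing the SSE/SZSE to SH/SZ normalization:

import pandas as pd

data_df = pd.DataFrame({'S_INFO_EXCHMARKET': ['SSE', 'SSE', 'SZSE'],
                        'TRADE_DAYS': [20200103, 20200102, 20200102]})
market_map = {'SSE': 'SH', 'SZSE': 'SZ'}
data_dict = {market_map.get(m, m): sorted(g['TRADE_DAYS'])
             for m, g in data_df.groupby('S_INFO_EXCHMARKET')}
# {'SH': [20200102, 20200103], 'SZ': [20200102]}
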
Example #13
    def update_index_data(self, end=None):
        """

        :param end: cut-off datetime for the query; defaults to the current time
        :return:
        """
        # A datetime.now() default in the signature would be evaluated only once,
        # at definition time, so resolve it per call instead.
        self.end = datetime.now() if end is None else end
        get_collection_list = GetCollectionList()
        index_list = get_collection_list.get_index_list()
        database = DatabaseName.INDEX_KLINE_DAILY.value
        with MongoConnect(database):
            index_data_dict = {}
            for index_code in index_list:
                with switch_collection(Kline,
                                       index_code) as KlineDaily_index_code:
                    security_code_data = KlineDaily_index_code.objects(
                        time_tag__lte=self.end).as_pymongo()
                    security_code_data_df = pd.DataFrame(
                        list(security_code_data)).reindex(columns=self.field)
                    security_code_data_df.set_index(["time_tag"], inplace=True)
                    # The database holds one extra day; drop it as a special case.
                    extra_day = pd.Timestamp(datetime(2016, 1, 1))
                    if extra_day in security_code_data_df.index:
                        security_code_data_df = security_code_data_df.drop(labels=extra_day, axis=0)
                    index_data_dict[index_code] = security_code_data_df
        field_data_dict = {}
        for i in self.field:
            if i != 'time_tag':
                field_data_pd = pd.DataFrame(
                    {key: value[i]
                     for key, value in index_data_dict.items()})
                # The raw open/high/low/close are scaled by 10,000, so divide them back.
                if i in ['open', 'high', 'low', 'close']:
                    field_data_dict[i] = field_data_pd.div(10000)
                else:
                    field_data_dict[i] = field_data_pd
        folder_name = LocalDataFolderName.MARKET_DATA.value
        sub_folder_name = LocalDataFolderName.KLINE_DAILY.value
        sub_sub_folder_name = LocalDataFolderName.INDEX.value
        for field in self.field:
            if field not in ['time_tag', 'interest']:
                path = LocalDataPath.path + folder_name + '/' + sub_folder_name + '/' + sub_sub_folder_name + '/'
                data_name = field
                save_data_to_hdf5(path, data_name,
                                  pd.DataFrame(field_data_dict[field]))
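
The dict-of-frames to frame-per-field pivot above, shown on a minimal made-up example:

import pandas as pd

idx = pd.date_range('2020-01-02', periods=2)
index_data_dict = {'000300.SH': pd.DataFrame({'close': [40000, 40400]}, index=idx),
                   '000905.SH': pd.DataFrame({'close': [52000, 52100]}, index=idx)}
# One column per index code; raw prices are scaled by 10,000, so divide back.
close_panel = pd.DataFrame({key: value['close']
                            for key, value in index_data_dict.items()}).div(10000)
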
Example #14
    def save_ic_analysis_result(self, factor_name):
        with MongoConnect(DatabaseName.MULTI_FACTOR_DATA.value):
            ic_df = self.ic_df.copy()
            p_value_df = self.p_value_df.copy()
            ic_df.index = ic_df.index.format()
            p_value_df.index = p_value_df.index.format()
            doc = FactorIcAnalysisResult(
                factor_name=factor_name,
                # start date of the factor data
                begin_date=self.factor.index[0],
                # end date of the factor data
                end_date=self.factor.index[-1],
                # IC signal decay: index is the time series, columns are the decay
                # periods, the closed interval [1, self.ic_decay]
                ic=ic_df,
                # p-value signal decay: index is the time series, columns are the decay
                # periods, the closed interval [1, self.ic_decay]
                p_value=p_value_df,
                ic_result=self.ic_result)
            doc.save()
Example #15
    def save_a_sw_index(self):
        database = 'stock_base_data'

        with MongoConnect(database):
            doc_list = []
            for index, row in self.data_df.iterrows():
                row = dict(row)
                for field, value in row.items():
                    if field in ['S_DQ_PRECLOSE', 'S_DQ_OPEN', 'S_DQ_HIGH',
                                 'S_DQ_LOW', 'S_DQ_CLOSE']:
                        if not np.isnan(value):
                            row[field] = int(row[field] * 10000)
                    elif field == 'S_DQ_VOLUME':
                        if not np.isnan(value):
                            row[field] = int(row[field] * 100)
                    elif field == 'S_DQ_AMOUNT':
                        if not np.isnan(value):
                            row[field] = int(row[field] * 1000)

                doc = ASwsIndex(
                    sw_index_code=row['S_INFO_WINDCODE'],
                    time_tag=date_to_datetime(str(row['TRADE_DT'])),
                    pre_close=row['S_DQ_PRECLOSE'],
                    open=row['S_DQ_OPEN'],
                    high=row['S_DQ_HIGH'],
                    low=row['S_DQ_LOW'],
                    close=row['S_DQ_CLOSE'],
                    volume=row['S_DQ_VOLUME'],
                    amount=row['S_DQ_AMOUNT'],
                    index_pe=row['S_VAL_PE'],
                    index_pb=row['S_VAL_PB'],
                    index_free_float_market_capitalisation=row['S_DQ_MV'],
                    index_total_market_capitalisation=row['S_VAL_MV'])
                doc_list.append(doc)
                if len(doc_list) > 999:
                    ASwsIndex.objects.insert(doc_list)
                    doc_list = []
            if doc_list:
                ASwsIndex.objects.insert(doc_list)
Example #16
    def save_share_capitalization(self):
        database = DatabaseName.STOCK_BASE_DATA.value
        with MongoConnect(database):
            doc_list = []
            for index, row in self.data_df.iterrows():
                doc = AShareCapitalization(security_code=row['S_INFO_WINDCODE'],
                                           change_date=datetime.strptime(str(int(row['CHANGE_DT'])), "%Y%m%d"),
                                           total_share=row['TOT_SHR'],
                                           float_share=row['FLOAT_SHR'],
                                           float_a_share=row['FLOAT_A_SHR'],
                                           float_b_share=row['FLOAT_B_SHR'],
                                           float_h_share=row['FLOAT_H_SHR'])
                doc_list.append(doc)
            AShareCapitalization.objects.insert(doc_list)
Example #17
    def update_a_sws_index(self):
        database = DatabaseName.STOCK_BASE_DATA.value
        with MongoConnect(database):
            a_sws_index = ASwsIndex.objects().as_pymongo()
            field_list = [
                'sw_index_code', 'time_tag', 'pre_close', 'open', 'high',
                'low', 'close', 'volume', 'amount', 'index_pe', 'index_pb',
                'index_free_float_market_capitalisation',
                'index_total_market_capitalisation'
            ]
            self.a_sws_index_df = pd.DataFrame(a_sws_index).reindex(
                columns=field_list)
            # Stored prices are scaled by 10,000; divide the price columns back.
            price_columns = ['pre_close', 'open', 'high', 'low', 'close']
            self.a_sws_index_df[price_columns] = self.a_sws_index_df[price_columns].div(10000)
            folder_name = LocalDataFolderName.SWS_INDEX.value
            path = LocalDataPath.path + folder_name + '/'
            data_name = folder_name
            save_data_to_hdf5(path, data_name, self.a_sws_index_df)
Example #18
    def insert_security_code_list(self):
        stock_code_list = []
        for market in self.market_list:
            path = self.data_path + market + '/MultDate/'
            file_list = os.listdir(path)
            stock_code_list += [i.split('.')[0] + '.' + market for i in file_list]
            file_num = 0
            p = Pool(8)
            for file_name in file_list:
                file_num += 1
                print('Completed count:', file_num)
                p.apply_async(self.insert_security_code, args=(market, file_name, path))
            p.close()
            p.join()

        delist = list(set(self.data_dict.keys()).difference(set(stock_code_list)))
        with MongoConnect(self.database):
            for security_code in delist:
                with switch_collection(Kline, security_code) as KlineDaily_security_code:
                    doc_list = []
                    security_code_data = self.data_dict[security_code].set_index(["TRADE_DT"])
                    for index, row in security_code_data.iterrows():
                        if row['S_DQ_AMOUNT'] > 0:
                            date_int = int(index)
                            date_int = str(date_int)
                            time_tag = datetime.strptime(date_int, "%Y%m%d")
                            try:
                                pre_close = int(row['S_DQ_PRECLOSE'] * 10000)
                            except KeyError:
                                pre_close = None
                            doc = KlineDaily_security_code(time_tag=time_tag, pre_close=pre_close,
                                                           open=int(row['S_DQ_OPEN'] * 10000),
                                                           high=int(row['S_DQ_HIGH'] * 10000),
                                                           low=int(row['S_DQ_LOW'] * 10000),
                                                           close=int(row['S_DQ_CLOSE'] * 10000),
                                                           volume=int(row['S_DQ_VOLUME'] * 100),
                                                           amount=int(row['S_DQ_AMOUNT'] * 1000),
                                                           match_items=0, interest=0)
                            doc_list.append(doc)
                    if doc_list:
                        KlineDaily_security_code.objects.insert(doc_list)
Example #19
    def save_ic_analysis_result(self, factor_name):
        with MongoConnect(DatabaseName.MULTI_FACTOR_DATA.value):
            ic_df = self.ic_df.copy()
            p_value_df = self.p_value_df.copy()
            ic_df.index = ic_df.index.format()
            p_value_df.index = p_value_df.index.format()
            doc = FactorIcAnalysisResult(
                factor_name=factor_name,
                # start date of the factor data
                begin_date=self.factor.index[0],
                # end date of the factor data
                end_date=self.factor.index[-1],
                # IC signal decay: index is the time series, columns are the decay
                # periods, the closed interval [1, self.ic_decay]
                ic=ic_df,
                # p-value signal decay: index is the time series, columns are the decay
                # periods, the closed interval [1, self.ic_decay]
                p_value=p_value_df,
                # IC mean, IC std, IC_IR ratio, share of IC > 0, share of |IC| > 0.02,
                # skewness, kurtosis, share significantly positive, share significantly
                # negative, sign-switch ratio, same-direction ratio
                # index_list=['ic_mean', 'ic_std', 'ic_ir', 'ic_ratio', 'ic_abs_ratio', 'ic_skewness', 'ic_kurtosis',
                #             'ic_positive_ratio', 'ic_negative_ratio', 'ic_change_ratio', 'ic_unchange_ratio', ]
                ic_result=self.ic_result)
            doc.save()
Example #20
    def save_regression_analysis_result(self, factor_name):
        with MongoConnect(DatabaseName.MULTI_FACTOR_DATA.value):
            factor_return = self.factor_return.copy()
            factor_t_value = self.factor_t_value.copy()
            net_analysis_result = self.net_analysis_result
            factor_return.index = factor_return.index.format()
            factor_t_value.index = factor_t_value.index.format()
            # Convert the DatetimeIndex of each nested net-value frame to strings.
            for compounding in ['cumsum', 'cumprod']:
                for frame in ['net_value_df', 'benchmark_df']:
                    net_analysis_result[compounding][frame].index = \
                        net_analysis_result[compounding][frame].index.format()

            doc = FactorRegressionAnalysisResult(
                factor_name=factor_name,
                # start date of the factor data
                begin_date=self.factor.index[0],
                # end date of the factor data
                end_date=self.factor.index[-1],
                # ACF and PACF of the factor return, lags 1-10 by default; the result
                # list has len=11, of which entries 1-10 are used
                acf_result=self.acf_result,
                # factor return: simple interest, compound interest, daily returns
                factor_return=factor_return,
                # t-values of the single-factor test, a Series indexed by time
                factor_t_value=factor_t_value,
                # t-value statistics: 't_value_mean' is the mean absolute value,
                # 't_value_greater_two' the share of absolute values greater than 2
                factor_t_value_statistics=self.factor_t_value_statistics,
                # net-value analysis result
                net_analysis_result=self.net_analysis_result)
            doc.save()
Example #21
        result.columns = factor_ic.keys()
        return result.div(result.sum(1), axis=0)


if __name__ == '__main__':
    factor_list = ['factor_ma5', 'factor_ma10']

    path = LocalDataPath.path + LocalDataFolderName.FACTOR.value + '/'
    factor_data = {}
    for factor_name in factor_list:
        factor_single_data = get_local_data(path, factor_name + '.h5')
        # The index data is incomplete, so part of the factor data must be dropped.
        factor_data[factor_name] = factor_single_data[
            factor_single_data.index < datetime(2020, 1, 1)]

    with MongoConnect(DatabaseName.MULTI_FACTOR_DATA.value):
        factor_ic = {}
        factor_return = {}
        for factor_name in factor_list:
            factor_regression_analysis_result = FactorRegressionAnalysisResult.objects(factor_name=factor_name) \
                .only('factor_name') \
                .only('begin_date') \
                .only('end_date') \
                .only('factor_return') \
                .as_pymongo()
            factor_return[factor_name] = pd.DataFrame(
                factor_regression_analysis_result[0]['factor_return'])
            factor_return[factor_name].index = pd.DatetimeIndex(
                factor_return[factor_name].index)

            factor_ic_result = FactorIcAnalysisResult.objects(factor_name=factor_name) \
Example #22
# -*- coding: utf-8 -*-

# ------------------------------
# @Time    : 2019/11/21
# @Author  : gao
# @File    : update_finance_data.py
# @Project : AmazingQuant
# ------------------------------

from AmazingQuant.utils.mongo_connection_me import MongoConnect
from apps.server.database_server.database_field.field_a_share_finance_data import AShareIncome
from AmazingQuant.utils.performance_test import Timer

if __name__ == '__main__':
    database = 'stock_base_data'
    with MongoConnect(database):
        with Timer(True):
            security_code_list = AShareIncome.objects.distinct('security_code')
            data = AShareIncome.objects(security_code__in=security_code_list,
                                        statement_type=408009000)
            for i in data:
                print(i.security_code)
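
MongoConnect itself is used in every snippet but never shown here. A minimal mongoengine-based context manager with the same shape (an assumption about its behavior, not the project's actual code) could look like:

from mongoengine import connect, disconnect

class MongoConnectSketch:
    """Open a connection to the named database for the duration of a with-block."""

    def __init__(self, database, host='localhost', port=27017):
        self.database, self.host, self.port = database, host, port

    def __enter__(self):
        return connect(db=self.database, host=self.host, port=self.port)

    def __exit__(self, exc_type, exc_value, traceback):
        disconnect()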