Beispiel #1
0
    def update_index_data(self, end=None):
        """
        Export daily index k-line fields from MongoDB to local HDF5 files.

        For every code returned by ``GetCollectionList().get_index_list()`` the
        bars with ``time_tag <= end`` are loaded and restricted to the columns
        in ``self.field``. One frame per field is then assembled (rows:
        ``time_tag``, columns: index codes) and every field except
        ``time_tag`` and ``interest`` is written with ``save_data_to_hdf5``.

        :param end: latest ``time_tag`` to load (inclusive). When omitted or
            ``None`` the time of the call is used.
        :return: None
        """
        # Resolve the default at call time: a ``datetime.now()`` default in the
        # signature is evaluated only once, when the function is defined.
        if end is None:
            end = datetime.now()

        index_list = GetCollectionList().get_index_list()
        self.end = end

        index_data_dict = {}
        with MongoConnect(DatabaseName.INDEX_KLINE_DAILY.value):
            for index_code in index_list:
                with switch_collection(Kline, index_code) as KlineDaily_index_code:
                    records = KlineDaily_index_code.objects(time_tag__lte=self.end).as_pymongo()
                    index_df = pd.DataFrame(list(records)).reindex(columns=self.field)
                    index_df.set_index(["time_tag"], inplace=True)
                    index_data_dict[index_code] = index_df

        # 'time_tag' became the row index above and 'interest' is never saved,
        # so neither needs a per-field frame.
        export_fields = [name for name in self.field if name not in ('time_tag', 'interest')]
        price_fields = ('open', 'high', 'low', 'close')

        field_data_dict = {}
        for name in export_fields:
            field_df = pd.DataFrame({code: frame[name] for code, frame in index_data_dict.items()})
            # Raw open/high/low/close values are divided by 10000 before saving.
            field_data_dict[name] = field_df.div(10000) if name in price_fields else field_df

        path = (LocalDataPath.path + LocalDataFolderName.MARKET_DATA.value + '/' +
                LocalDataFolderName.KLINE_DAILY.value + '/' +
                LocalDataFolderName.INDEX.value + '/')
        for name in export_fields:
            save_data_to_hdf5(path, name, field_data_dict[name])
    def save_a_share_cash_flow(self):
        """
        Insert the rows of ``self.data_df`` into the ``AShareCashFlow`` collection.

        Each row is turned into a dict, ``S_INFO_WINDCODE`` is copied to
        ``security_code`` and the ``WIND_CODE``, ``OBJECT_ID`` and
        ``S_INFO_WINDCODE`` keys are removed. Remaining keys are lower-cased
        and, when listed in ``self.collection_property_list``, assigned to the
        document: string fields receive ``str(value)``, date-time fields
        receive a ``%Y%m%d``-parsed ``datetime`` (``None`` for NaN), all other
        fields receive the raw value. Documents are written in batches of
        1000.

        :return: None
        :raises KeyError: if a row lacks one of the three removed columns.
        """
        database = 'stock_base_data'
        batch_size = 1000
        with MongoConnect(database):
            doc_list = []
            for _, row in self.data_df.iterrows():
                row_dict = dict(row)
                row_dict['security_code'] = row_dict['S_INFO_WINDCODE']
                for dropped_key in ('WIND_CODE', 'OBJECT_ID', 'S_INFO_WINDCODE'):
                    row_dict.pop(dropped_key)

                doc = AShareCashFlow()
                for key, value in row_dict.items():
                    attr_name = key.lower()
                    if attr_name not in self.collection_property_list:
                        continue
                    field = AShareCashFlow.__dict__[attr_name]
                    if isinstance(field, StringField):
                        setattr(doc, attr_name, str(value))
                    elif isinstance(field, DateTimeField):
                        # Dates arrive as numbers such as 20191231 (NaN when missing).
                        if np.isnan(value):
                            setattr(doc, attr_name, None)
                        else:
                            setattr(doc, attr_name,
                                    datetime.strptime(str(int(value)), "%Y%m%d"))
                    else:
                        setattr(doc, attr_name, value)

                doc_list.append(doc)
                if len(doc_list) >= batch_size:
                    AShareCashFlow.objects.insert(doc_list)
                    doc_list = []

            # Flush the remainder only when there is one: the batch is empty
            # when there were no rows or the row count is a multiple of
            # ``batch_size``, and an empty bulk insert is rejected.
            if doc_list:
                AShareCashFlow.objects.insert(doc_list)
Beispiel #3
0
 def _get_data_with_process_pool(self, database, security_list, process_manager_dict, security_list_i):
     """
     Load daily bars for a batch of securities and publish them in a shared dict.

     For each code in ``security_list`` the bars with ``time_tag <= self.end``
     are read, restricted to ``self.field``, indexed by ``time_tag``, aligned
     to ``self.calendar_SZ`` and forward-filled.

     :param database: name passed to ``MongoConnect``.
     :param security_list: security codes to load; each is used as a collection name.
     :param process_manager_dict: mapping that receives the result.
     :param security_list_i: key under which the result is stored in ``process_manager_dict``.
     :return: None; the ``{code: DataFrame}`` result is written to
         ``process_manager_dict[security_list_i]``.
     """
     with MongoConnect(database):
         batch_data = {}
         for stock in security_list:
             with switch_collection(Kline, stock) as KlineDaily_security_code:
                 records = KlineDaily_security_code.objects(time_tag__lte=self.end).as_pymongo()
                 stock_df = pd.DataFrame(list(records)).reindex(columns=self.field)
                 stock_df.set_index(["time_tag"], inplace=True)
                 # ``ffill()`` replaces the deprecated ``fillna(method='ffill')``.
                 batch_data[stock] = stock_df.reindex(self.calendar_SZ).ffill()
         process_manager_dict[security_list_i] = batch_data
 def update_index_members(self):
     """
     Dump the ``AShareIndexMembers`` collection to a local HDF5 file.

     The documents are loaded into ``self.index_members_df`` with the columns
     ``index_code``, ``security_code``, ``in_date`` and ``out_date`` and saved
     with ``save_data_to_hdf5`` under the index-member folder, using the
     folder name as the data name.

     :return: None
     """
     wanted_columns = ['index_code', 'security_code', 'in_date', 'out_date']
     with MongoConnect(DatabaseName.STOCK_BASE_DATA.value):
         member_records = list(AShareIndexMembers.objects().as_pymongo())
         self.index_members_df = pd.DataFrame(member_records).reindex(columns=wanted_columns)
         member_folder = LocalDataFolderName.INDEX_MEMBER.value
         save_data_to_hdf5(LocalDataPath.path + member_folder + '/',
                           member_folder,
                           self.index_members_df)
Beispiel #5
0
    def save_a_share_adj_factor_right(self):
        """
        Compute ex-right/dividend adjustment factors and save them to HDF5.

        The close looked up by ``get_adj_day_close`` for each event's
        ``ex_date`` is used as the share price. Missing values are replaced
        with 0 before the per-event factor is computed::

            ratio = bonus_share_ratio + conversed_ratio + consolidate_split_ratio
            adj_factor = close * (1 + ratio + rightsissue_ratio + seo_ratio)
                         / (close - cash_dividend_ratio + close * ratio
                            + rightsissue_price * rightsissue_ratio
                            + seo_price * seo_ratio)

        Per security the factors are passed to ``cal_backward_factor`` and
        aligned to the 'SZ' calendar; ``inf`` and 0 are treated as missing,
        gaps are forward-filled and leading gaps set to 1. Two tables are
        saved: the backward factors, and the backward factors divided by
        their last row (stored under ``AdjustmentFactor.FROWARD_ADJ_FACTOR``).

        :return: None
        """
        all_market_data = GetKlineData().cache_all_stock_data()
        with MongoConnect(self.database):
            self.data = pd.DataFrame(
                AShareExRightDividend.objects.as_pymongo())
            self.data['close'] = self.data.apply(
                lambda event: self.get_adj_day_close(
                    event['security_code'], event['ex_date'], all_market_data),
                axis=1)
            self.data = self.data.fillna(0)

            events = self.data
            close = events['close']
            ratio = (events['bonus_share_ratio'] + events['conversed_ratio'] +
                     events['consolidate_split_ratio'])
            numerator = close * (1 + ratio + events['rightsissue_ratio'] +
                                 events['seo_ratio'])
            denominator = (close - events['cash_dividend_ratio'] +
                           close * ratio +
                           events['rightsissue_price'] * events['rightsissue_ratio'] +
                           events['seo_price'] * events['seo_ratio'])
            self.data['adj_factor'] = numerator / denominator

            path = LocalDataPath.path + LocalDataFolderName.ADJ_FACTOR.value + '/'
            self.data = self.data.reindex(
                columns=['security_code', 'ex_date', 'adj_factor'])
            self.data.set_index(["ex_date"], inplace=True)
            self.data.sort_index(inplace=True)

            calendar = GetCalendar().get_calendar('SZ')
            backward_factor = pd.DataFrame(index=calendar)
            # Column assignment aligns each security's result to the calendar index.
            for security_code, adj_data in self.data.groupby('security_code'):
                backward_factor[security_code] = self.cal_backward_factor(
                    adj_data['adj_factor'])

            backward_factor.replace([np.inf, 0], np.nan, inplace=True)
            # ``ffill`` replaces the deprecated ``fillna(method='ffill')``.
            backward_factor.ffill(inplace=True)
            backward_factor.fillna(1, inplace=True)

            save_data_to_hdf5(path, AdjustmentFactor.BACKWARD_ADJ_FACTOR.value,
                              backward_factor)
            save_data_to_hdf5(path, AdjustmentFactor.FROWARD_ADJ_FACTOR.value,
                              backward_factor.div(backward_factor.iloc[-1]))
 def update_calendar_hdf5(self):
     """
     Write one HDF5 dataset of trade days per market in ``AShareCalendar``.

     The collection is loaded into a frame indexed by ``market`` with the
     ``_id`` and ``update_date`` columns removed. For every index entry the
     ``trade_days`` value is wrapped in a DataFrame and saved under the
     calendar folder with the data name ``<folder>_<market>``.

     :return: None
     """
     with MongoConnect(self.database):
         calendar_df = pd.DataFrame(AShareCalendar.objects().as_pymongo())
         calendar_df.set_index('market', inplace=True)
         calendar_df = calendar_df.drop(columns=['_id', 'update_date'])

         calendar_folder = LocalDataFolderName.CALENDAR.value
         target_path = LocalDataPath.path + calendar_folder + '/'
         for market in calendar_df.index:
             trade_days_df = pd.DataFrame(calendar_df.loc[market, 'trade_days'])
             save_data_to_hdf5(target_path,
                               calendar_folder + '_' + str(market),
                               trade_days_df)
    def insert_security_code_list(self):
        """
        Import daily k-lines for every market file, then back-fill other codes.

        For each market in ``self.market_list`` the files found in
        ``<data_path><market>/MultDate/`` are handed to a pool of 8 worker
        processes running ``insert_security_code``. Codes present in
        ``self.data_dict`` but without a file are then inserted directly from
        ``self.data_dict``: only rows with ``S_DQ_AMOUNT > 0`` are kept,
        prices are multiplied by 10000, volume by 100 and amount by 1000, and
        ``match_items`` / ``interest`` are stored as 0.

        NOTE(review): the ``apply_async`` results are discarded, so exceptions
        raised inside a worker are not reported here.

        :return: None
        """
        stock_code_list = []
        for market in self.market_list:
            path = self.data_path + market + '/MultDate/'
            file_list = os.listdir(path)
            stock_code_list += [
                name.split('.')[0] + '.' + market for name in file_list
            ]
            p = Pool(8)
            for file_num, file_name in enumerate(file_list, start=1):
                print('完成数量:', file_num)
                p.apply_async(self.insert_security_code,
                              args=(market, file_name, path))
            p.close()
            p.join()

        # Codes known to ``self.data_dict`` that had no file in any market folder.
        delist = list(set(self.data_dict) - set(stock_code_list))
        with MongoConnect(self.database):
            for security_code in delist:
                with switch_collection(
                        Kline, security_code) as KlineDaily_security_code:
                    doc_list = []
                    security_code_data = self.data_dict[
                        security_code].set_index(["TRADE_DT"])
                    for trade_dt, row in security_code_data.iterrows():
                        if not row['S_DQ_AMOUNT'] > 0:
                            continue
                        time_tag = datetime.strptime(str(int(trade_dt)), "%Y%m%d")
                        try:
                            pre_close = int(row['S_DQ_PRECLOSE'] * 10000)
                        except KeyError:
                            # Source table has no previous-close column.
                            pre_close = None
                        doc_list.append(KlineDaily_security_code(
                            time_tag=time_tag,
                            pre_close=pre_close,
                            open=int(row['S_DQ_OPEN'] * 10000),
                            high=int(row['S_DQ_HIGH'] * 10000),
                            low=int(row['S_DQ_LOW'] * 10000),
                            close=int(row['S_DQ_CLOSE'] * 10000),
                            volume=int(row['S_DQ_VOLUME'] * 100),
                            amount=int(row['S_DQ_AMOUNT'] * 1000),
                            match_items=0,
                            interest=0))
                    # A code with no traded rows yields an empty batch, and an
                    # empty bulk insert is rejected, so skip it.
                    if doc_list:
                        KlineDaily_security_code.objects.insert(doc_list)
 def save_a_share_calendar(self):
     """
     Store one ``AShareCalendar`` document per exchange from ``self.data_df``.

     Rows are grouped by ``S_INFO_EXCHMARKET``; the market names ``SSE`` and
     ``SZSE`` are stored as ``SH`` and ``SZ`` (other names are kept as they
     are). Each group's ``TRADE_DAYS`` values are sorted and converted with
     ``date_to_datetime(str(day))`` before all documents are inserted in one
     call.

     :return: None
     """
     market_alias = {'SSE': 'SH', 'SZSE': 'SZ'}
     with MongoConnect(DatabaseName.STOCK_BASE_DATA.value):
         days_by_market = {
             exchange: list(group['TRADE_DAYS'])
             for exchange, group in self.data_df.groupby("S_INFO_EXCHMARKET")
         }
         doc_list = []
         for exchange, raw_days in days_by_market.items():
             converted_days = [date_to_datetime(str(day)) for day in sorted(raw_days)]
             doc_list.append(
                 AShareCalendar(market=market_alias.get(exchange, exchange),
                                trade_days=converted_days))
         AShareCalendar.objects.insert(doc_list)
    def insert_security_code(self, market, file_name, path):
        """
        Insert the daily k-lines of one security from a CSV file into MongoDB.

        The security code is ``<file stem>.<market>``; nothing is inserted
        unless ``is_security_type(code, 'EXTRA_STOCK_A')`` is truthy. CSV rows
        dated 20020104 or later are stored as they are (the CSV columns
        ``volumw`` and ``turover`` are renamed to ``volume`` and ``amount``),
        with ``pre_close`` taken from ``self.data_dict`` (scaled by 10000)
        when that date is available there. Dates before 20020104 are filled
        from ``self.data_dict`` only: rows with ``S_DQ_AMOUNT > 0``, prices
        multiplied by 10000, volume by 100, amount by 1000, and
        ``match_items`` / ``interest`` stored as 0.

        :param market: market suffix appended to the code and used by the caller to pick the folder.
        :param file_name: CSV file name; the part before the first '.' is the code.
        :param path: directory prefix that is concatenated with ``file_name``.
        :return: None
        """
        with MongoConnect(self.database):
            print(path + file_name + '\n')
            kline_daily_data = pd.read_csv(path + file_name,
                                           encoding='unicode_escape')
            security_code = file_name.split('.')[0] + '.' + market
            if not is_security_type(security_code, 'EXTRA_STOCK_A'):
                return

            kline_daily_data = kline_daily_data.reindex(columns=[
                'date', 'open', 'high', 'low', 'close', 'volumw',
                'turover', 'match_items', 'interest'
            ])
            kline_daily_data.rename(columns={
                'volumw': 'volume',
                'turover': 'amount'
            }, inplace=True)
            # The comparison also drops rows whose date is NaN, so every
            # remaining date can be converted with int() below.
            kline_daily_data = kline_daily_data[
                kline_daily_data.date >= 20020104]

            with switch_collection(
                    Kline, security_code) as KlineDaily_security_code:
                doc_list = []
                security_code_data = pd.DataFrame()
                if security_code in self.data_dict:
                    security_code_data = self.data_dict[
                        security_code].set_index(["TRADE_DT"])
                    security_code_data = security_code_data.fillna(0)

                for _, row in kline_daily_data.iterrows():
                    date_int = int(row['date'])
                    try:
                        pre_close = int(
                            10000 *
                            security_code_data.loc[date_int, 'S_DQ_PRECLOSE'])
                    except KeyError:
                        # Date (or the whole table) is absent from data_dict.
                        pre_close = None
                    doc_list.append(KlineDaily_security_code(
                        time_tag=datetime.strptime(str(date_int), "%Y%m%d"),
                        pre_close=pre_close,
                        open=int(row['open']),
                        high=int(row['high']),
                        low=int(row['low']),
                        close=int(row['close']),
                        volume=int(row['volume']),
                        amount=int(row['amount']),
                        match_items=int(row['match_items']),
                        interest=int(row['interest'])))

                # Daily bars before 20020104 come from the data_dict table;
                # match_items is stored as 0 for them.
                security_code_data = security_code_data[
                    security_code_data.index < 20020104]
                for trade_dt, row in security_code_data.iterrows():
                    if not row['S_DQ_AMOUNT'] > 0:
                        continue
                    try:
                        pre_close = int(row['S_DQ_PRECLOSE'] * 10000)
                    except KeyError:
                        pre_close = None
                    doc_list.append(KlineDaily_security_code(
                        time_tag=datetime.strptime(str(int(trade_dt)), "%Y%m%d"),
                        pre_close=pre_close,
                        open=int(row['S_DQ_OPEN'] * 10000),
                        high=int(row['S_DQ_HIGH'] * 10000),
                        low=int(row['S_DQ_LOW'] * 10000),
                        close=int(row['S_DQ_CLOSE'] * 10000),
                        volume=int(row['S_DQ_VOLUME'] * 100),
                        amount=int(row['S_DQ_AMOUNT'] * 1000),
                        match_items=0,
                        interest=0))

                # An empty bulk insert is rejected, so only write when at
                # least one bar was collected.
                if doc_list:
                    KlineDaily_security_code.objects.insert(doc_list)
Beispiel #10
0
# -*- coding: utf-8 -*-

# ------------------------------
# @Time    : 2019/11/21
# @Author  : gao
# @File    : update_finance_data.py
# @Project : AmazingQuant
# ------------------------------

from AmazingQuant.data_center.mongo_connection_me import MongoConnect
from AmazingQuant.data_center.database_field.field_a_share_finance_data import AShareIncome, AShareCashFlow
from AmazingQuant.utils.performance_test import Timer

if __name__ == '__main__':
    # Timed smoke query: print the security code of every AShareIncome
    # document whose statement_type is 408009000 and whose security code is
    # among the collection's distinct codes.
    database = 'stock_base_data'
    with MongoConnect(database), Timer(True):
        security_code_list = AShareIncome.objects.distinct('security_code')
        matching_income = AShareIncome.objects(
            security_code__in=security_code_list,
            statement_type=408009000)
        for income_doc in matching_income:
            print(income_doc.security_code)
    def insert_security_code(self, market, file_name, path):
        """
        Insert the daily k-lines of one index from a CSV file into MongoDB.

        The code is the file name up to the first '.'. For market ``'SH'`` it
        is translated through a fixed table (e.g. ``999999`` -> ``000001``);
        codes not in the table get their first two characters replaced by
        ``'00'``. The CSV columns ``volumw`` and ``turover`` are renamed to
        ``volume`` and ``amount``; every row with a date becomes one document
        in the collection named ``<code>.<market>`` with ``pre_close=None``.

        :param market: market suffix appended to the code.
        :param file_name: CSV file name; the part before the first '.' is the raw code.
        :param path: directory prefix that is concatenated with ``file_name``.
        :return: None
        """
        database = DatabaseName.INDEX_KLINE_DAILY.value
        with MongoConnect(database):
            print(path + file_name + '\n')
            kline_daily_data = pd.read_csv(path + file_name,
                                           encoding='unicode_escape')
            code = file_name.split('.')[0]
            code_transfer_dict = {
                '999999': '000001',
                '999998': '000002',
                '999997': '000003',
                '999996': '000004',
                '999995': '000005',
                '999994': '000006',
                '999993': '000007',
                '999992': '000008',
                '999991': '000010',
                '999990': '000011',
                '999989': '000012',
                '999988': '000013',
                '999987': '000016',
                '999986': '000015',
                '000300': '000300'
            }
            if market == 'SH':
                code = code_transfer_dict.get(code, '00' + code[2:])
            security_code = code + '.' + market

            kline_daily_data = kline_daily_data.reindex(columns=[
                'date', 'open', 'high', 'low', 'close', 'volumw', 'turover',
                'match_items', 'interest'
            ])
            kline_daily_data.rename(columns={
                'volumw': 'volume',
                'turover': 'amount'
            }, inplace=True)

            with switch_collection(Kline,
                                   security_code) as KlineDaily_security_code:
                doc_list = []
                for _, row in kline_daily_data.iterrows():
                    # Test for a missing date before converting it: int() on
                    # NaN raises ValueError, so the check has to come first.
                    if pd.isna(row['date']):
                        continue
                    time_tag = datetime.strptime(str(int(row['date'])), "%Y%m%d")
                    doc_list.append(KlineDaily_security_code(
                        time_tag=time_tag,
                        pre_close=None,
                        open=int(row['open']),
                        high=int(row['high']),
                        low=int(row['low']),
                        close=int(row['close']),
                        volume=int(row['volume']),
                        amount=int(row['amount']),
                        match_items=int(row['match_items']),
                        interest=int(row['interest'])))

                # An empty bulk insert is rejected, so only write when the
                # file produced at least one bar.
                if doc_list:
                    KlineDaily_security_code.objects.insert(doc_list)