Exemple #1
0
from data_base.mongodb import MongoDB_io
import pandas as pd

# Build a table (one row per collection, one column per date) marking which
# dates already have minute data stored in the 'stock_min_data' database,
# then write that table to a CSV file.
m = MongoDB_io()
m.set_db('stock_min_data')
collection_list = m.list_collection_names()
m.close_MongoDB_connection()
collection_list.sort()

# Single-row frames are collected and concatenated once after the loop:
# DataFrame.append was removed in pandas 2.0, and appending inside the loop
# copied the whole accumulated frame on every iteration.
insert_date_rows = []
for position, stock in enumerate(collection_list):
    print(stock, position)
    # NOTE(review): the handle is reused after close_MongoDB_connection();
    # presumably set_db re-establishes the connection -- confirm.
    m.set_db('stock_min_data')
    m.set_collection(stock)
    df = m.read_data_to_get_field(field={'DATETIME': 1})
    # Keep the first 10 characters of each DATETIME string (presumably the
    # 'YYYY-MM-DD' part -- confirm against the stored format), de-duplicated.
    series_index = df.DATETIME.astype(str).str[:10].drop_duplicates().tolist()
    insert_date_series = pd.Series(True, index=series_index, name=stock)
    # Transposing turns the date-indexed Series into one row labelled by stock.
    insert_date_rows.append(insert_date_series.to_frame().T)
    m.close_MongoDB_connection()

# pd.concat raises on an empty list, so fall back to an empty frame when the
# database has no collections.
insert_date_df = pd.concat(insert_date_rows) if insert_date_rows else pd.DataFrame()

path = r'D:\code\factor_module\download_stock_min_data\stock_insert_situation.csv'
insert_date_df.to_csv(path)
class get_min_price_class:
    """Download minute-frequency price data and store it in MongoDB.

    Every stock is written to a collection of the 'stock_min_data' database
    named after the first six characters of its code. Stocks that already
    have a collection are only topped up from the trading date following the
    last stored date.

    NOTE(review): ``get_price``, ``logging_joinquant``, ``get_stock_code_list``,
    ``get_setting_start_end_date`` and ``get_trade_date_list`` are not defined
    in the visible part of this file; they are assumed to be provided
    elsewhere (presumably JoinQuant helpers) -- confirm.
    """

    def __init__(self):
        """Select the 'stock_min_data' database and cache its collection names."""
        self.m = MongoDB_io()
        self.m.set_db('stock_min_data')
        self.nothing = ''
        # Snapshot taken once here; nothing in this class refreshes it later.
        self.collection_list = self.m.list_collection_names()

    def download_and_insert(self, stock, start_date, end_date):
        """Fetch minute data for ``stock`` and insert it into its collection.

        Args:
            stock: Stock code; its first six characters name the collection.
            start_date: First date passed to ``get_price``.
            end_date: Last date passed to ``get_price``.

        Returns:
            None. Nothing is inserted when ``get_price`` returns no rows.
        """
        m = self.m
        print(stock, start_date, end_date)
        min_data: pd.DataFrame = get_price(stock,
                                           start_date=start_date,
                                           end_date=end_date,
                                           frequency='minute',
                                           fields=None,
                                           skip_paused=False,
                                           fq=None,
                                           count=None)
        if len(min_data) == 0:
            print(stock, ' is empty')
            return
        # Move the index into a regular column, then normalise the column
        # names: upper-case everything and store MONEY under the name AMOUNT.
        min_data.index.name = 'datetime'
        min_data = min_data.reset_index()
        min_data.columns = min_data.columns.map(str.upper)
        min_data = min_data.rename(columns={'MONEY': 'AMOUNT'})
        # DATETIME is stored as text; the date readers in this file slice it
        # as a string.
        min_data['DATETIME'] = min_data['DATETIME'].astype(str)
        m.set_collection(stock[:6])
        m.insert_dataframe_to_mongodb(min_data)

    def check_stock_is_in_collection(self, stock):
        """Return True if the first six characters of ``stock`` name a cached collection."""
        return stock[:6] in self.collection_list

    def get_collection_insert_date(self, collection):
        """Return the sorted, distinct dates stored in ``collection``.

        Each date is the first 10 characters of the string form of a DATETIME
        value. An empty list is returned when the collection yields no rows.
        """
        m = self.m
        m.set_collection(collection)
        # NOTE(review): the module-level script in this file passes
        # field={'DATETIME': 1}; confirm that the list form is accepted too.
        df = m.read_data_to_get_field(field=['DATETIME'])
        if df.empty:
            return []
        return sorted(df.DATETIME.astype(str).str[:10].drop_duplicates())

    # NOTE: a multi-process variant of the insert loop was sketched here
    # previously (commented out); only the single-process path is implemented.

    def single_process_insert_min_data(self, stock_code_list, trade_date_list):
        """Insert minute data for every stock in ``stock_code_list``, one at a time."""
        # enumerate avoids the per-iteration list.index scan and reports the
        # true position even when a code appears more than once.
        for position, stock in enumerate(stock_code_list):
            print(stock, position)
            self.inserting_one_stock(stock, trade_date_list)

    def inserting_one_stock(self, stock, trade_date_list):
        """Download whatever part of ``trade_date_list`` is missing for ``stock``.

        Args:
            stock: Stock code.
            trade_date_list: Ordered list of trading-date strings; its first
                and last entries bound the download range.

        Returns:
            None. Returns without downloading when ``trade_date_list`` is
            empty, when the last stored date is not in ``trade_date_list``,
            or when the stock is already stored through the final date.
        """
        if not trade_date_list:
            return
        start_date = trade_date_list[0]
        end_date = trade_date_list[-1]

        if not self.check_stock_is_in_collection(stock):
            self.download_and_insert(stock, start_date, end_date)
            return

        date_list = self.get_collection_insert_date(stock[:6])
        if not date_list:
            # The collection exists but holds no rows: treat it as new.
            self.download_and_insert(stock, start_date, end_date)
            return

        last_insert_date = date_list[-1]
        if last_insert_date not in trade_date_list:
            return
        next_position = trade_date_list.index(last_insert_date) + 1
        if next_position >= len(trade_date_list):
            # Already stored through the last trading date; there is no
            # following date to resume from.
            return
        self.download_and_insert(stock, trade_date_list[next_position],
                                 end_date)

    def insert_stock_min_data(self):
        """Run the insert for the configured date range over a slice of the stock list."""
        logging_joinquant()
        stock_list = get_stock_code_list()
        stock_list.sort()
        # Only positions 200-499 of the sorted list are processed.
        # NOTE(review): hard-coded batch window -- confirm it is intentional.
        stock_list = stock_list[200:500]
        dic = get_setting_start_end_date()
        start_date = dic['start_date']
        end_date = dic['end_date']
        trade_date_series: pd.Series = pd.Series(
            get_trade_date_list(start_date, end_date)).astype(str)
        trade_date_list = trade_date_series.tolist()
        self.single_process_insert_min_data(stock_list, trade_date_list)