def edit_index_and_fill_na(df: pd.DataFrame):
    """Align *df* to the stored stock list and trade calendar, forward-filling gaps.

    Columns of *df* are expected to be 6-character stock codes; they are
    reindexed against, and then renamed to, the full ``stock`` codes read
    from the ``stock_ipo_date`` collection.  Rows are reindexed to the dates
    read from the ``stock_trade_date`` collection.

    Args:
        df: Frame indexed by date-like labels with short stock codes as
            columns.  The caller's object is not modified (the first
            ``reindex`` produces a new frame).

    Returns:
        A new DataFrame with full stock codes as columns and the stored
        trade dates as index, forward-filled along the rows.
    """
    # region Description: edit columns and index
    m = MongoDB_io()
    m.set_db('stock_daily_data')
    m.set_collection('stock_ipo_date')
    ipo_df = m.read_data_to_get_dataframe()
    ipo_df['stock_short_name'] = ipo_df.stock.str[:6]
    # short code -> full code lookup
    map_series = ipo_df.set_index('stock_short_name').stock

    df = df.reindex(map_series.index, axis=1)
    df.columns = df.columns.map(lambda short_code: map_series[short_code])

    # Seed row of ones at an early date; presumably gives every column an
    # initial value for the forward-fill below -- TODO confirm intent.
    df.loc['1990-01-01 00:00:00', :] = 1
    df.sort_index(inplace=True)
    df.index = pd.to_datetime(df.index)
    # endregion

    # region Description: forward-fill, then align rows to the trade calendar
    # ``fillna(method='ffill')`` is deprecated in recent pandas; ``ffill()``
    # is the equivalent, supported spelling.
    df = df.ffill()
    m.set_collection('stock_trade_date')
    trade_date = m.read_data_to_get_dataframe()
    trade_date_list = trade_date.date.tolist()
    df = df.reindex(trade_date_list, axis=0)
    df = df.ffill()
    # endregion
    return df
def get_sw_industry():
    """Fetch the ``sw_l1``/``sw_l2``/``sw_l3`` industry tables and store them.

    The three frames returned by ``get_industries`` are stacked in that
    order, the index is exposed as an ``industry_code`` column, and the
    result replaces the contents of the ``sw_industry_code`` collection in
    the ``stock_daily_data`` database.
    """
    # Local import: this block did not previously reference pandas directly.
    import pandas as pd

    logging_joinquant()

    # ``DataFrame.append`` was removed in pandas 2.0; a single ``concat``
    # yields the same stacked frame without the intermediate copies.
    levels = ('sw_l1', 'sw_l2', 'sw_l3')
    df = pd.concat([get_industries(name=level) for level in levels])
    df.index.name = 'industry_code'
    df.reset_index(inplace=True)

    # Insert into the database (existing documents are removed first).
    m = MongoDB_io()
    m.set_db('stock_daily_data')
    m.set_collection('sw_industry_code')
    m.remove_all_documents_from_mongodb()
    m.insert_dataframe_to_mongodb(df)
def insert_index_data():
    """Reload the ``index_info`` collection with the current index listing.

    Clears the target collection in ``index_daily_data``, fetches all
    securities of type ``'index'``, turns the frame's index into an
    ``index`` column, converts ``start_date``/``end_date`` to datetimes and
    writes the frame back in blocks.
    """
    store = MongoDB_io()
    store.set_db('index_daily_data')
    store.set_collection('index_info')
    store.delete_document_include_condition()

    logging_joinquant()
    index_info = get_all_securities(types='index', date=None)
    index_info.index.name = 'index'
    index_info.reset_index(inplace=True)

    # Store both boundary dates as real datetimes rather than raw values.
    for date_column in ('start_date', 'end_date'):
        index_info[date_column] = pd.to_datetime(index_info[date_column])

    # Insert into the database.
    store.insert_huge_dataframe_by_block_to_mongodb(index_info)
def insert_zz500_data(start_date='2005-01-01', end_date='2019-09-25'):
    """Reload daily bars for ``000905.XSHG`` into the ``000905_XSHG`` collection.

    The target collection in ``index_daily_data`` is cleared first, then the
    daily prices for the requested range are fetched, rows containing NaN
    are dropped, the index is exposed as a ``date`` datetime column and the
    frame is written back in blocks.

    Args:
        start_date: First date requested from ``get_price``.  Defaults to
            the previously hard-coded ``'2005-01-01'``.
        end_date: Last date requested from ``get_price``.  Defaults to the
            previously hard-coded ``'2019-09-25'``.
    """
    m = MongoDB_io()
    m.set_db('index_daily_data')
    m.set_collection('000905_XSHG')
    m.delete_document_include_condition()

    logging_joinquant()
    # Indices have no notion of price adjustment, hence ``fq=None``.
    df = get_price('000905.XSHG', start_date=start_date, end_date=end_date,
                   fq=None, frequency='daily')
    df.dropna(inplace=True)

    df.index.name = 'date'
    df.reset_index(inplace=True)
    df.date = pd.to_datetime(df.date)

    # Insert into the database.
    m.insert_huge_dataframe_by_block_to_mongodb(df)
# Script: iterate over trade days since ``start_date`` and fetch one day of
# daily prices for every stock listed in ``stock_ipo_date``.
from jqdatasdk import *
import pandas as pd
from data_base.mongodb import MongoDB_io

# NOTE(review): credentials are hard-coded in source. They should be moved to
# configuration / environment and rotated; left unchanged here so the script
# keeps working as before.
auth('15915765128','87662638qjf')

m = MongoDB_io()
m.set_db('stock_daily_data')
m.set_collection('stock_sw_industry_code')
sw_indus = m.read_data_to_get_dataframe()

start_date = '2010-01-01'
# The original issued this identical remote call twice; once is enough.
trade_date_list = get_trade_days(start_date=start_date, end_date=None, count=None)
group_day_num = 1000
group_num = 1
weight_df = pd.DataFrame()

m = MongoDB_io()
m.set_db('stock_daily_data')
m.set_collection('stock_ipo_date')
ipo_df = m.read_data_to_get_dataframe()
stock_list = ipo_df.stock.tolist()

for date in trade_date_list:
    print(date)
    panel = get_price(stock_list, start_date=date, end_date=date,
                      frequency='daily', fields=None, skip_paused=False,
                      fq='none', count=None)
    # NOTE(review): the 3-axis ``iloc`` implies ``get_price`` returns a
    # pandas Panel here; Panel was removed in pandas 0.25 -- confirm the
    # installed pandas / jqdatasdk versions.
    df = panel.iloc[:, 0, :]
    df.reset_index(inplace=True)
    df.rename(columns={'index': 'stock'}, inplace=True)
# Script: for every collection in ``stock_min_data`` record which calendar
# dates have data, and dump the stock-by-date table to CSV.
import pandas as pd
from data_base.mongodb import MongoDB_io

m = MongoDB_io()
m.set_db('stock_min_data')
collection_list = m.list_collection_names()
m.close_MongoDB_connection()
collection_list.sort()

# Collect one named Series per stock and combine once at the end:
# ``DataFrame.append`` was removed in pandas 2.0 and re-copied the whole
# frame on every iteration.
insert_date_rows = []
for position, stock in enumerate(collection_list):
    # ``enumerate`` replaces ``collection_list.index(stock)``, which rescanned
    # the list on every iteration.
    print(stock, position)
    m.set_db('stock_min_data')
    m.set_collection(stock)
    df = m.read_data_to_get_field(field={'DATETIME': 1})
    # First 10 characters of the stringified DATETIME, de-duplicated.
    series_index = df.DATETIME.astype(str).str[:10].drop_duplicates().tolist()
    insert_date_series = pd.Series(True, index=series_index)
    insert_date_series.name = stock
    insert_date_rows.append(insert_date_series)
    m.close_MongoDB_connection()

# One row per stock, one column per date; ``concat`` rejects an empty list,
# so fall back to an empty frame when there are no collections.
if insert_date_rows:
    insert_date_df = pd.concat(insert_date_rows, axis=1).T
else:
    insert_date_df = pd.DataFrame()

path = r'D:\code\factor_module\download_stock_min_data\stock_insert_situation.csv'
insert_date_df.to_csv(path)
class get_min_price_class(object):
    """Download minute bars with ``get_price`` and store them per stock.

    Each stock is written to the collection named by the first six
    characters of its code inside the ``stock_min_data`` database.  The list
    of existing collections is read once at construction time and is not
    refreshed afterwards.
    """

    def __init__(self):
        self.m = MongoDB_io()
        self.m.set_db('stock_min_data')
        self.nothing = ''
        # Snapshot of collection names used by check_stock_is_in_collection.
        self.collection_list = self.m.list_collection_names()

    def download_and_insert(self, stock, start_date, end_date):
        """Fetch minute bars for *stock* in the given range and insert them.

        Column names are upper-cased, ``MONEY`` is renamed to ``AMOUNT`` and
        ``DATETIME`` is stored as a string.  Nothing is inserted when the
        download returns no rows.
        """
        m = self.m
        print(stock, start_date, end_date)
        min_data: pd.DataFrame = get_price(
            stock, start_date=start_date, end_date=end_date,
            frequency='minute', fields=None, skip_paused=False,
            fq=None, count=None)
        if min_data.shape[0] == 0:
            print(stock, ' is empty')
            return

        min_data.index.name = 'datetime'
        min_data.reset_index(inplace=True)
        min_data.columns = min_data.columns.map(lambda x: x.upper())
        min_data.rename({'MONEY': 'AMOUNT'}, axis=1, inplace=True)
        min_data.DATETIME = min_data.DATETIME.astype(str)

        m.set_collection(stock[:6])
        m.insert_dataframe_to_mongodb(min_data)

    def check_stock_is_in_collection(self, stock):
        """Return True when a collection for ``stock[:6]`` already existed."""
        return stock[:6] in self.collection_list

    def get_collection_insert_date(self, collection):
        """Return the sorted, unique ``YYYY-MM-DD`` prefixes stored in *collection*.

        The prefix is the first 10 characters of each stringified
        ``DATETIME`` value.
        """
        m = self.m
        m.set_collection(collection)
        df = m.read_data_to_get_field(field=['DATETIME'])
        dates = df.DATETIME.astype(str).str[:10].drop_duplicates().tolist()
        return sorted(dates)

    # NOTE: a multi-process variant of the insert loop used to live here as
    # commented-out code; only the single-process path is implemented.

    def single_process_insert_min_data(self, stock_code_list, trade_date_list):
        """Run :meth:`inserting_one_stock` for every stock, sequentially."""
        # ``enumerate`` replaces ``stock_code_list.index(stock)``, which
        # rescanned the list on every iteration.
        for position, stock in enumerate(stock_code_list):
            print(stock, position)
            self.inserting_one_stock(stock, trade_date_list)

    def inserting_one_stock(self, stock, trade_date_list):
        """Download whatever part of *trade_date_list* is missing for *stock*.

        * No collection yet (or an empty one): download the whole range.
        * Last stored date not in *trade_date_list*: do nothing.
        * Last stored date is the final trade date: already up to date, do
          nothing (this case previously raised ``IndexError``).
        * Otherwise: download from the trade date after the last stored one
          through the end of the list.

        Args:
            stock: Full stock code; its first six characters name the
                collection.
            trade_date_list: Ordered list of trade-date strings.  An empty
                list is a no-op.
        """
        if not trade_date_list:
            return
        start_date = trade_date_list[0]
        end_date = trade_date_list[-1]

        if not self.check_stock_is_in_collection(stock):
            self.download_and_insert(stock, start_date, end_date)
            return

        date_list = self.get_collection_insert_date(stock[:6])
        if not date_list:
            # Collection exists but holds no dates: treat as never downloaded.
            self.download_and_insert(stock, start_date, end_date)
            return

        last_insert_date = date_list[-1]
        if last_insert_date not in trade_date_list:
            return

        next_position = trade_date_list.index(last_insert_date) + 1
        if next_position >= len(trade_date_list):
            # Stored data already reaches the last requested trade date.
            return
        self.download_and_insert(
            stock, trade_date_list[next_position], end_date)

    def insert_stock_min_data(self):
        """Entry point: insert minute data for a fixed slice of the stock list.

        NOTE(review): only ``stock_list[200:500]`` (after sorting) is
        processed; the slice is hard-coded.
        """
        logging_joinquant()
        stock_list = get_stock_code_list()
        stock_list.sort()
        stock_list = stock_list[200:500]

        dic = get_setting_start_end_date()
        start_date = dic['start_date']
        end_date = dic['end_date']
        trade_date_series: pd.Series = pd.Series(
            get_trade_date_list(start_date, end_date)).astype(str)
        trade_date_list = trade_date_series.tolist()
        self.single_process_insert_min_data(stock_list, trade_date_list)