def edit_index_and_fill_na(df: pd.DataFrame):
    """Relabel columns with full stock codes, align rows to trade dates and forward-fill.

    The columns of ``df`` are matched against the first six characters of the
    ``stock`` values stored in the ``stock_ipo_date`` collection and replaced
    by the full ``stock`` value.  The rows are then reindexed on the dates
    read from the ``stock_trade_date`` collection, forward-filling gaps.

    Args:
        df: Frame whose columns are six-character stock codes and whose index
            can be parsed by ``pd.to_datetime``.

    Returns:
        A new DataFrame; the caller's frame is not modified because the first
        ``reindex`` already produces a copy.
    """
    # region Description: edit columns and index
    m = MongoDB_io()
    m.set_db('stock_daily_data')
    m.set_collection('stock_ipo_date')
    ipo_df = m.read_data_to_get_dataframe()

    # Lookup table: six-character prefix -> full stock code.
    ipo_df['stock_short_name'] = ipo_df.stock.apply(lambda x: x[:6])
    map_series = ipo_df.set_index('stock_short_name').stock

    # Keep exactly the stocks known to the IPO table (missing ones become NaN
    # columns), then swap the short labels for the full codes.
    df = df.reindex(map_series.index, axis=1)
    df.columns = df.columns.map(lambda x: map_series[x])

    # Convert to datetimes *before* inserting the sentinel row and sorting, so
    # the new label has the same type as the rest of the index and the sort is
    # chronological rather than lexical.
    df.index = pd.to_datetime(df.index)
    # Row of ones dated 1990-01-01; presumably the seed value for the
    # forward fill below -- confirm with the data owner.
    df.loc[pd.Timestamp('1990-01-01 00:00:00'), :] = 1
    df.sort_index(inplace=True)
    # endregion

    # region Description: forward-fill and align to the trade calendar
    df = df.ffill()
    m.set_collection('stock_trade_date')
    trade_date = m.read_data_to_get_dataframe()
    # NOTE(review): another script in this file reads this collection's dates
    # from a column named ``trade_date`` rather than ``date`` -- verify which
    # column name is correct.
    trade_date_list = trade_date.date.tolist()
    df = df.reindex(trade_date_list, axis=0)
    # Reindexing may introduce dates that were absent from the input.
    df = df.ffill()
    # endregion
    return df
from jqdatasdk import *
import pandas as pd
from data_base.mongodb import MongoDB_io

# NOTE(review): account credentials are hard-coded in source; consider moving
# them to configuration or environment variables.
auth('15915765128','87662638qjf')

# Industry code table from the local MongoDB store.
m = MongoDB_io()
m.set_db('stock_daily_data')
m.set_collection('stock_sw_industry_code')
sw_indus = m.read_data_to_get_dataframe()

start_date = '2010-01-01'
group_day_num = 1000
group_num = 1

# Trade days from start_date onwards; queried once (the original issued the
# same remote call twice with identical arguments).
trade_date_list = get_trade_days(start_date=start_date, end_date=None, count=None)
weight_df = pd.DataFrame()

# Reuse the same client for the IPO table instead of building a second one.
m.set_collection('stock_ipo_date')
ipo_df = m.read_data_to_get_dataframe()
stock_list = ipo_df.stock.tolist()

for date in trade_date_list:
    print(date)
    # One request per trade day covering every stock in the IPO table.
    panel = get_price(stock_list, start_date=date, end_date=date,
                      frequency='daily', fields=None, skip_paused=False,
                      fq='none', count=None)
    # The result is indexed on three axes; take position 0 on the second one
    # (a single day was requested) to obtain a 2-D frame.
    df = panel.iloc[:, 0, :]
    df.reset_index(inplace=True)
    df.rename(columns={'index': 'stock'}, inplace=True)
from jqdatasdk import *
import pandas as pd
from data_base.mongodb import MongoDB_io

# NOTE(review): account credentials are hard-coded in source; consider moving
# them to configuration or environment variables.
auth('15915765128', '87662638qjf')

m = MongoDB_io()
m.set_db('stock_daily_data')
m.set_collection('stock_sw_industry_code')
sw_indus = m.read_data_to_get_dataframe()

# Trade days strictly after 2010-01-01, as strings for the API calls below.
m.set_collection('stock_trade_date')
trade_day_df = m.read_data_to_get_dataframe()
trade_day_df = trade_day_df[
    trade_day_df.trade_date > pd.to_datetime('2010-01-01')]
trade_day_list = trade_day_df.trade_date.astype(str)

# Only the first 34 industry codes of the table are used.
industry_code_list = sw_indus.industry_code.iloc[:34].tolist()
industry_stock_grouping = pd.DataFrame()

for date in trade_day_list:
    print(date)
    # Collect one Series per industry (index: member stocks, value: industry
    # code) and concatenate once.  This replaces repeated ``Series.append``,
    # which is quadratic and was removed in pandas 2.0.
    industry_pieces = []
    for industry_code in industry_code_list:
        stock_list = get_industry_stocks(industry_code, date=date)
        industry_pieces.append(pd.Series(industry_code, index=stock_list))
    if industry_pieces:
        daily_industry_series = pd.concat(industry_pieces)
    else:
        # ``pd.concat`` rejects an empty list; keep the original empty result.
        daily_industry_series = pd.Series(dtype=object)

    # Name the series after the day so that stacking yields a date column.
    daily_industry_series.name = pd.to_datetime(date)
    daily_industry_df = daily_industry_series.to_frame()
    daily_industry_df_stack_up = daily_industry_df.stack().reset_index()
    daily_industry_df_stack_up.columns = ['stock', 'date', 'industry_category']