def edit_index_and_fill_na(df: pd.DataFrame):
    """Align a frame with the stored stock list and trade dates, forward filling gaps.

    The columns are reindexed to the six-character prefixes of the ``stock``
    values stored in the ``stock_ipo_date`` collection and then renamed to
    the full ``stock`` values.  A row of ones labelled ``1990-01-01`` is
    added (presumably so the forward fill has a starting value in every
    column), the frame is forward filled, reindexed to the dates stored in
    the ``stock_trade_date`` collection, and forward filled again.

    Args:
        df: Frame whose column labels are six-character stock codes and
            whose index labels can be parsed by ``pd.to_datetime``.

    Returns:
        A new DataFrame indexed by the stored trade dates, with one column
        per stored stock.  The caller's frame is not modified.
    """
    # region Description: edit columns and index
    m = MongoDB_io()
    m.set_db('stock_daily_data')
    m.set_collection('stock_ipo_date')
    ipo_df = m.read_data_to_get_dataframe()
    ipo_df['stock_short_name'] = ipo_df.stock.apply(lambda x: x[:6])
    map_series = ipo_df.set_index('stock_short_name').stock

    df = df.reindex(map_series.index, axis=1)
    df.columns = df.columns.map(lambda x: map_series[x])
    df.loc['1990-01-01 00:00:00', :] = 1
    # Convert before sorting so the order is chronological even when the
    # index mixes strings and timestamps.
    df.index = pd.to_datetime(df.index)
    df = df.sort_index()
    # endregion

    # region Description: forward fill and align with the trade calendar
    df = df.ffill()
    m.set_collection('stock_trade_date')
    trade_date = m.read_data_to_get_dataframe()
    # Other scripts in this project store the calendar under 'trade_date';
    # accept either column name.
    date_column = 'date' if 'date' in trade_date.columns else 'trade_date'
    trade_date_list = trade_date[date_column].tolist()
    df = df.reindex(trade_date_list, axis=0)
    df = df.ffill()
    # endregion
    return df
def get_sw_industry():
    """Download the 'sw_l1', 'sw_l2' and 'sw_l3' industry lists and store them.

    The three lists returned by ``get_industries`` are stacked into a single
    frame, the index is exposed as an ``industry_code`` column, and the
    result replaces the contents of the ``sw_industry_code`` collection in
    the ``stock_daily_data`` database.
    """
    logging_joinquant()
    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    df = pd.concat(
        [get_industries(name=level) for level in ('sw_l1', 'sw_l2', 'sw_l3')]
    )
    df.index.name = 'industry_code'
    df = df.reset_index()

    # Insert into the database.
    m = MongoDB_io()
    m.set_db('stock_daily_data')
    m.set_collection('sw_industry_code')
    m.remove_all_documents_from_mongodb()
    m.insert_dataframe_to_mongodb(df)
def insert_index_data():
    """Reload the ``index_info`` collection with the list of index securities.

    ``delete_document_include_condition`` is called with no arguments before
    the download (NOTE(review): presumably this clears the collection —
    confirm against ``MongoDB_io``).  The securities returned by
    ``get_all_securities(types='index')`` are then written with their index
    exposed as an ``index`` column and their start/end dates converted to
    datetimes.
    """
    mongo = MongoDB_io()
    mongo.set_db('index_daily_data')
    mongo.set_collection('index_info')
    mongo.delete_document_include_condition()

    logging_joinquant()
    securities = get_all_securities(types='index', date=None)
    securities = securities.rename_axis('index').reset_index()
    for date_column in ('start_date', 'end_date'):
        securities[date_column] = pd.to_datetime(securities[date_column])

    # Insert into the database.
    mongo.insert_huge_dataframe_by_block_to_mongodb(securities)
def insert_zz500_data(start_date='2005-01-01', end_date='2019-09-25'):
    """Reload the ``000905_XSHG`` collection with daily prices of 000905.XSHG.

    ``delete_document_include_condition`` is called with no arguments before
    the download (NOTE(review): presumably this clears the collection —
    confirm against ``MongoDB_io``).  Rows containing missing values are
    dropped and the index is exposed as a datetime ``date`` column before
    the frame is inserted.

    Args:
        start_date: First day to download; defaults to the previously
            hard-coded value.
        end_date: Last day to download; defaults to the previously
            hard-coded value.
    """
    m = MongoDB_io()
    m.set_db('index_daily_data')
    m.set_collection('000905_XSHG')
    m.delete_document_include_condition()

    logging_joinquant()
    # An index has no price adjustment, hence fq=None.
    df = get_price('000905.XSHG',
                   start_date=start_date,
                   end_date=end_date,
                   fq=None,
                   frequency='daily')
    df = df.dropna()
    df.index.name = 'date'
    df = df.reset_index()
    df.date = pd.to_datetime(df.date)

    # Insert into the database.
    m.insert_huge_dataframe_by_block_to_mongodb(df)
from jqdatasdk import *
import pandas as pd
from data_base.mongodb import MongoDB_io

# Script: for every trade day since start_date, fetch one day of prices for
# all stocks listed in the 'stock_ipo_date' collection.
# NOTE(review): the credentials below are hard-coded in source; consider
# loading them from the environment or a config file.
auth('15915765128','87662638qjf')

m = MongoDB_io()
m.set_db('stock_daily_data')
m.set_collection('stock_sw_industry_code')
sw_indus = m.read_data_to_get_dataframe()

start_date = '2010-01-01'
group_day_num = 1000
group_num = 1
# The original requested the same trade-day list twice; one call suffices.
trade_date_list = get_trade_days(start_date=start_date, end_date=None, count=None)
weight_df = pd.DataFrame()

m = MongoDB_io()
m.set_db('stock_daily_data')
m.set_collection('stock_ipo_date')
ipo_df = m.read_data_to_get_dataframe()
stock_list = ipo_df.stock.tolist()

for date in trade_date_list:
    print(date)
    panel = get_price(stock_list, start_date=date, end_date=date,
                      frequency='daily', fields=None, skip_paused=False,
                      fq='none', count=None)
    # NOTE(review): three-axis .iloc implies get_price returns a pandas
    # Panel here, which only exists in old pandas releases — confirm the
    # pinned pandas version.
    df = panel.iloc[:, 0, :]
    df.reset_index(inplace=True)
    df.rename(columns={'index': 'stock'}, inplace=True)
    # NOTE(review): df is not used after this point in the visible source;
    # the loop body appears to continue beyond what is shown.
from data_base.mongodb import MongoDB_io

# Script: build a table of trade days since start_date with the weekday
# (1.0-7.0), the 'YYYY-MM' month string and the 1-based ordinal of the day
# within its month, then insert it into the 'stock_trade_date' collection.
# NOTE(review): the credentials below are hard-coded in source; consider
# loading them from the environment or a config file.
auth('15915765128','87662638qjf')

start_date = '2005-01-01'
trade_date_list = get_trade_days(start_date=start_date, end_date=None, count=None)

trade_date_info_df = pd.DataFrame()
trade_date_info_df['trade_date'] = trade_date_list
trade_date_info_df['weekday'] = (
    trade_date_info_df['trade_date'].apply(lambda x: x.weekday()) + 1.0)
trade_date_info_df['trade_month'] = (
    trade_date_info_df['trade_date'].apply(lambda x: str(x)[:7]))


def get_ordinal_of_date(x):
    """Add a 1-based float ``ordinal_in_month`` column to the rows in *x*.

    Kept for backward compatibility; the script below no longer calls it.
    """
    x['ordinal_in_month'] = range(x.shape[0])
    x['ordinal_in_month'] = x['ordinal_in_month'] + 1.0
    return x


# cumcount numbers the rows of each month from 0 in their existing order and
# keeps the original index and columns, which groupby.apply with a mutating
# helper does not guarantee across pandas versions.
trade_date_info_df['ordinal_in_month'] = (
    trade_date_info_df.groupby('trade_month').cumcount() + 1.0)
trade_date_info_df['trade_date'] = pd.to_datetime(trade_date_info_df['trade_date'])

# Insert into the database.
m = MongoDB_io()
m.set_db('stock_daily_data')
m.set_collection('stock_trade_date')
m.insert_huge_dataframe_by_block_to_mongodb(trade_date_info_df)
# TODO: add an update-verification step afterwards.
from jqdatasdk import *
import pandas as pd
from data_base.mongodb import MongoDB_io

# Script: check up to which day data has been inserted into the
# 'stock_sw_industry_category' collection.
m = MongoDB_io()
m.set_db('stock_daily_data')
m.set_collection('stock_sw_industry_category')

# get_start_end_date() yields two values, unpacked here as the first and
# last stored dates.
inserted_range = m.get_start_end_date()
start_date, end_date = inserted_range
from data_base.mongodb import MongoDB_io
import pandas as pd

# Script: for every collection in the 'stock_min_data' database, record which
# calendar days (first 10 characters of DATETIME) have at least one document,
# and write the collection-by-day table to a CSV file.
m = MongoDB_io()
m.set_db('stock_min_data')
collection_list = m.list_collection_names()
m.close_MongoDB_connection()
collection_list.sort()

# Collect one row per collection and build the frame once: DataFrame.append
# was removed in pandas 2.0 and copied the whole frame on every iteration.
insert_date_rows = []
# enumerate gives the position directly instead of an O(n) list.index scan.
for position, stock in enumerate(collection_list):
    print(stock, position)
    m.set_db('stock_min_data')
    m.set_collection(stock)
    df = m.read_data_to_get_field(field={'DATETIME': 1})
    series_index = df.DATETIME.astype(str).apply(
        lambda x: x[:10]).drop_duplicates().tolist()
    insert_date_series = pd.Series(True, index=series_index)
    insert_date_series.name = stock
    insert_date_rows.append(insert_date_series)
    m.close_MongoDB_connection()

# Rows are labelled by collection name; columns are the union of all days.
insert_date_df = pd.DataFrame(insert_date_rows)

path = r'D:\code\factor_module\download_stock_min_data\stock_insert_situation.csv'
insert_date_df.to_csv(path)
from data_base.mongodb import MongoDB_io

# Script fragment: prepares an empty collection list and a connection to the
# 'stock_daily_data' database.
collection_list = list()

m = MongoDB_io()
m.set_db('stock_daily_data')
# NOTE(review): set_collection is called without a collection name, unlike
# every other call site in this project — confirm this is intended; the
# script also appears to stop here.
m.set_collection()
from jqdatasdk import *
import pandas as pd
from data_base.mongodb import MongoDB_io

# Script: check up to which day data has been inserted into the
# 'stock_pre_price' collection.
m = MongoDB_io()
m.set_db('stock_daily_data')
m.set_collection('stock_pre_price')

# get_start_end_date() yields two values, unpacked here as the first and
# last stored dates.
inserted_range = m.get_start_end_date()
start_date, end_date = inserted_range
from jqdatasdk import *
import pandas as pd
from data_base.mongodb import MongoDB_io

# Script: download the 'sw_l1', 'sw_l2' and 'sw_l3' industry lists, stack
# them, and insert them into the 'stock_sw_industry_code' collection.
# NOTE(review): the credentials below are hard-coded in source; consider
# loading them from the environment or a config file.
auth('15915765128', '87662638qjf')

start_date = '2005-01-01'

# DataFrame.append was removed in pandas 2.0; concatenate once instead.
df = pd.concat(
    [get_industries(name=level) for level in ('sw_l1', 'sw_l2', 'sw_l3')]
)
df.index.name = 'industry_code'
df = df.reset_index()

# Insert into the database.
m = MongoDB_io()
m.set_db('stock_daily_data')
m.set_collection('stock_sw_industry_code')
m.insert_huge_dataframe_by_block_to_mongodb(df)
# TODO: add an update-verification step afterwards.
count=None) weight_df = pd.DataFrame() for date in trade_date_list[(group_num - 1) * group_day_num:group_num * group_day_num]: print(date) q = query(valuation).filter(valuation.market_cap > 0) df = get_fundamentals(q, date) df = df.loc[:, [ 'code', 'day', 'market_cap', 'circulating_cap', 'circulating_market_cap' ]] df.rename(columns={'day': 'date'}, inplace=True) # 打印出总市值 weight_df = weight_df.append(df) pass ## df 格式修改 weight_df.date = pd.to_datetime(weight_df.date) if 'index' in weight_df.columns: weight_df.drop('index', axis=1, inplace=True) pass print('transfer done') # 插入数据库 m = MongoDB_io() m.set_db('stock_daily_data') m.set_collection('stock_capital_data') m.insert_huge_dataframe_by_block_to_mongodb(weight_df) pass
from jqdatasdk import *
import pandas as pd
from data_base.mongodb import MongoDB_io

# Script: download the security list and insert it into the 'stock_ipo_date'
# collection, with the index exposed as a 'stock' column and the start/end
# dates converted to datetimes.
# NOTE(review): the credentials below are hard-coded in source; consider
# loading them from the environment or a config file.
auth('15915765128', '87662638qjf')

df = get_all_securities(types=[], date=None)
df = df.rename_axis('stock').reset_index()
for date_column in ('start_date', 'end_date'):
    df[date_column] = pd.to_datetime(df[date_column])

# Insert into the database.
m = MongoDB_io()
m.set_db('stock_daily_data')
m.set_collection('stock_ipo_date')
m.insert_huge_dataframe_by_block_to_mongodb(df)
# TODO: add an update-verification step afterwards.
from jqdatasdk import *
import pandas as pd
from data_base.mongodb import MongoDB_io

# Script: for every stored trade day after 2010-01-01, look up the member
# stocks of each selected industry and build a (stock, date,
# industry_category) table for that day.
# NOTE(review): the credentials below are hard-coded in source; consider
# loading them from the environment or a config file.
auth('15915765128', '87662638qjf')

m = MongoDB_io()
m.set_db('stock_daily_data')
m.set_collection('stock_sw_industry_code')
sw_indus = m.read_data_to_get_dataframe()
m.set_collection('stock_trade_date')
trade_day_df = m.read_data_to_get_dataframe()
trade_day_df = trade_day_df[
    trade_day_df.trade_date > pd.to_datetime('2010-01-01')]
trade_day_list = trade_day_df.trade_date.astype(str)
# Only the first 34 stored industry codes are used.
# NOTE(review): presumably these are the level-1 industries — confirm.
industry_code_list = sw_indus.industry_code.iloc[:34].tolist()

industry_stock_grouping = pd.DataFrame()
for date in trade_day_list:
    print(date)
    # Series.append was removed in pandas 2.0; gather the per-industry
    # pieces and concatenate them once per day.
    industry_pieces = []
    for industry_code in industry_code_list:
        stock_list = get_industry_stocks(industry_code, date=date)
        industry_pieces.append(pd.Series(industry_code, index=stock_list))
    if industry_pieces:
        daily_industry_series = pd.concat(industry_pieces)
    else:
        # pd.concat rejects an empty list; keep the empty-series result.
        daily_industry_series = pd.Series(dtype=object)
    daily_industry_series.name = pd.to_datetime(date)
    daily_industry_df = daily_industry_series.to_frame()
    daily_industry_df_stack_up = daily_industry_df.stack().reset_index()
    daily_industry_df_stack_up.columns = ['stock', 'date', 'industry_category']
    # NOTE(review): industry_stock_grouping is never updated in the visible
    # source; the loop body appears to continue beyond what is shown.
#     'stock_sw_industry_code',
#     'stock_trade_date',
#     'zz500_weight']
# db_list=['stock_capital_data',
#     'stock_post_price',
#     'stock_real_price',
#     'stock_price_adj_factor',
#     'stock_sw_industry_code',
#     'zz500_weight']

# Collections to inspect; commented-out entries are skipped.
db_list = [
    # 'stock_capital_data',
    # 'stock_sw_industry_code',
    'stock_real_price_',
    # 'zz500_weight'
]

m = MongoDB_io()
m.set_db('stock_daily_data')

# One entry per collection, labelled with the collection name, holding
# whatever get_document_in_date() reports for that collection.
condition_df = pd.DataFrame()
for collection_name in db_list:
    print(collection_name)
    m.set_collection(collection_name)
    document_per_date = m.get_document_in_date()
    document_per_date.name = collection_name
    # NOTE(review): DataFrame.append was removed in pandas 2.0. The return
    # type of get_document_in_date() is not visible here, so the call is
    # left unchanged — confirm the type and switch to pd.concat.
    condition_df = condition_df.append(document_per_date)

# condition_df=condition_df.loc[:,'2010-01-01':]
condition_df.to_csv('update_condition.csv')
# Script: download the daily constituent weights of 000905.XSHG for one batch
# of trade days and insert them into the 'zz500_weight' collection.
start_date = '2005-01-01'
group_day_num = 1000
group_num = 4
trade_date_list = get_trade_days(start_date=start_date,
                                 end_date=None,
                                 count=None)

# Batch number group_num (1-based) covers group_day_num consecutive days.
batch_start = (group_num - 1) * group_day_num
batch_end = group_num * group_day_num

# DataFrame.append was removed in pandas 2.0 and copied the accumulated
# frame on every iteration; collect the daily frames and concatenate once.
daily_weight_frames = []
for date in trade_date_list[batch_start:batch_end]:
    print(date)
    daily_weight_frames.append(
        get_index_weights('000905.XSHG', date=date).reset_index())

if daily_weight_frames:
    weight_df = pd.concat(daily_weight_frames)
else:
    # pd.concat rejects an empty list; keep the empty-frame result.
    weight_df = pd.DataFrame()

# Adjust the frame format.
weight_df.date = pd.to_datetime(weight_df.date)
if 'index' in weight_df.columns:
    weight_df.drop('index', axis=1, inplace=True)
print('transfer done')

# Insert into the database.
m = MongoDB_io()
m.set_db('stock_daily_data')
m.set_collection('zz500_weight')
m.insert_huge_dataframe_by_block_to_mongodb(weight_df)