def sample_day(self, ticker, force_reload=False):
    """Build the daily-bar table of a product from its per-day minute tables.

    Only the first two characters of ``ticker`` are used; they are also the
    name of the day table. Every day in ``self.trading_days`` whose minute
    table exists is reduced to one row per contract id and inserted into the
    day table. The work is split into ``default_subprocess_numbers`` chunks
    of days and handed to ``run_paralell_tasks``.

    Args:
        ticker: Product code; truncated to its first two characters.
        force_reload: When true, an existing day table is dropped and
            rebuilt. When false and the table exists, nothing is done.

    Returns:
        0 if the day table already exists and ``force_reload`` is false,
        otherwise None.
    """
    ticker = ticker[:2]
    # Date range is the same as for the minute-level tables.
    days = self.trading_days
    ticker_info = Ticker()
    db_name_min = ticker_info.get_dbname(ticker, level='min')
    db_name_day = ticker_info.get_dbname(ticker, level='day')
    table_name = ticker
    print(db_name_day, table_name)

    day_table = data_model_day(db_name_day, table_name)
    if day_table.check_table_exist():
        if not force_reload:
            print('table already exist!')
            return 0
        print('[warning] drop day table [{}]'.format(table_name))
        day_table.drop_table(table_name)
    day_table.create_table()

    def sample_sub_day_list(sample_days):
        """Insert one daily row per contract for each day in sample_days."""
        for iday in sample_days:
            min_table = data_model_min(db_name_min, str(iday))
            if not min_table.check_table_exist():
                continue
            print(iday)
            min_df_all = pd.read_sql_table(str(iday), min_table.engine,
                                           index_col=['spot'])
            for ticker_id, min_df in min_df_all.groupby('id'):
                # Rows are addressed by their 'spot' label, as the removed
                # ``.ix`` indexer did on an integer index.
                # NOTE(review): assumes spots of one contract are the
                # integers 0..len-1 -- confirm against the minute sampler.
                last_spot = len(min_df) - 1
                open_price = float(min_df.loc[0, 'OpenPrice'])
                close_price = float(min_df.loc[last_spot, 'ClosePrice'])
                high_price = float(min_df['HighPrice'].max())
                low_price = float(min_df['LowPrice'].min())
                volume = int(min_df.loc[last_spot, 'Volume'])
                open_interest = int(min_df.loc[last_spot, 'OpenInterest'])
                day_row = (ticker_id, int(iday), open_price, high_price,
                           low_price, close_price, volume, open_interest)
                day_table.insert_dictlike(day_table.day_struct,
                                          dict(zip(day_columns, day_row)),
                                          merge=True)

    # Split points must be integers: true division would hand floats to
    # np.split on Python 3, which it rejects as slice indices.
    chunk_size = len(days) // default_subprocess_numbers
    split_points = [
        chunk_size * i for i in range(1, default_subprocess_numbers)
    ]
    sub_day_list = [list(chunk) for chunk in np.split(days, split_points)]
    run_paralell_tasks(sample_sub_day_list, sub_day_list)
def set_order_cffex(self, ticker='if', method='fixed_days', fixed_days=3,
                    force_reload=False):
    """Store the per-day contract ordering for a CFFEX product.

    For every trading day that has a minute table (and, unless
    ``force_reload`` is set, no stored order yet) the distinct contract ids
    of that table are read in ascending order. When the date is after the
    month's rolling day shifted by ``fixed_days`` and not after the
    unshifted rolling day, the first two ids are swapped. The row
    ``[date, id, id, ...]`` is then inserted into the order table.

    Args:
        ticker: Product code passed to ``Ticker.get_dbname``.
        method: Only 'fixed_days' produces rows; other values only print.
        fixed_days: Offset passed to ``get_exchange_rollling_day_cffex``.
        force_reload: When true, all stored order rows are deleted first.
    """
    # Minute tables are scanned instead of tick tables because it is faster.
    tick_info = Ticker()
    dbname = tick_info.get_dbname(ticker, 'min')
    dates = FutureDates()
    shifted_roll = self.get_exchange_rollling_day_cffex(offset=fixed_days)
    exchange_roll = self.get_exchange_rollling_day_cffex(offset=0)
    trading_day_list = dates.get_trading_day_list()

    # Dates that already have a stored order are skipped later on.
    print('force_reload = ', force_reload)
    if not force_reload:
        known_dates = set(
            int(rec.date) for rec in self.query_obj(self.future_order_struct))
    else:
        stale_records = self.query_obj(self.future_order_struct)
        self.delete_lists_obj(stale_records)
        known_dates = []

    for date in trading_day_list:
        minute_table = data_model_min(db_name=dbname, table_name=str(date))
        if not minute_table.check_table_exist():
            continue
        year, month, day = get_year_month_day(date)
        if not force_reload and date in known_dates:
            continue
        print(date, shifted_roll[year][month], exchange_roll[year][month])
        if method != 'fixed_days':
            continue

        query = 'select distinct id from {}.{} order by id asc;'.format(
            dbname, str(date))
        rows = minute_table.execute_sql(query)
        orders = [row[0] for row in rows]
        # Inside the roll window the second contract takes the first slot.
        if int(shifted_roll[year][month]) < date <= int(
                exchange_roll[year][month]):
            orders[0], orders[1] = orders[1], orders[0]
        to_be_inserted = [date] + orders
        print(to_be_inserted)
        self.insert_listlike(self.future_order_struct, to_be_inserted, True)
def debug_single_day_min(ticker, day=20140314, freq=120):
    """Debug helper: resample one day's tick table into bars and print them.

    The tick table of ``day`` is read, grouped by contract id, and every
    window of ``120 * freq`` consecutive tick spots is reduced to one bar
    (time, open, high, low, close, volume, open interest). Nothing is
    written back; the contract id and the head of each bar frame are
    printed.

    Args:
        ticker: Product code; its first two characters select the CFFEX or
            the default number of tick spots per day.
        day: Trading day, also the name of the tick table.
        freq: Multiplier for the bar width (``spots_gap = 120 * freq``).

    Raises:
        SystemExit: With code -1 if any bar cannot be built; the offending
            tick spot is printed first.
    """
    spots_gap = 120 * freq
    ticker_info = Ticker()
    day_mode = DayMode()
    if ticker[:2] in cffex_tickers:
        total_spots_tick = day_mode.cffex_last
    else:
        total_spots_tick = day_mode.other_last
    total_spots_min = total_spots_tick // spots_gap

    db_name_tick = ticker_info.get_dbname(ticker, level='tick')
    tick_table = data_model_tick(db_name_tick, str(day))
    tick_df_all = pd.read_sql_table(str(day), tick_table.engine,
                                    index_col=['spot'])
    for ticker_id, tick_df in tick_df_all.groupby('id'):
        print(ticker_id)
        min_df = pd.DataFrame(index=list(range(total_spots_min)),
                              columns=min_columns)
        min_df.loc[:, 'id'] = ticker_id
        min_df.loc[:, 'day'] = day
        min_df.loc[:, 'spot'] = min_df.index
        for tick_spot in range(0, total_spots_tick - spots_gap, spots_gap):
            # Floor division keeps the bar row label an integer; true
            # division yields a float on Python 3.
            min_spot = tick_spot // spots_gap
            last_tick = tick_spot + spots_gap - 1
            try:
                # Label-based, end-inclusive slice of this bar's ticks.
                window = tick_df.loc[tick_spot:last_tick, 'LastPrice']
                min_df.loc[min_spot, 'Time'] = (
                    tick_df.loc[tick_spot, 'Time'].split('.')[0])
                min_df.loc[min_spot, 'OpenPrice'] = float(
                    tick_df.loc[tick_spot, 'LastPrice'])
                min_df.loc[min_spot, 'HighPrice'] = float(window.max())
                min_df.loc[min_spot, 'LowPrice'] = float(window.min())
                min_df.loc[min_spot, 'ClosePrice'] = float(
                    tick_df.loc[last_tick, 'LastPrice'])
                min_df.loc[min_spot, 'Volume'] = int(
                    tick_df.loc[last_tick, 'Volume'])
                min_df.loc[min_spot, 'OpenInterest'] = int(
                    tick_df.loc[last_tick, 'OpenInterest'])
            except Exception:
                # Report where the data broke, then abort the debug run.
                print(tick_spot)
                print(tick_df.loc[tick_spot, 'Time'])
                raise SystemExit(-1)
        print(min_df.head())
def __init__(self, ticker, num_of_tickers=None):
    """Set up the 'future_order_<xx>' table definition in the 'dates' db.

    The table has an integer primary key column 'date' followed by one
    String(20) column per contract slot, named with the lower-cased
    two-letter product prefix and a 4-wide zero-padded running number.

    Args:
        ticker: Product code; its first two characters, lower-cased, form
            the table suffix and the column prefix.
        num_of_tickers: Number of contract columns. When None it is looked
            up through ``Ticker.get_num_of_tickers`` for the first trading
            day, using the untruncated ``ticker``.
    """
    if num_of_tickers is None:
        tick_info = Ticker()
        first_day = FutureDates().get_trading_day_list()[0]
        num_of_tickers = tick_info.get_num_of_tickers(ticker, first_day)

    ticker = ticker[:2].lower()
    db_name = 'dates'
    table_name = '_'.join(('future_order', ticker))

    # The name is assigned both before and after the base initialiser,
    # as in the original statement order.
    self.table_name = table_name
    super(futureOrder, self).__init__(db_name)
    self.table_name = table_name

    ticker_columns = []
    for slot in range(1, num_of_tickers + 1):
        ticker_columns.append('{0}{1:0>4}'.format(ticker, str(slot)))

    date_column = Column('date', Integer, primary_key=True,
                         autoincrement=False)
    contract_columns = [Column(name, String(20)) for name in ticker_columns]
    self.table_struct = Table(table_name, self.meta, date_column,
                              *contract_columns)
    self.future_order_struct = self.quick_map(self.table_struct)
def set_future_order_au(force_reload=True):
    """Rebuild the contract ordering table for the 'au' product.

    The number of contract columns is looked up for day 20140102, then
    ``futureOrder.set_order_shfex`` is run with its remaining defaults.

    Args:
        force_reload: Forwarded to ``set_order_shfex``.
    """
    contract_count = Ticker().get_num_of_tickers('au', 20140102)
    order_table = futureOrder('au', contract_count)
    order_table.set_order_shfex(force_reload=force_reload)
def set_order_shfex(self, ticker='au', method='avg_volume_open_interest',
                    fixed_days=3, force_reload=False):
    """Store the per-day contract ordering for an SHFE product.

    For each trading day the applicable rolling date is chosen: this
    month's if the day is not past it, otherwise next month's (falling back
    to this month's when next month has no entry). With the
    'avg_volume_open_interest' method, contracts are ranked by the summed
    Volume + OpenInterest over the trading days ending at that rolling
    date, largest first, and ``[date, id, id, ...]`` is inserted into the
    order table.

    Args:
        ticker: Product code passed to ``Ticker``.
        method: Only 'avg_volume_open_interest' produces rows.
        fixed_days: Offset for ``get_exchange_rollling_day_shfex`` and the
            maximum number of trading days in the ranking window.
        force_reload: When true, all stored order rows are deleted first;
            otherwise dates that already have a row are skipped.

    Returns:
        -1 if the day-level table does not exist, otherwise None.
    """
    trading_day_list = FutureDates().get_trading_day_list()
    tick_info = Ticker()
    dbname = tick_info.get_dbname(ticker, 'day')
    table_name = tick_info.get_table_name(ticker, trading_day_list[0], 'day')
    rolling_day = self.get_exchange_rollling_day_shfex(offset=fixed_days)
    print(dbname, table_name, rolling_day)

    print('force_reload = ', force_reload)
    if force_reload:
        all_records = self.query_obj(self.future_order_struct)
        self.delete_lists_obj(all_records)
        exists_order_dates = set()
    else:
        exists_order_dates = {
            int(x.date) for x in self.query_obj(self.future_order_struct)
        }

    shfex_table_obj = data_model_day(db_name=dbname, table_name=table_name)
    if not shfex_table_obj.check_table_exist():
        print('shfex day data does not exist!')
        return -1

    df = pd.read_sql_table(table_name, shfex_table_obj.engine)
    # Maps a trading day to its position in trading_day_list.
    trading_day_series = pd.Series(index=trading_day_list,
                                   data=list(range(len(trading_day_list))))

    for date in trading_day_list:
        year, month, day = get_year_month_day(date)
        if not force_reload and date in exists_order_dates:
            continue

        if date <= rolling_day[year][month]:
            rolling_date = rolling_day[year][month]
        else:
            # Past this month's roll: use next month's. December wraps to
            # January of the following year. The previous (month + 1) % 12
            # arithmetic sent November to month 0 of the next year.
            next_year = int(year + month // 12)
            next_month = int(month % 12 + 1)
            try:
                rolling_date = rolling_day[next_year][next_month]
            except LookupError:
                # No entry for next month (e.g. end of the calendar).
                rolling_date = rolling_day[year][month]
        print(date, rolling_date)

        if method == 'avg_volume_open_interest':
            nth_day = trading_day_series[rolling_date]
            forward_days = fixed_days if nth_day >= fixed_days else nth_day
            consider_days = {
                trading_day_list[nth_day - i] for i in range(forward_days)
            }
            in_window = df['day'].isin(consider_days)
            sub_df = df.loc[in_window, ['id', 'Volume', 'OpenInterest']]
            vol_dict = {}
            for real_ticker_id, sub_sub_df in sub_df.groupby('id'):
                # Total of Volume and OpenInterest over the window.
                vol_dict[real_ticker_id] = sub_sub_df[
                    ['Volume', 'OpenInterest']].sum().sum()
            vol_list = sorted(vol_dict.items(), key=lambda x: x[1],
                              reverse=True)
            order = [pair[0] for pair in vol_list]
            to_be_inserted = [date]
            to_be_inserted.extend(order)
            print(order)
            self.insert_listlike(self.future_order_struct, to_be_inserted,
                                 True)
def sample_min(self, ticker, start_date=None, end_date=None,
               force_reload=False):
    """Resample per-day tick tables of a product into per-day bar tables.

    For each selected trading day that has a tick table, a table of the
    same name is created in the minute-level database. Every contract id
    gets one row per window of ``self.spots_gap`` tick spots (time, open,
    high, low, close, volume, open interest). The days are split into
    ``default_subprocess_numbers`` chunks and handed to
    ``run_paralell_tasks``.

    Args:
        ticker: Product code; truncated to its first two characters.
        start_date: Exclusive lower bound on the trading days. When None,
            all of ``self.trading_days`` are used and ``end_date`` is
            ignored.
        end_date: Exclusive upper bound, applied only together with
            ``start_date``; None leaves the range open-ended.
        force_reload: When true, an existing bar table of a day is dropped
            and rebuilt; otherwise such days are skipped.
    """
    ticker = ticker[:2]
    if start_date is None:
        days = self.trading_days
    elif end_date is None:
        # Open upper bound; comparing against None raises on Python 3.
        days = [d for d in self.trading_days if start_date < d]
    else:
        days = [d for d in self.trading_days if start_date < d < end_date]

    day_mode = DayMode()
    ticker_info = Ticker()
    if ticker in cffex_tickers:
        total_spots_tick = day_mode.cffex_last
    else:
        total_spots_tick = day_mode.other_last
    spots_gap = self.spots_gap
    total_spots_min = total_spots_tick // spots_gap
    db_name_min = ticker_info.get_dbname(ticker, level='min')
    db_name_tick = ticker_info.get_dbname(ticker, level='tick')
    print('spots count of tick/min = ', total_spots_tick, total_spots_min)

    def sample_sub_day_list(sample_days):
        """Build the bar table for each day in sample_days."""
        for iday in sample_days:
            tick_table = data_model_tick(db_name_tick, str(iday))
            # Without a tick table there is nothing to resample.
            if not tick_table.check_table_exist():
                continue
            tick_table.create_table()

            min_table = data_model_min(db_name_min, str(iday))
            if min_table.check_table_exist():
                if not force_reload:
                    continue
                print('[warning] drop table = ', iday)
                min_table.drop_table(str(iday))
            print(iday)
            min_table.create_table()

            tick_df_all = pd.read_sql_table(str(iday), tick_table.engine,
                                            index_col=['spot'])
            for ticker_id, tick_df in tick_df_all.groupby('id'):
                min_df = pd.DataFrame(index=list(range(total_spots_min)),
                                      columns=min_columns)
                min_df.loc[:, 'id'] = ticker_id
                min_df.loc[:, 'day'] = iday
                min_df.loc[:, 'spot'] = min_df.index
                for tick_spot in range(0, total_spots_tick - spots_gap,
                                       spots_gap):
                    # Floor division keeps the row label an integer; true
                    # division yields a float on Python 3.
                    min_spot = tick_spot // spots_gap
                    last_tick = tick_spot + spots_gap - 1
                    # Label-based, end-inclusive slice of this bar's ticks.
                    window = tick_df.loc[tick_spot:last_tick, 'LastPrice']
                    min_df.loc[min_spot, 'Time'] = (
                        tick_df.loc[tick_spot, 'Time'].split('.')[0])
                    min_df.loc[min_spot, 'OpenPrice'] = float(
                        tick_df.loc[tick_spot, 'LastPrice'])
                    min_df.loc[min_spot, 'HighPrice'] = float(window.max())
                    min_df.loc[min_spot, 'LowPrice'] = float(window.min())
                    min_df.loc[min_spot, 'ClosePrice'] = float(
                        tick_df.loc[last_tick, 'LastPrice'])
                    min_df.loc[min_spot, 'Volume'] = int(
                        tick_df.loc[last_tick, 'Volume'])
                    min_df.loc[min_spot, 'OpenInterest'] = int(
                        tick_df.loc[last_tick, 'OpenInterest'])
                min_df.to_sql(str(iday), min_table.engine, index=False,
                              if_exists='append')

    # Start multiprocessing. Split points must be integers: true division
    # would hand floats to np.split on Python 3.
    chunk_size = len(days) // default_subprocess_numbers
    split_points = [
        chunk_size * i for i in range(1, default_subprocess_numbers)
    ]
    sub_day_list = [list(chunk) for chunk in np.split(days, split_points)]
    run_paralell_tasks(sample_sub_day_list, sub_day_list)