def estab_all_tag_data(day_list):
    """Build and save one down-sampled, tag-interleaved matrix per day.

    For every day in ``day_list`` whose tick-data directory exists under
    ``data_path``, each tag of a fixed tag list is loaded through
    ``get_md``, preprocessed, sampled at every 10th column, and its first
    240 samples are written into a ``(3690, n_tags * 240)`` array so that
    column ``t * n_tags + k`` holds sample ``t`` of tag ``k``.  The array
    is stored with ``np.save`` at ``path + str(day)``.  Days without a
    data directory are skipped.

    While processing the first entry of ``day_list`` (provided its data
    directory exists) a line plot of row 0 is saved for every tag, using
    the tag name as the figure file name.

    Args:
        day_list: iterable of integer dates encoded as YYYYMMDD.

    Returns:
        None.  All results are written to disk.
    """
    tags = [
        'totoff', 'vol', 'totbid', 'amount', 'last', 'trade', 'bid1', 'ask1',
        'bid_vol1', 'bid_vol2', 'bid_vol3', 'bid_vol4', 'bid_vol5',
        'bid_vol6', 'bid_vol7', 'bid_vol8', 'bid_vol9', 'bid_vol10',
        'ask_vol1', 'ask_vol2', 'ask_vol3', 'ask_vol4', 'ask_vol5',
        'ask_vol6', 'ask_vol7', 'ask_vol8', 'ask_vol9', 'ask_vol10',
    ]
    # Tags that are passed through chafen() instead of just dropping the
    # first column.
    accumulated_tags = ['amount', 'trade', 'vol', 'totbid', 'totoff']
    # Tags requested from get_md as float64; everything else is float32.
    float64_tags = ['totoff', 'vol', 'totbid', 'amount']

    n_tags = len(tags)
    # First destination column of each of the 240 samples; the tag index is
    # added per tag to interleave the tags within one sample.
    sample_base_cols = np.arange(240) * n_tags

    for day_idx, day in enumerate(day_list):
        # Check whether tick data exists for this day (<data_path>/YYYY/MM/DD).
        yyyy = str(day // 10000).zfill(4)
        mm = str(day // 100 % 100).zfill(2)
        dd = str(day % 100).zfill(2)
        if not os.path.exists(os.path.join(data_path, yyyy, mm, dd)):
            continue

        # NOTE(review): the row count 3690 is hard-coded; the assignment
        # below requires get_md(...).T to have exactly that many rows.
        day_matrix = np.zeros((3690, n_tags * 240))

        for tag_idx, tag in enumerate(tags):
            dtype_name = 'float64' if tag in float64_tags else 'float32'
            # Original note: codes x ticks, e.g. 3627 x 4802.
            values = get_md(day, tag, dtype=dtype_name).T.values

            is_level_price = (
                re.search(r'ask[0-9]+', tag) is not None
                or re.search(r'bid[0-9]+', tag) is not None
            )
            if is_level_price:
                # Express quote prices relative to the 'open' series, then
                # drop the first column.
                values -= get_md(day, 'open', dtype='float32').T.values
                values = values[:, 1:]
            elif tag in accumulated_tags:
                # chafen is defined elsewhere; presumably it differences
                # the running totals -- confirm.
                values = chafen(tag, values)
            else:
                values = values[:, 1:]

            # Return value is ignored, so this presumably patches `values`
            # in place -- confirm against fullfill_data.
            fullfill_data(values)

            # Keep every 10th column of the first 4800.
            values = values[:, np.arange(0, 4800, 10)]

            dest_cols = sample_base_cols + np.ones(240, dtype='int64') * tag_idx
            day_matrix[:, dest_cols] = values[:, np.arange(240)].copy()

            if day_idx == 0:
                # Visual sanity check: first row of the sampled series.
                plt.figure(1, figsize=(15, 10))
                plt.plot(np.arange(values.shape[1]), values[0, :], c='blue')
                plt.xlabel('x', fontsize=30)
                plt.ylabel('y', fontsize=30)
                plt.title('view of ' + tag, fontsize=40)
                plt.savefig(tag)
                plt.close(1)

        np.save(path + str(day), day_matrix)
def read_data_by_tag():
    """Load a fixed set of days per tag and return normalised and chafen data.

    For each tag in the (currently single-element) tag list, the transposed
    ``get_md`` values of the three hard-coded days are joined with
    ``np.hstack``.  The joined array is passed to ``normalization`` and to
    ``chafen``; both results are stored per tag.  A line plot of the first
    4801 values of row 0 of the ``chafen`` output is saved to
    ``'chafen_fig/<tag>_chafen'``.

    Returns:
        tuple: ``(data_norm, data_cha)`` -- two dicts keyed by tag name
        holding the ``normalization`` and ``chafen`` outputs respectively.
    """
    # Full tag list kept for reference (only 'ask1' is processed below):
    #   'totoff', 'vol', 'totbid', 'amount', 'last', 'low', 'high', 'open',
    #   'avebid', 'aveoff', 'trade',
    #   'bid1' .. 'bid10', 'ask1' .. 'ask10',
    #   'bid_vol1' .. 'bid_vol10', 'ask_vol1' .. 'ask_vol10'
    int64_tags = ['totoff', 'vol', 'totbid', 'amount']

    # Hard-coded days.  The original code carried a commented-out alternative:
    # IndexData().get_deal_day_list_in_period(start_day, end_day).
    day_list = [20160504, 20160505, 20160506]

    data_norm = {}
    data_cha = {}
    tag_all = ['ask1']

    for tag in tag_all:
        dtype_name = 'int64' if tag in int64_tags else 'float32'

        # Grow the array one day at a time; the first day is copied so the
        # accumulated array never aliases what get_md returned.
        array_data = None
        for date in day_list:
            day_values = get_md(date, tag, dtype=dtype_name).T.values
            if array_data is None:
                array_data = day_values.copy()
            else:
                array_data = np.hstack((array_data, day_values))

        # normalization() runs before chafen() on the same array object.
        data_norm[tag] = normalization(array_data)
        diffed = chafen(tag, array_data)
        data_cha[tag] = diffed

        plt.figure(1, figsize=(15, 10))
        plt.plot(np.arange(4801), diffed[0, 0:4801], c='blue')
        plt.xlabel('x', fontsize=30)
        plt.ylabel('y', fontsize=30)
        plt.title(tag, fontsize=40)
        plt.savefig('chafen_fig/' + tag + '_' + 'chafen')
        plt.close(1)

    return data_norm, data_cha
def estab_one_day_data_lab(tag, day_list, is_chafen=False, is_zero_mean=True,
                           is_minus_open=True):
    """Load one tag for several days and return one preprocessed array per day.

    For each day the transposed ``get_md`` values are, in this order:

    1. reduced by the ``'open'`` series (if ``is_minus_open``),
    2. passed to ``fullfill_data`` (return value ignored, so it presumably
       patches the array in place -- confirm against its definition),
    3. centred by subtracting each row's mean (if ``is_zero_mean``),
    4. passed through ``chafen`` (if ``is_chafen``), otherwise the first
       column is dropped.

    Args:
        tag: name of the data field to load.  ``'totoff'``, ``'vol'``,
            ``'totbid'`` and ``'amount'`` are requested as float64, every
            other tag as float32.
        day_list: iterable of integer dates encoded as YYYYMMDD.
        is_chafen: apply ``chafen(tag, array)`` instead of dropping the
            first column.
        is_zero_mean: subtract the per-row mean.
        is_minus_open: subtract the ``'open'`` values first.

    Returns:
        list: one entry per element of ``day_list``, in order.  Days whose
        directory ``<data_path>/YYYY/MM/DD`` does not exist are represented
        by a ``np.zeros((1, 1))`` placeholder.
    """
    float64_tags = ('totoff', 'vol', 'totbid', 'amount')
    dtype_name = 'float64' if tag in float64_tags else 'float32'

    one_day_data = []
    for day in day_list:
        # Check whether tick data exists for this day.
        year = str(day // 10000).zfill(4)
        month = str(day // 100 % 100).zfill(2)
        date = str(day % 100).zfill(2)
        path_day = os.path.join(data_path, year, month, date)
        if not os.path.exists(path_day):
            # Keep the output aligned with day_list via a placeholder.
            one_day_data.append(np.zeros((1, 1)))
            continue

        # Original note: rows are codes, columns are ticks.
        array_data = get_md(day, tag, dtype=dtype_name).T.values

        if is_minus_open:
            array_data -= get_md(day, 'open', dtype='float32').T.values

        fullfill_data(array_data)

        if is_zero_mean:
            # keepdims=True yields a (rows, 1) column that broadcasts across
            # every column, so no dense float64 matrix of the full data
            # shape has to be built just to repeat the row means.
            array_data -= np.mean(array_data, axis=1, keepdims=True)

        if is_chafen:
            array_data = chafen(tag, array_data)
        else:
            array_data = array_data[:, 1:]

        one_day_data.append(array_data)

    return one_day_data
def estab_open_low_high(day_list):
    """Save the last-column open/low/high values for every available day.

    For each day in ``day_list`` whose directory ``<data_path>/YYYY/MM/DD``
    exists, the ``'open'``, ``'low'`` and ``'high'`` fields are loaded via
    ``get_md`` and the last column of each transposed value array is
    written into column 0, 1 and 2 of a ``(3690, 3)`` array.  The array is
    stored with ``np.save`` at ``path + 'olh/' + str(day)``.  Days without
    a data directory are skipped.

    Args:
        day_list: iterable of integer dates encoded as YYYYMMDD.

    Returns:
        None.  Results are written to disk.
    """
    fields = ['open', 'low', 'high']

    for day in day_list:
        # Check whether tick data exists for this day.
        yyyy = str(day // 10000).zfill(4)
        mm = str(day // 100 % 100).zfill(2)
        dd = str(day % 100).zfill(2)
        if not os.path.exists(os.path.join(data_path, yyyy, mm, dd)):
            continue

        # NOTE(review): the row count 3690 is hard-coded; get_md(...).T must
        # have exactly that many rows for the column assignment to succeed.
        summary = np.zeros((3690, 3))
        for col, field in enumerate(fields):
            field_values = get_md(day, field, dtype='float32').T.values
            summary[:, col] = field_values[:, -1]

        np.save(path + 'olh/' + str(day), summary)