def estab_all_tag_data(day_list):
    """Build and save one down-sampled, tag-interleaved matrix per day.

    For every day in ``day_list`` whose tick-data directory exists under
    ``data_path``, each tag in ``tag_all`` is loaded with ``get_md``,
    transformed, sampled at every 10th column and written into a
    ``(3690, len(tag_all) * 240)`` array.  Columns are interleaved by tag:
    sample ``k`` of tag number ``j`` lands in column ``k * len(tag_all) + j``.
    The finished array is saved with ``np.save`` to ``path + str(day)``.

    Per-tag transformation:
      * tags containing ``ask<digits>`` or ``bid<digits>``: the ``'open'``
        array is subtracted in place, then the first column is dropped;
      * tags in ``tag_accum``: replaced by the result of ``chafen``;
      * every other tag: the first column is dropped.

    Side effects: besides the ``.npy`` files, while processing the first
    entry of ``day_list`` a line plot of row 0 of each tag is saved under the
    tag's name.

    Args:
        day_list: iterable of integer dates in ``YYYYMMDD`` form.

    Returns:
        None.
    """
    tag_all = ['totoff', 'vol', 'totbid', 'amount', 'last', 'trade', 'bid1',
               'ask1', 'bid_vol1', 'bid_vol2', 'bid_vol3', 'bid_vol4',
               'bid_vol5', 'bid_vol6', 'bid_vol7', 'bid_vol8', 'bid_vol9',
               'bid_vol10', 'ask_vol1', 'ask_vol2', 'ask_vol3', 'ask_vol4',
               'ask_vol5', 'ask_vol6', 'ask_vol7', 'ask_vol8', 'ask_vol9',
               'ask_vol10']
    tag_accum = ['amount', 'trade', 'vol', 'totbid', 'totoff']
    tag_dtype_float64 = ['totoff', 'vol', 'totbid', 'amount']

    n_tags = len(tag_all)
    n_samples = 240
    # Matches e.g. 'ask1' / 'bid1' but not 'ask_vol1' / 'bid_vol1'.
    price_tag = re.compile(r'(?:ask|bid)[0-9]+')

    for i, day in enumerate(day_list):
        # Skip days for which no tick-data directory exists.
        year = str(day // 10000).zfill(4)
        month = str(day // 100 % 100).zfill(2)
        date = str(day % 100).zfill(2)
        path_day = os.path.join(data_path, year, month, date)
        if not os.path.exists(path_day):
            continue

        # Allocate only for days that are actually processed.
        all_tag_array = np.zeros((3690, n_tags * n_samples))
        # The 'open' array is loaded lazily and at most once per day.
        array_open = None

        for i2, tag in enumerate(tag_all):
            dtype1 = 'float64' if tag in tag_dtype_float64 else 'float32'
            # presumably codes x ticks after the transpose (per the original
            # inline comment) -- confirm against get_md
            array_data = get_md(day, tag, dtype=dtype1).T.values

            if price_tag.search(tag) is not None:
                if array_open is None:
                    array_open = get_md(day, 'open', dtype='float32').T.values
                array_data -= array_open
                array_data = array_data[:, 1:]
            elif tag in tag_accum:
                array_data = chafen(tag, array_data)
            else:
                array_data = array_data[:, 1:]

            fullfill_data(array_data)

            # Index-array sampling is kept on purpose: it raises IndexError if
            # fewer than 4791 columns are available instead of truncating.
            array_data = array_data[:, np.arange(0, 4800, 10)]

            # NOTE(review): the sampling above yields 480 columns but only the
            # first 240 are stored -- confirm this is intended.
            all_tag_array[:, i2::n_tags] = array_data[:, :n_samples]

            # NOTE(review): plots are produced only for the first entry of
            # day_list, and not at all if that day's directory is missing.
            if i == 0:
                plt.figure(1, figsize=(15, 10))
                plt.plot(np.arange(array_data.shape[1]),
                         array_data[0, :],
                         c='blue')
                plt.xlabel('x', fontsize=30)
                plt.ylabel('y', fontsize=30)
                plt.title('view of ' + tag, fontsize=40)
                plt.savefig(tag)
                plt.close(1)

        np.save(path + str(day), all_tag_array)
# Example #2
# 0
def read_data_by_tag():
    """Load tag data for a fixed list of days; return normalised and differenced copies.

    For each tag in the hard-coded tag list, the per-day arrays returned by
    ``get_md(...).T.values`` are concatenated column-wise with ``np.hstack``.
    The combined array is passed to ``normalization`` and then to ``chafen``;
    row 0 of the ``chafen`` result (columns 0..4800) is plotted and saved under
    ``chafen_fig/``.

    Returns:
        tuple ``(data_norm, data_cha)``: two dicts keyed by tag name holding
        the ``normalization`` and ``chafen`` results respectively.
    """
    # Reference list of tag names carried over from the original docstring;
    # only the tags in ``tags`` below are processed:
    #   'totoff', 'vol', 'totbid', 'amount', 'last', 'low', 'high', 'open',
    #   'avebid', 'aveoff', 'trade', 'bid1' .. 'bid10', 'ask1' .. 'ask10',
    #   'bid_vol1' .. 'bid_vol10', 'ask_vol1' .. 'ask_vol10'
    int64_tags = ['totoff', 'vol', 'totbid', 'amount']
    days = [20160504, 20160505, 20160506]
    tags = ['ask1']

    data_norm = {}
    data_cha = {}

    for tag in tags:
        dtype1 = 'int64' if tag in int64_tags else 'float32'

        # One array per day, joined side by side (columns are appended).
        per_day = [get_md(date, tag, dtype=dtype1).T.values for date in days]
        array_data = np.hstack(per_day)

        # normalization runs before chafen, exactly as in the original order.
        data_norm[tag] = normalization(array_data)
        cha_data = chafen(tag, array_data)
        data_cha[tag] = cha_data

        plt.figure(1, figsize=(15, 10))
        xs = np.arange(4801)
        plt.plot(xs, cha_data[0, 0:4801], c='blue')
        plt.xlabel('x', fontsize=30)
        plt.ylabel('y', fontsize=30)
        plt.title(tag, fontsize=40)
        plt.savefig('chafen_fig/' + tag + '_' + 'chafen')
        plt.close(1)

    return data_norm, data_cha
def estab_one_day_data_lab(tag,
                           day_list,
                           is_chafen=False,
                           is_zero_mean=True,
                           is_minus_open=True):
    """Load one tag for each day and return the per-day arrays as a list.

    For each day whose tick-data directory exists under ``data_path`` the tag
    is loaded with ``get_md`` (transposed), optionally has the ``'open'`` array
    subtracted, is passed through ``fullfill_data``, optionally has each row's
    mean subtracted, and is finally either passed through ``chafen`` or has its
    first column dropped.  Days without a directory contribute a ``(1, 1)``
    zero array so the result stays aligned with ``day_list``.

    Args:
        tag: tag name passed to ``get_md``.  Tags in ``tag_dtype_float64`` are
            loaded as ``'float64'``, all others as ``'float32'``.
        day_list: iterable of integer dates in ``YYYYMMDD`` form.
        is_chafen: if true, the array is replaced by ``chafen(tag, array)``;
            otherwise the first column is dropped.
        is_zero_mean: if true, the mean of every row is subtracted from that
            row (in place).
        is_minus_open: if true, the ``'open'`` array (loaded as ``'float32'``)
            is subtracted in place before any other processing.

    Returns:
        list of ``numpy.ndarray``, one entry per element of ``day_list``.
    """
    tag_dtype_float64 = ['totoff', 'vol', 'totbid', 'amount']
    dtype1 = 'float64' if tag in tag_dtype_float64 else 'float32'

    one_day_data = []
    for day in day_list:
        # Check whether tick data exists for this day.
        year = str(day // 10000).zfill(4)
        month = str(day // 100 % 100).zfill(2)
        date = str(day % 100).zfill(2)
        path_day = os.path.join(data_path, year, month, date)
        if not os.path.exists(path_day):
            # Placeholder keeps the output index-aligned with day_list.
            one_day_data.append(np.zeros((1, 1)))
            continue

        # presumably codes x ticks after the transpose (per the original
        # inline comment) -- confirm against get_md
        array_data = get_md(day, tag, dtype=dtype1).T.values

        if is_minus_open:
            array_data -= get_md(day, 'open', dtype='float32').T.values

        fullfill_data(array_data)

        if is_zero_mean:
            # keepdims broadcasts each row's mean across its columns without
            # materialising a full-size temporary matrix.
            array_data -= np.mean(array_data, axis=1, keepdims=True)

        if is_chafen:
            array_data = chafen(tag, array_data)
        else:
            array_data = array_data[:, 1:]

        one_day_data.append(array_data)
    return one_day_data
def estab_open_low_high(day_list):
    """Save the last-column open / low / high values of each day to disk.

    For every day in ``day_list`` whose tick-data directory exists under
    ``data_path``, the ``'open'``, ``'low'`` and ``'high'`` tags are loaded
    with ``get_md`` as ``'float32'``; the final column of each transposed
    array becomes one column of a ``(3690, 3)`` array, which is written with
    ``np.save`` to ``path + 'olh/' + str(day)``.  Days without a directory are
    skipped.

    Args:
        day_list: iterable of integer dates in ``YYYYMMDD`` form.

    Returns:
        None.
    """
    fields = ['open', 'low', 'high']

    for day in day_list:
        # Split YYYYMMDD into its three components.
        year_month, dd = divmod(day, 100)
        yyyy, mm = divmod(year_month, 100)
        day_dir = os.path.join(data_path,
                               str(yyyy).zfill(4),
                               str(mm).zfill(2),
                               str(dd).zfill(2))

        # Nothing to do when the tick data for this day is absent.
        if not os.path.exists(day_dir):
            continue

        table = np.zeros((3690, 3))
        for col, tag in enumerate(fields):
            values = get_md(day, tag, dtype='float32').T.values
            # Only the last column of each tag is kept.
            table[:, col] = values[:, -1]

        target = path + 'olh/' + str(day)
        np.save(target, table)