def get_new_stock_info(self, xnms, xinx):
    """Build a mask that flags stocks with enough listing history.

    The listing-state table is forward-filled, aligned to the index of the
    adjusted-return table, forward-filled again and shifted down by 40 rows.
    A cell is 1 where the shifted table is non-null and 0 otherwise.

    Args:
        xnms: Column labels of the returned frame.
        xinx: Index labels of the returned frame.

    Returns:
        DataFrame of 0/1 flags reindexed to ``xinx`` x ``xnms``; labels absent
        from the source tables come back as NaN from the final reindex.
    """
    listing_state = bt.AZ_Load_csv(f'{self.root_path}/EM_Funda/CDSY_SECUCODE/LISTSTATE.csv')
    listing_state.ffill(inplace=True)
    # Trading-day information: only the index of the return table is used.
    trading_days = bt.AZ_Load_csv(f'{self.root_path}/EM_Funda/DERIVED_14/aadj_r.csv').astype(float).index
    listing_state = listing_state.reindex(index=trading_days).ffill()
    seasoned_flags = listing_state.shift(40).notnull().astype(int)
    return seasoned_flags.reindex(columns=xnms, index=xinx)
Beispiel #2
0
    def load_vsMCap_factor(self, file_name):
        """Load a daily factor and scale it by smoothed market capitalisation.

        Args:
            file_name: Base name (without ``.csv``) of the factor file in the
                daily fundamentals directory.

        Returns:
            The result of ``self.row_extre`` applied to ``factor / cap_ma``
            with ``self.sector_df`` and 0.3.
        """
        daily_dir = '/mnt/mfs/DAT_EQT/EM_Funda/daily/'
        cap_file = '/mnt/mfs/DAT_EQT/EM_Funda/LICO_YS_STOCKVALUE/AmarketCapExStri.csv'

        factor_df = bt.AZ_Load_csv(os.path.join(daily_dir, file_name + '.csv'))
        factor_df = factor_df.reindex(index=self.xinx, columns=self.xnms)

        cap_df = bt.AZ_Load_csv(cap_file).reindex(index=self.xinx, columns=self.xnms)
        # Zeros are turned into NaN before smoothing (window argument 60).
        cap_ma = bt.AZ_Rolling_mean(cap_df.replace(0, np.nan), 60)

        return self.row_extre(factor_df / cap_ma, self.sector_df, 0.3)
 def load_change_factor(self, file_name):
     """Load a daily factor and scale it by the smoothed absolute QTTM series.

     The companion file name is derived from ``file_name`` by replacing its
     last underscore-separated token with ``QTTM``.

     Args:
         file_name: Base name (without ``.csv``) of the factor file.

     Returns:
         The result of ``self.row_extre`` applied to ``factor / qttm_ma``
         with ``self.sector_df`` and 0.2.
     """
     daily_dir = f'{self.root_path}/EM_Funda/daily/'
     qttm_file = '_'.join(file_name.split('_')[:-1]) + '_QTTM.csv'

     factor_df = bt.AZ_Load_csv(os.path.join(daily_dir, file_name + '.csv'))
     factor_df = factor_df.reindex(index=self.xinx, columns=self.xnms)

     qttm_df = bt.AZ_Load_csv(os.path.join(daily_dir, qttm_file))
     qttm_df = qttm_df.reindex(index=self.xinx, columns=self.xnms)
     # Absolute values with zeros turned into NaN, then smoothed (window argument 60).
     qttm_ma = bt.AZ_Rolling_mean(qttm_df.abs().replace(0, np.nan), 60)

     return self.row_extre(factor_df / qttm_ma, self.sector_df, 0.2)
    def load_locked_data(self):
        """Build the suspension mask and the price-limit mask.

        Both masks are aligned to ``self.xinx`` x ``self.xnms``. Each holds 1
        where the condition is met and NaN where it is not; labels missing
        from the source tables are filled with 1.

        Returns:
            tuple: ``(suspendday_df, limit_buy_sell_df)`` where
                * ``suspendday_df`` is 1 where SUSPENDREASON.csv has no entry
                  and NaN where it has one;
                * ``limit_buy_sell_df`` is 1 where ``abs(return) < 0.095`` and
                  NaN otherwise (a NaN return fails the comparison).
        """
        suspend_reason_df = bt.AZ_Load_csv(
            os.path.join(self.root_path, 'EM_Funda/TRAD_TD_SUSPENDDAY/SUSPENDREASON.csv'))
        suspendday_df = suspend_reason_df.isnull().astype(int)
        # Fill with the integer 1 (not the bool True) so the mask stays a
        # homogeneous numeric frame, matching the limit mask below.
        suspendday_df = suspendday_df.reindex(columns=self.xnms, index=self.xinx, fill_value=1)
        suspendday_df.replace(0, np.nan, inplace=True)

        return_df = bt.AZ_Load_csv(os.path.join(self.root_path, 'EM_Funda/DERIVED_14/aadj_r.csv')).astype(float)
        limit_buy_sell_df = (return_df.abs() < 0.095).astype(int)
        limit_buy_sell_df = limit_buy_sell_df.reindex(columns=self.xnms, index=self.xinx, fill_value=1)
        limit_buy_sell_df.replace(0, np.nan, inplace=True)
        return suspendday_df, limit_buy_sell_df
    def load_index_weight_data(self, index_name):
        """Compute per-row weights proportional to the square root of market cap.

        Only cells that are non-null in the index constituent table receive
        a weight; each row is divided by its own sum.

        Args:
            index_name: Suffix of the ``SECURITYNAME_<index_name>.csv`` table.

        Returns:
            DataFrame of weights, NaN outside the index constituents.
        """
        members = bt.AZ_Load_csv(self.root_path + f'/EM_Funda/IDEX_YS_WEIGHT_A/SECURITYNAME_{index_name}.csv')
        members = self.reindex_fun(members)
        # 1 where a constituent entry exists, NaN elsewhere.
        member_mask = (members.notnull() * 1).replace(0, np.nan)

        cap_path = os.path.join(self.root_path, 'EM_Funda/LICO_YS_STOCKVALUE/AmarketCapExStri.csv')
        # 250-row rolling mean computed on the full table before reindexing.
        cap_avg = bt.AZ_Load_csv(cap_path).rolling(250, min_periods=0).mean()
        cap_avg = self.reindex_fun(cap_avg)

        masked_sqrt_cap = np.sqrt(cap_avg) * member_mask
        row_totals = masked_sqrt_cap.sum(axis=1)
        return masked_sqrt_cap.div(row_totals, axis=0)
Beispiel #6
0
def rzrq_create_factor(index_root_path, sector_df):
    """Run the ``pnd_*`` factor builders over every margin-trading table.

    For each table the raw data (aligned to ``sector_df``, zeros replaced by
    NaN) and its 5-row relative change are handed to the ``pnd_*`` helpers.

    Args:
        index_root_path: Passed through unchanged to every ``pnd_*`` helper.
        sector_df: Frame whose index and columns define the alignment; also
            passed through to every ``pnd_*`` helper.
    """
    # Margin trading and securities lending data
    margin_dir = '/mnt/mfs/DAT_EQT/EM_Funda/TRAD_MT_MARGIN'
    table_names = [
        'RZRQYE', 'RZMRE', 'RZYE', 'RQMCL', 'RQYE', 'RQYL', 'RQCHL', 'RZCHE'
    ]
    # Rolling-mean parameters
    mean_windows = [5, 10, 20, 60]
    limits = [1, 1.5, 2]
    streak_lengths = [3, 4, 5]

    # Each table on its own: simple z-score style factors
    for tab_name in table_names:
        print(tab_name)
        data = bt.AZ_Load_csv(os.path.join(margin_dir, tab_name + '.csv'))
        data = data.reindex(index=sector_df.index, columns=sector_df.columns)
        data = data.replace(0, np.nan)

        pnd_roll_mean_row_extre_fun(tab_name, data, mean_windows, limits,
                                    index_root_path, sector_df)
        pnd_col_extre_fun(tab_name, data, mean_windows, limits,
                          index_root_path, sector_df)
        pnd_row_extre_fun(tab_name, data, limits, index_root_path, sector_df)
        pnd_continue_up_dn_fun(tab_name, data, streak_lengths,
                               index_root_path, sector_df)

        # Relative change against the value 5 rows earlier; +inf is reset to 0.
        chg_name = tab_name + '_chg5'
        chg_5d = data.div(data.shift(5), fill_value=0) - 1
        chg_5d = chg_5d.replace(np.inf, 0)

        pnd_col_extre_fun(chg_name, chg_5d, mean_windows, limits,
                          index_root_path, sector_df)
        pnd_row_extre_fun(chg_name, chg_5d, limits, index_root_path, sector_df)
        pnd_continue_up_dn_fun(chg_name, chg_5d, streak_lengths,
                               index_root_path, sector_df)
Beispiel #7
0
 def load_ratio_factor(self, file_name):
     """Load a daily ratio factor and pass it through ``self.row_extre``.

     Args:
         file_name: Base name (without ``.csv``) of the factor file in the
             daily fundamentals directory.

     Returns:
         The result of ``self.row_extre`` on the aligned factor with
         ``self.sector_df`` and 0.3.
     """
     daily_dir = '/mnt/mfs/DAT_EQT/EM_Funda/daily/'
     factor_df = bt.AZ_Load_csv(os.path.join(daily_dir, file_name + '.csv'))
     factor_df = factor_df.reindex(index=self.xinx, columns=self.xnms)
     return self.row_extre(factor_df, self.sector_df, 0.3)
Beispiel #8
0
def load_raw_data(root_path, raw_data_path):
    """Load every listed CSV table found under ``root_path``.

    Args:
        root_path: Directory the relative table paths are joined onto.
        raw_data_path: Iterable of table paths relative to ``root_path``,
            each without the ``.csv`` extension.

    Returns:
        list: One loaded table per entry of ``raw_data_path``, in the same
        order; empty when ``raw_data_path`` is empty.
    """
    # Join the path directly (str has no ``.str()`` method) and collect each
    # table as a single list element instead of extending the list by
    # iterating over the table.
    return [
        bt.AZ_Load_csv(os.path.join(root_path, target_path + '.csv'))
        for target_path in raw_data_path
    ]
def create_sector(root_path, name_list, sector_name, begin_date):
    """Intersect a sector universe with a group of level-1 industry tables.

    The industry tables named in ``name_list`` are summed; if any summed cell
    exceeds 1 an error line is printed and nothing is written. Otherwise the
    universe is multiplied by the summed table, all-NaN columns are dropped
    and the result is saved as a ``|``-separated CSV.

    Args:
        root_path: Root directory holding ``EM_Funda/DERIVED_10``.
        name_list: Industry identifiers used in the file names.
        sector_name: Name of the universe table (without ``.csv``).
        begin_date: Rows with an index earlier than this are discarded.
    """
    universe_file = os.path.join(root_path, 'EM_Funda/DERIVED_10/' + sector_name + '.csv')
    universe = bt.AZ_Load_csv(universe_file)
    universe = universe[universe.index >= begin_date]

    industry_sum = pd.DataFrame()
    for n in name_list:
        industry_df = bt.AZ_Load_csv('/mnt/mfs/DAT_EQT/EM_Funda/LICO_IM_INCHG/Global_Level1_{}.csv'.format(n))
        industry_sum = industry_sum.add(industry_df[industry_df.index >= begin_date], fill_value=0)

    # Any cell above 1 is reported as an error and stops the export.
    if industry_sum[industry_sum > 1].sum().sum() != 0:
        print('error', name_list)
        return

    sector_industry = universe.mul(industry_sum).dropna(how='all', axis='columns')
    name_tag = '_'.join(str(x) for x in name_list)
    sector_industry.to_csv(
        '/mnt/mfs/dat_whs/data/sector_data/{}_industry_{}.csv'.format(sector_name, name_tag), sep='|')
    def get_st_stock_info(self, xnms, xinx):
        """Build a 0/1 mask that is 0 where the forward-filled entry contains 'ST' or 'PT'.

        Args:
            xnms: Column labels of the returned frame.
            xinx: Index labels of the returned frame.

        Returns:
            DataFrame of 0/1 flags; cells that are still NaN after the forward
            fill become the string ``'nan'`` and therefore map to 1.
        """
        change_file = os.path.join(self.root_path, 'EM_Funda/CDSY_CHANGEINFO/CHANGEA.csv')
        labels = bt.AZ_Load_csv(change_file).reindex(columns=xnms, index=xinx)
        labels = labels.ffill().astype(str)

        def _flag(label):
            # 0 for entries tagged ST/PT, 1 for everything else.
            return 0 if ('ST' in label or 'PT' in label) else 1

        return labels.applymap(_flag)
Beispiel #11
0
 def load_notice_factor(self, file_name):
     """Load a notice factor and pass it through ``self.row_extre``.

     Args:
         file_name: Base name (without ``.csv``) of the factor file.

     Returns:
         The ``self.row_extre`` output (threshold argument 0.3); when
         ``self.if_only_long`` is truthy only values > 0 are kept.
     """
     notice_dir = '/mnt/mfs/dat_whs/EM_Funda/my_data_test'
     factor_df = bt.AZ_Load_csv(os.path.join(notice_dir, file_name + '.csv'))
     factor_df = factor_df.reindex(index=self.xinx, columns=self.xnms)

     signal_df = self.row_extre(factor_df, self.sector_df, 0.3)
     if not self.if_only_long:
         return signal_df
     return signal_df[signal_df > 0]
Beispiel #12
0
def load_index_data(xinx, index_name):
    """Return one column of the index CHG table, multiplied by 0.01.

    Args:
        xinx: Index labels the column is reindexed to.
        index_name: Column label of the wanted index in CHG.csv.

    Returns:
        The selected column aligned to ``xinx`` and scaled by 0.01
        (presumably percent to fraction; confirm against the data source).
    """
    chg_table = bt.AZ_Load_csv('/mnt/mfs/DAT_EQT/EM_Tab09/INDEX_TD_DAILYSYS/CHG.csv')
    index_chg = chg_table[index_name].reindex(index=xinx)
    return index_chg * 0.01
    def load_sector_data(self):
        """Build the sector universe mask for ``self.sector_name``.

        Sector names starting with ``index`` use the index constituent table
        (every non-null cell set to 1); all others use the DERIVED_10 table.
        The universe is then multiplied by the new-stock and ST masks.

        Returns:
            DataFrame in which zeros have been replaced by NaN.
        """
        if self.sector_name.startswith('index'):
            index_name = self.sector_name.rsplit('_', 1)[-1]
            universe = bt.AZ_Load_csv(
                f'{self.root_path}/EM_Funda/IDEX_YS_WEIGHT_A/SECURITYNAME_{index_name}.csv')
            # NaN != NaN, so this touches exactly the non-null cells.
            universe[universe == universe] = 1
        else:
            universe = bt.AZ_Load_csv(f'{self.root_path}/EM_Funda/DERIVED_10/{self.sector_name}.csv')

        universe = universe.reindex(index=self.xinx).dropna(how='all', axis='columns')
        names, dates = universe.columns, universe.index

        new_stock_mask = self.get_new_stock_info(names, dates)
        st_mask = self.get_st_stock_info(names, dates)
        return (universe * new_stock_mask * st_mask).replace(0, np.nan)
Beispiel #14
0
 def load_whs_factor(self, file_name):
     """Load a ``dat_whs`` factor and pass it through ``self.row_extre``.

     Args:
         file_name: Base name (without ``.csv``) of the factor file.

     Returns:
         The ``self.row_extre`` output (threshold argument 0.3); when
         ``self.if_only_long`` is truthy only values > 0 are kept.
     """
     whs_dir = f'{self.root_path}/EM_Funda/dat_whs/'
     factor_df = bt.AZ_Load_csv(os.path.join(whs_dir, file_name + '.csv'))
     # NOTE: an e-mail alert for a missing latest date used to sit here and is disabled.
     factor_df = factor_df.reindex(index=self.xinx, columns=self.xnms)

     signal_df = self.row_extre(factor_df, self.sector_df, 0.3)
     if not self.if_only_long:
         return signal_df
     return signal_df[signal_df > 0]
 def load_jerry_factor(self, file_name):
     """Load a signal file, applying ``self.row_extre`` only when it looks continuous.

     The number of distinct non-null values in the first 100 columns of the
     last row decides the path: more than 5 goes through ``self.row_extre``,
     otherwise the aligned data is used as is.

     Args:
         file_name: File name inside the signal directory (used verbatim).

     Returns:
         The resulting frame; when ``self.if_only_long`` is truthy only
         values > 0 are kept.
     """
     signal_dir = '/mnt/mfs/temp/dat_jerry/signal'
     raw_df = bt.AZ_Load_csv(f'{signal_dir}/{file_name}')
     distinct_count = len(set(raw_df.iloc[-1, :100].dropna().values))
     aligned_df = raw_df.reindex(index=self.xinx, columns=self.xnms)

     signal_df = self.row_extre(aligned_df, self.sector_df, 0.3) if distinct_count > 5 else aligned_df
     if not self.if_only_long:
         return signal_df
     return signal_df[signal_df > 0]
Beispiel #16
0
    def load_ic_if_diff(self, file_name):
        """Flag rows where the smoothed '000300' change exceeds the smoothed '000905' change.

        Args:
            file_name: Unused; kept for signature compatibility with the
                other loaders.

        Returns:
            Boolean DataFrame (``self.xinx`` x ``self.xnms``) whose columns
            all carry the same per-row flag.
        """
        chg_table = bt.AZ_Load_csv('/mnt/mfs/DAT_EQT/EM_Funda/INDEX_TD_DAILYSYS/CHG.csv')
        hs300_chg = chg_table['000300'].reindex(index=self.xinx) * 0.01
        zz500_chg = chg_table['000905'].reindex(index=self.xinx) * 0.01

        spread = bt.AZ_Rolling_mean(hs300_chg, 10, 0) - bt.AZ_Rolling_mean(zz500_chg, 10, 0)
        row_flags = (spread > 0).values.ravel()

        # Repeat the per-row flag once per column.
        flag_matrix = np.array([row_flags] * len(self.xnms)).T
        return pd.DataFrame(flag_matrix, index=self.xinx, columns=self.xnms)
 def load_whs_factor(self, file_name):
     """Load a ``dat_whs`` factor, applying ``self.row_extre`` only when it looks continuous.

     The number of distinct non-null values in the first 100 columns of the
     last row decides the path: more than 5 goes through ``self.row_extre``,
     otherwise the aligned data is used as is.

     Args:
         file_name: Base name (without ``.csv``) of the factor file.

     Returns:
         The resulting frame; when ``self.if_only_long`` is truthy only
         values > 0 are kept.
     """
     whs_dir = f'{self.root_path}/EM_Funda/dat_whs'
     raw_df = bt.AZ_Load_csv(f'{whs_dir}/{file_name}.csv')
     distinct_count = len(set(raw_df.iloc[-1, :100].dropna().values))
     aligned_df = raw_df.reindex(index=self.xinx, columns=self.xnms)

     signal_df = self.row_extre(aligned_df, self.sector_df, 0.3) if distinct_count > 5 else aligned_df
     if not self.if_only_long:
         return signal_df
     return signal_df[signal_df > 0]
Beispiel #18
0
    def load_sector_data(self):
        """Build the sector universe mask for ``self.sector_name``.

        The DERIVED_10 table is cut to ``[self.begin_date, self.end_date)``,
        all-NaN columns are dropped, and the result is multiplied by the
        new-stock and ST masks.

        Returns:
            DataFrame in which zeros have been replaced by NaN.
        """
        sector_file = os.path.join(self.root_path, 'EM_Funda/DERIVED_10/' + self.sector_name + '.csv')
        universe = bt.AZ_Load_csv(sector_file)
        in_window = (universe.index >= self.begin_date) & (universe.index < self.end_date)
        universe = universe[in_window].dropna(how='all', axis='columns')
        names, dates = universe.columns, universe.index

        new_stock_mask = self.get_new_stock_info(names, dates)
        st_mask = self.get_st_stock_info(names, dates)
        return (universe * new_stock_mask * st_mask).replace(0, np.nan)
 def load_remy_factor(self, file_name):
     """Load a DERIVED_F1 factor, applying ``self.row_extre`` only when it looks continuous.

     The number of distinct non-null values in the first 100 columns of the
     last row decides the path: more than 5 goes through ``self.row_extre``,
     otherwise the aligned data is used as is.

     Args:
         file_name: File name inside the DERIVED_F1 directory (used verbatim).

     Returns:
         The resulting frame; when ``self.if_only_long`` is truthy only
         values > 0 are kept.
     """
     factor_dir = '/mnt/mfs/DAT_EQT/EM_Funda/DERIVED_F1'
     raw_df = bt.AZ_Load_csv(f'{factor_dir}/{file_name}')
     distinct_count = len(set(raw_df.iloc[-1, :100].dropna().values))
     aligned_df = raw_df.reindex(index=self.xinx, columns=self.xnms)

     signal_df = self.row_extre(aligned_df, self.sector_df, 0.3) if distinct_count > 5 else aligned_df
     if not self.if_only_long:
         return signal_df
     return signal_df[signal_df > 0]
Beispiel #20
0
    def __init__(self, sector_name):
        """Load the sector universe plus the matching return and price tables.

        Args:
            sector_name: Name of the DERIVED_10 table (without ``.csv``).

        Sets ``sector_name``, ``sector_df``, ``aadj_r``, ``aadj_p`` and
        ``save_path``. The universe is restricted to rows from 2005-05-05
        up to (but excluding) the current time.
        """
        window_start = pd.to_datetime('20050505')
        window_end = datetime.now()
        derived_14 = '/mnt/mfs/DAT_EQT/EM_Funda/DERIVED_14/'

        self.sector_name = sector_name
        universe = bt.AZ_Load_csv('/mnt/mfs/DAT_EQT/EM_Funda/DERIVED_10/' + sector_name + '.csv')
        in_window = (universe.index >= window_start) & (universe.index < window_end)
        self.sector_df = universe[in_window]
        dates, names = self.sector_df.index, self.sector_df.columns

        # Align the return and price tables with the universe.
        self.aadj_r = bt.AZ_Load_csv(derived_14 + 'aadj_r.csv').reindex(index=dates, columns=names)
        self.aadj_p = bt.AZ_Load_csv(derived_14 + 'aadj_p.csv').reindex(index=dates, columns=names)

        self.save_path = '/mnt/mfs/dat_whs/data/sector_data'
Beispiel #21
0
 def industry(self, file_list):
     """Restrict ``self.sector_df`` to a group of level-1 industries and save it.

     Args:
         file_list: Industry identifiers used in the ``Global_Level1_*.csv``
             file names and in the output file name.

     Returns:
         The sector frame multiplied by the summed industry tables, with
         zeros replaced by NaN and all-NaN columns dropped. The same frame
         is written to the DERIVED_10 directory.
     """
     combined = pd.DataFrame()
     for file_name in file_list:
         level1_file = f'/mnt/mfs/DAT_EQT/EM_Funda/LICO_IM_INCHG/Global_Level1_{file_name}.csv'
         combined = combined.add(bt.AZ_Load_csv(level1_file), fill_value=0)

     masked = self.sector_df.mul(combined, fill_value=0).replace(0, np.nan)
     masked = masked.dropna(how='all', axis='columns')

     name_tag = '_'.join(str(x) for x in file_list)
     masked.to_csv(
         '/mnt/mfs/DAT_EQT/EM_Funda/DERIVED_10/{}_industry_{}.csv'.format(self.sector_name, name_tag))
     return masked
    def load_sector_data(self):
        """Build the sector universe mask from the saved sector_data table.

        The table is cut to ``[self.begin_date, self.end_date)``, all-NaN
        columns are dropped, and the result is multiplied by the new-stock
        and ST masks.

        Returns:
            DataFrame in which zeros have been replaced by NaN.
        """
        universe = bt.AZ_Load_csv(f'/mnt/mfs/dat_whs/data/sector_data/{self.sector_name}.csv')
        in_window = (universe.index >= self.begin_date) & (universe.index < self.end_date)
        universe = universe[in_window].dropna(how='all', axis='columns')
        names, dates = universe.columns, universe.index

        new_stock_mask = self.get_new_stock_info(names, dates)
        st_mask = self.get_st_stock_info(names, dates)
        return (universe * new_stock_mask * st_mask).replace(0, np.nan)
Beispiel #23
0
def load_sector_data(begin_date, end_date, sector_name):
    """Build a lagged sector universe mask.

    The universe table is shifted down by one row before being cut to
    ``[begin_date, end_date)``; the new-stock mask is shifted by one row and
    the ST mask by two rows before the three are multiplied.

    Args:
        begin_date: Inclusive lower bound on the index.
        end_date: Exclusive upper bound on the index.
        sector_name: Name of the DERIVED_10 table (without ``.csv``).

    Returns:
        DataFrame in which zeros have been replaced by NaN.
    """
    raw_universe = bt.AZ_Load_csv(
        os.path.join(stock_data_path, 'EM_Funda/DERIVED_10/' + sector_name + '.csv'))
    in_window = (raw_universe.index >= begin_date) & (raw_universe.index < end_date)
    # The window mask already enforces index >= begin_date.
    universe = raw_universe.shift(1)[in_window].dropna(how='all', axis='columns')
    names, dates = universe.columns, universe.index

    new_stock_mask = get_new_stock_info(names, dates).shift(1)
    st_mask = get_st_stock_info(names, dates).shift(2)
    return (universe * new_stock_mask * st_mask).replace(0, np.nan)
Beispiel #24
0
    def load_remy_factor(self, file_name):
        """Load a DERIVED_F1 factor, e-mailing an alert when the latest date is missing.

        The number of distinct non-null values in the first 100 columns of
        the last row decides the path: more than 5 goes through
        ``self.row_extre``, otherwise the aligned data is used as is.

        Args:
            file_name: File name inside the DERIVED_F1 directory (used verbatim).

        Returns:
            The resulting frame; when ``self.if_only_long`` is truthy only
            values > 0 are kept.
        """
        factor_dir = f'{self.root_path}/EM_Funda/DERIVED_F1'
        raw_df = bt.AZ_Load_csv(f'{factor_dir}/{file_name}')
        distinct_count = len(set(raw_df.iloc[-1, :100].dropna().values))

        # Alert when the file does not reach the last expected date.
        latest_missing = self.xinx[-1] not in raw_df.index
        if latest_missing:
            send_email.send_email(file_name + self.sector_name,
                                  ['*****@*****.**'], [], '[LOADDATA]error')

        aligned_df = raw_df.reindex(index=self.xinx, columns=self.xnms)
        signal_df = self.row_extre(aligned_df, self.sector_df, 0.3) if distinct_count > 5 else aligned_df
        if not self.if_only_long:
            return signal_df
        return signal_df[signal_df > 0]
    def load_remy_factor(self, file_name, sector_name):
        """Load a DERIVED_F3 factor from the directory matching ``sector_name``.

        The number of distinct non-null values in the first 100 columns of
        the last row decides the path: more than 5 goes through
        ``self.row_extre``, otherwise the aligned data is used as is.

        Args:
            file_name: File name inside the chosen directory (used verbatim).
            sector_name: Sector name used to pick the factor directory.

        Returns:
            The resulting frame; when ``self.if_only_long`` is truthy only
            values > 0 are kept.
        """
        if sector_name.startswith('market_top_300plus'):
            factor_dir = '/mnt/mfs/DAT_EQT/EM_Funda/DERIVED_F3/T300P'
        else:
            # 'market_top_300to800plus' and every other sector read from T500P.
            factor_dir = '/mnt/mfs/DAT_EQT/EM_Funda/DERIVED_F3/T500P'

        raw_df = bt.AZ_Load_csv(f'{factor_dir}/{file_name}')
        distinct_count = len(set(raw_df.iloc[-1, :100].dropna().values))
        aligned_df = raw_df.reindex(index=self.xinx, columns=self.xnms)

        signal_df = self.row_extre(aligned_df, self.sector_df, 0.3) if distinct_count > 5 else aligned_df
        if not self.if_only_long:
            return signal_df
        return signal_df[signal_df > 0]
Beispiel #26
0
    def __init__(self, root_path, if_save, if_new_program, begin_date, cut_date, end_date, time_para_dict, sector_name,
                 index_name, hold_time, lag, return_file, if_hedge, if_only_long):
        """Store the run configuration and load the shared data tables.

        Every argument is stored on the instance under its own name. After
        that the sector universe, the aligned return table, the two tradability
        masks and the index data are loaded in that order, with a progress
        line printed after each step.
        """
        # --- configuration ---
        self.root_path = root_path
        self.if_save = if_save
        self.if_new_program = if_new_program
        self.begin_date = begin_date
        self.cut_date = cut_date
        self.end_date = end_date
        self.time_para_dict = time_para_dict
        self.sector_name = sector_name
        self.index_name = index_name
        self.hold_time = hold_time
        self.lag = lag
        self.return_file = return_file
        self.if_hedge = if_hedge
        self.if_only_long = if_only_long

        # --- sector universe: defines the index/columns used everywhere else ---
        self.sector_df = self.load_sector_data()
        print('Loaded sector DataFrame!')
        self.xnms = self.sector_df.columns
        self.xinx = self.sector_df.index

        # --- returns aligned with the universe ---
        raw_returns = bt.AZ_Load_csv(os.path.join(root_path, 'EM_Funda/DERIVED_14/aadj_r.csv'))
        self.return_choose = raw_returns.reindex(index=self.xinx, columns=self.xnms)
        print('Loaded return DataFrame!')

        # --- tradability masks, moved up one row so each row holds the next
        #     row's flag; the last row has no successor and is set to 1 ---
        suspend_mask, limit_mask = self.load_locked_data()
        next_limit_mask = limit_mask.shift(-1)
        next_limit_mask.iloc[-1] = 1
        next_suspend_mask = suspend_mask.shift(-1)
        next_suspend_mask.iloc[-1] = 1
        self.suspendday_df_c = next_suspend_mask
        self.limit_buy_sell_df_c = next_limit_mask
        print('Loaded suspendday_df and limit_buy_sell DataFrame!')

        # --- index data ---
        self.index_df = self.load_index_data()
        print('Loaded index DataFrame!')
Beispiel #27
0
        # self.sector_df
        industry_df_sum = pd.DataFrame()
        for file_name in file_list:
            industry_df = bt.AZ_Load_csv(
                f'/mnt/mfs/DAT_EQT/EM_Funda/LICO_IM_INCHG/Global_Level1_{file_name}.csv'
            )
            industry_df_sum = industry_df_sum.add(industry_df, fill_value=0)
        industry_df_sum = self.sector_df.mul(industry_df_sum, fill_value=0).replace(0, np.nan)\
            .dropna(how='all', axis='columns')
        industry_df_sum.to_csv(
            '/mnt/mfs/DAT_EQT/EM_Funda/DERIVED_10/{}_industry_{}.csv'.format(
                self.sector_name, '_'.join([str(x) for x in file_list])))
        return industry_df_sum


if __name__ == '__main__':
    # Rebuild each industry split of market_top_2000 and load the previously
    # saved version of the same split for a manual comparison.
    sector_split = SectorSplit('market_top_2000')
    for file_list in [[10, 15], [20, 25, 30, 35], [40], [45, 50], [55]]:
        industry_df_sum = sector_split.industry(file_list)
        name_tag = '_'.join(str(x) for x in file_list)
        market_top_n = bt.AZ_Load_csv(
            '/mnt/mfs/dat_whs/data/sector_data/market_top_2000_industry_{}.csv'.format(name_tag))
        # Non-null members of both versions on one sample date.
        a = industry_df_sum.loc[pd.to_datetime('20180829')].dropna()
        b = market_top_n.loc[pd.to_datetime('20180829')].dropna()
        # Disabled consistency checks:
        # print((industry_df_sum.loc[pd.to_datetime('20100829'):pd.to_datetime('20180829')]
        #        != market_top_n.loc[pd.to_datetime('20100829'):pd.to_datetime('20180829')]).sum().sum())
        # print((industry_df_sum > 1).sum().sum())
        # print((industry_df_sum == 0).sum().sum())
        # print((market_top_n > 1).sum().sum())
        # print((market_top_n == 0).sum().sum())
Beispiel #28
0
 def load_index_data(self, index_name):
     """Return one column of the index CHG table, multiplied by 0.01.

     Args:
         index_name: Column label of the wanted index in CHG.csv.

     Returns:
         The selected column aligned to ``self.xinx`` and scaled by 0.01
         (presumably percent to fraction; confirm against the data source).
     """
     chg_file = os.path.join(self.root_path, 'EM_Funda/INDEX_TD_DAILYSYS/CHG.csv')
     index_chg = bt.AZ_Load_csv(chg_file)[index_name].reindex(index=self.xinx)
     return index_chg * 0.01
Beispiel #29
0
 def load_return_data(self):
     """Load the adjusted-return table cut to ``[self.begin_date, self.end_date)``.

     Returns:
         The rows of aadj_r.csv whose index is at or after ``self.begin_date``
         and strictly before ``self.end_date``.
     """
     returns_file = os.path.join(self.root_path, 'EM_Funda/DERIVED_14/aadj_r.csv')
     all_returns = bt.AZ_Load_csv(returns_file)
     in_window = (all_returns.index >= self.begin_date) & (all_returns.index < self.end_date)
     return all_returns[in_window]
Beispiel #30
0
#         intra_data = intra_data[sorted(intra_data.columns)]
#         limit_list = [1, 1.5, 2]
#         para_list = [10, 20, 60]
#         # pnd_row_extre_fun(tab_name, intra_data, limit_list, index_root_path)
#         pnd_col_extre_fun(tab_name, intra_data, para_list, limit_list, factor_save_path, sector_df)

####################################################################################################################
if __name__ == '__main__':

    sector_data_path = '/mnt/mfs/DAT_EQT/EM_Funda/DERIVED_10'

    base_data_path = '/mnt/mfs/DAT_EQT/EM_Tab14/TRAD_SK_DAILY_JC'

    factor_save_path = '/media/hdd1/dat_whs/data/new_factor_data'

    EQA_open = bt.AZ_Load_csv(os.path.join(base_data_path, 'OPEN.csv'))
    EQA_high = bt.AZ_Load_csv(os.path.join(base_data_path, 'HIGH.csv'))
    EQA_low = bt.AZ_Load_csv(os.path.join(base_data_path, 'LOW.csv'))
    EQA_close = bt.AZ_Load_csv(os.path.join(base_data_path, 'NEW.csv'))
    EQA_volume = bt.AZ_Load_csv(os.path.join(base_data_path, 'TVOL.csv'))
    EQA_amount = bt.AZ_Load_csv(os.path.join(base_data_path, 'TVALCNY.csv'))
    EQA_adj_r = bt.AZ_Load_csv(
        '/mnt/mfs/DAT_EQT/EM_Funda/DERIVED_14/aadj_r.csv')
    begin_str = '20100101'
    end_str = '20180401'

    # pool = Pool(20)
    # intraday_open_1_hour_vwap(begin_str, end_str)
    # intraday_create_factor(begin_str, end_str, factor_save_path)

    for sector in ['market_top_500']: