def __init__(self, data_path, market_name, tickers_fname, parameters,
             steps=1, epochs=50, batch_size=None, gpu=False):
    """Read the ticker list and EOD data, then record the run settings.

    Args:
        data_path: Directory handed to ``load_EOD_data``; the ticker file
            is looked up one level above it.
        market_name: Market identifier forwarded to ``load_EOD_data``.
        tickers_fname: File name of the tab-delimited ticker list.
        parameters: Hyper-parameter container; a shallow copy is stored.
        steps: Forwarded to ``load_EOD_data``; depends on your strategy
            (default one-day).
        epochs: Stored as ``self.epochs``.
        batch_size: Stored as ``self.batch_size``; ``None`` selects the
            number of loaded tickers.
        gpu: Stored as ``self.gpu``.
    """
    self.data_path = data_path
    self.market_name = market_name
    self.tickers_fname = tickers_fname

    # Ticker symbols are read as strings from the file next to data_path.
    tickers_file = os.path.join(data_path, '..', tickers_fname)
    self.tickers = np.genfromtxt(tickers_file, dtype=str,
                                 delimiter='\t', skip_header=False)
    # Debugging hint: slice self.tickers here (e.g. the first 10 entries)
    # to run on a small subset.
    num_tickers = len(self.tickers)
    print('#tickers selected:', num_tickers)

    loaded = load_EOD_data(data_path, market_name, self.tickers, steps)
    self.eod_data, self.mask_data, self.gt_data, self.price_data = loaded

    self.parameters = copy.copy(parameters)
    self.steps = steps
    self.epochs = epochs
    self.batch_size = num_tickers if batch_size is None else batch_size

    # Hard-coded positions; presumably the validation/test boundaries along
    # the trading-day axis -- TODO confirm against the training loop.
    self.valid_index = 756
    self.test_index = 1008
    self.trade_dates = self.mask_data.shape[1]
    # NOTE(review): presumably the number of input features per day -- confirm.
    self.fea_dim = 5
    self.gpu = gpu
def __init__(self, data_path, market_name, tickers_fname, relation_name,
             emb_fname, parameters, steps=1, epochs=50, batch_size=None,
             flat=False, gpu=False, in_pro=False):
    """Seed the RNGs, load tickers, EOD, relation and embedding data.

    Args:
        data_path: Directory handed to ``load_EOD_data``; the ticker file
            and the ``relation`` / ``pretrain`` folders are looked up one
            level above it.
        market_name: Market identifier; also the prefix of the relation
            file name.
        tickers_fname: File name of the tab-delimited ticker list.
        relation_name: ``'sector_industry'`` or ``'wikidata'``; any other
            value raises ``KeyError`` when the relation file name is built.
        emb_fname: File name of the ``.npy`` embedding under ``pretrain``.
        parameters: Hyper-parameter container; a shallow copy is stored.
        steps: Forwarded to ``load_EOD_data``.
        epochs: Stored as ``self.epochs``.
        batch_size: Stored as ``self.batch_size``; ``None`` selects the
            number of loaded tickers.
        flat: Stored as ``self.flat``.
        gpu: Stored as ``self.gpu``.
        in_pro: Stored as ``self.inner_prod``.
    """
    # Fixed seed shared by Python, NumPy and TensorFlow.
    rng_seed = 123456789
    random.seed(rng_seed)
    np.random.seed(rng_seed)
    tf.set_random_seed(rng_seed)

    self.data_path = data_path
    self.market_name = market_name
    self.tickers_fname = tickers_fname
    self.relation_name = relation_name

    # Ticker symbols are read as strings from the file next to data_path.
    tickers_file = os.path.join(data_path, '..', tickers_fname)
    self.tickers = np.genfromtxt(tickers_file, dtype=str,
                                 delimiter='\t', skip_header=False)
    num_tickers = len(self.tickers)
    print('#tickers selected:', num_tickers)

    loaded = load_EOD_data(data_path, market_name, self.tickers, steps)
    self.eod_data, self.mask_data, self.gt_data, self.price_data = loaded

    # Relation file name is the market name plus a per-relation suffix.
    suffix_by_relation = {
        'sector_industry': '_industry_relation.npy',
        'wikidata': '_wiki_relation.npy',
    }
    relation_file = os.path.join(
        self.data_path, '..', 'relation', self.relation_name,
        self.market_name + suffix_by_relation[self.relation_name])
    self.rel_encoding, self.rel_mask = load_relation_data(relation_file)
    print('relation encoding shape:', self.rel_encoding.shape)
    print('relation mask shape:', self.rel_mask.shape)

    embedding_file = os.path.join(self.data_path, '..', 'pretrain', emb_fname)
    self.embedding = np.load(embedding_file)
    print('embedding shape:', self.embedding.shape)

    self.parameters = copy.copy(parameters)
    self.steps = steps
    self.epochs = epochs
    self.flat = flat
    self.inner_prod = in_pro
    self.batch_size = num_tickers if batch_size is None else batch_size

    # Hard-coded positions; presumably the validation/test boundaries along
    # the trading-day axis -- TODO confirm against the training loop.
    self.valid_index = 756
    self.test_index = 1008
    self.trade_dates = self.mask_data.shape[1]
    # NOTE(review): presumably the number of input features per day -- confirm.
    self.fea_dim = 5
    self.gpu = gpu
from load_data import load_EOD_data, load_relation_data # params data_path = '../data/2013-01-01' market_name = 'NASDAQ' tickers_fname = market_name + '_tickers_qualify_dr-0.98_min-5_smooth.csv' steps = 1 # load stock data tickers = np.genfromtxt(os.path.join(data_path, '..', tickers_fname), dtype=str, delimiter='\t', skip_header=False) print('#tickers selected:', len(tickers)) eod_data, mask_data, gt_data, price_data = \ load_EOD_data(data_path, market_name, tickers, steps) # corr tensor corr_size = 30 num_companies, num_timesteps = gt_data.shape correlation_matrix_shape = (num_timesteps - corr_size, num_companies, num_companies) corr = np.ones(correlation_matrix_shape) print(corr.shape) iu = np.triu_indices(num_companies,k=1) il = (iu[1],iu[0]) for t in tqdm(range(num_timesteps - corr_size)): for c1 in tqdm(range(num_companies)): for c2 in range(1, num_companies - c1): c1_movements = gt_data.T[t:t+corr_size][:,c1]
def __init__(self, data_path, market_name, tickers_fname, relation_name,
             emb_fname, parameters, steps=1, epochs=50, batch_size=None,
             flat=False, gpu=False, in_pro=False):
    """Seed the RNGs, load all inputs and align embeddings to the tickers.

    Args:
        data_path: Directory handed to ``load_EOD_data``; the ticker files
            and the ``relation`` / ``pretrain`` folders are looked up one
            level above it.
        market_name: Market identifier; also the prefix of the relation
            file name and of the embedding ticker list.
        tickers_fname: File name of the tab-delimited ticker list.
        relation_name: ``'sector_industry'`` or ``'wikidata'``; any other
            value raises ``KeyError`` when the relation file name is built.
        emb_fname: File name of the ``.npy`` embedding under ``pretrain``.
        parameters: Hyper-parameter container; a shallow copy is stored.
        steps: Forwarded to ``load_EOD_data``.
        epochs: Stored as ``self.epochs``.
        batch_size: Stored as ``self.batch_size``; ``None`` selects the
            number of loaded tickers.
        flat: Stored as ``self.flat``.
        gpu: Stored as ``self.gpu``.
        in_pro: Stored as ``self.inner_prod``.
    """
    # Fixed seed shared by Python, NumPy and TensorFlow.
    rng_seed = 123456789
    random.seed(rng_seed)
    np.random.seed(rng_seed)
    tf.set_random_seed(rng_seed)

    self.data_path = data_path
    self.market_name = market_name
    self.tickers_fname = tickers_fname
    self.relation_name = relation_name

    # Ticker symbols are read as strings from the file next to data_path.
    tickers_file = os.path.join(data_path, '..', tickers_fname)
    self.tickers = np.genfromtxt(tickers_file, dtype=str,
                                 delimiter='\t', skip_header=False)
    num_tickers = len(self.tickers)
    print('#tickers selected:', num_tickers)

    loaded = load_EOD_data(data_path, market_name, self.tickers, steps)
    self.eod_data, self.mask_data, self.gt_data, self.price_data = loaded

    # Relation file name is the market name plus a per-relation suffix.
    suffix_by_relation = {
        'sector_industry': '_industry_relation.npy',
        'wikidata': '_wiki_relation.npy',
    }
    relation_file = os.path.join(
        self.data_path, '..', 'relation', self.relation_name,
        self.market_name + suffix_by_relation[self.relation_name])
    self.rel_encoding, self.rel_mask = load_relation_data(relation_file)
    print('relation encoding shape:', self.rel_encoding.shape)
    print('relation mask shape:', self.rel_mask.shape)

    embedding_file = os.path.join(self.data_path, '..', 'pretrain', emb_fname)
    self.embedding = np.load(embedding_file)
    print('embedding shape:', self.embedding.shape)

    # The embedding rows follow this fixed ticker list, which may differ
    # from self.tickers, so they are re-ordered below.
    emb_tickers_file = os.path.join(
        self.data_path, '..',
        self.market_name + '_tickers_qualify_dr-0.98_min-5_smooth.csv')
    self.emb_tickers = np.genfromtxt(emb_tickers_file, dtype=str,
                                     delimiter='\t', skip_header=False)

    # For every selected ticker take its first matching embedding row and
    # keep only the trailing entries, as many as that ticker's EOD rows.
    # A ticker missing from emb_tickers raises IndexError at the [0][0].
    aligned_rows = [
        self.embedding[np.where(self.emb_tickers == symbol)[0][0]][
            -len(self.eod_data[pos]):]
        for pos, symbol in enumerate(self.tickers)
    ]
    self.embedding = np.array(aligned_rows)

    self.parameters = copy.copy(parameters)
    self.steps = steps
    self.epochs = epochs
    self.flat = flat
    self.inner_prod = in_pro
    self.batch_size = num_tickers if batch_size is None else batch_size

    # Hard-coded positions; presumably the validation/test boundaries along
    # the trading-day axis -- TODO confirm against the training loop.
    self.valid_index = 253
    self.test_index = 378
    print(f"-----\n{self.mask_data.shape}\n-----")
    self.trade_dates = self.mask_data.shape[1]
    # NOTE(review): presumably the number of input features per day -- confirm.
    self.fea_dim = 5
    self.gpu = gpu