def __init__(self,
                 data_path,
                 market_name,
                 tickers_fname,
                 parameters,
                 steps=1,
                 epochs=50,
                 batch_size=None,
                 gpu=False):
        """Load the ticker list and end-of-day data, then store settings.

        Args:
            data_path: Directory passed to ``load_EOD_data``. The tickers
                file is read from the parent of this directory.
            market_name: Market identifier passed to ``load_EOD_data``.
            tickers_fname: Name of the tab-delimited tickers file located
                in ``<data_path>/..``.
            parameters: Hyper-parameter container; a shallow copy is kept
                so rebinding top-level entries here does not affect the
                caller's object.
            steps: Forwarded to ``load_EOD_data`` (the original note says
                it depends on the strategy, one day by default).
            epochs: Stored as ``self.epochs``.
            batch_size: Stored as ``self.batch_size``; when ``None`` the
                number of loaded tickers is used instead.
            gpu: Stored as ``self.gpu``.
        """
        self.data_path = data_path
        self.market_name = market_name
        self.tickers_fname = tickers_fname

        # genfromtxt collapses a one-line file into a 0-d array, which has
        # no len() and cannot be iterated. atleast_1d keeps a single-ticker
        # file usable and leaves the usual 1-d result untouched.
        self.tickers = np.atleast_1d(
            np.genfromtxt(os.path.join(data_path, '..', tickers_fname),
                          dtype=str,
                          delimiter='\t',
                          skip_header=0))
        print('#tickers selected:', len(self.tickers))
        self.eod_data, self.mask_data, self.gt_data, self.price_data = \
            load_EOD_data(data_path, market_name, self.tickers, steps)

        self.parameters = copy.copy(parameters)
        self.steps = steps
        self.epochs = epochs
        if batch_size is None:
            # Default to one batch covering every loaded ticker.
            self.batch_size = len(self.tickers)
        else:
            self.batch_size = batch_size

        # NOTE(review): 756 / 1008 look like offsets along the date axis
        # where the validation and test splits begin -- confirm with the
        # code that consumes them.
        self.valid_index = 756
        self.test_index = 1008
        # Second axis of the mask array returned by load_EOD_data.
        self.trade_dates = self.mask_data.shape[1]
        # NOTE(review): presumably the number of features per day in
        # eod_data -- confirm against load_EOD_data.
        self.fea_dim = 5

        self.gpu = gpu
Esempio n. 2
0
    def __init__(self,
                 data_path,
                 market_name,
                 tickers_fname,
                 relation_name,
                 emb_fname,
                 parameters,
                 steps=1,
                 epochs=50,
                 batch_size=None,
                 flat=False,
                 gpu=False,
                 in_pro=False):
        """Seed the RNGs, load price, relation and embedding data, store settings.

        Args:
            data_path: Directory passed to ``load_EOD_data``. The tickers
                file, the ``relation`` folder and the ``pretrain`` folder
                are all resolved relative to its parent directory.
            market_name: Market identifier; also the prefix of the
                relation file name.
            tickers_fname: Name of the tab-delimited tickers file located
                in ``<data_path>/..``.
            relation_name: ``'sector_industry'`` or ``'wikidata'``. It
                selects both the relation sub-folder and the file suffix.
            emb_fname: File name inside ``<data_path>/../pretrain`` that
                is loaded with ``np.load``.
            parameters: Hyper-parameter container; a shallow copy is kept.
            steps: Forwarded to ``load_EOD_data``.
            epochs: Stored as ``self.epochs``.
            batch_size: Stored as ``self.batch_size``; when ``None`` the
                number of loaded tickers is used instead.
            flat: Stored as ``self.flat``.
            gpu: Stored as ``self.gpu``.
            in_pro: Stored as ``self.inner_prod``.

        Raises:
            KeyError: If ``relation_name`` is not one of the two supported
                relation types.
        """
        # Fix every random source used here so runs are repeatable.
        seed = 123456789
        random.seed(seed)
        np.random.seed(seed)
        tf.set_random_seed(seed)

        self.data_path = data_path
        self.market_name = market_name
        self.tickers_fname = tickers_fname
        self.relation_name = relation_name

        # genfromtxt collapses a one-line file into a 0-d array, which has
        # no len() and cannot be iterated. atleast_1d keeps a single-ticker
        # file usable and leaves the usual 1-d result untouched.
        self.tickers = np.atleast_1d(
            np.genfromtxt(os.path.join(data_path, '..', tickers_fname),
                          dtype=str,
                          delimiter='\t',
                          skip_header=0))

        print('#tickers selected:', len(self.tickers))
        self.eod_data, self.mask_data, self.gt_data, self.price_data = \
            load_EOD_data(data_path, market_name, self.tickers, steps)

        # Relation data: file suffix for each supported relation type.
        rname_tail = {
            'sector_industry': '_industry_relation.npy',
            'wikidata': '_wiki_relation.npy'
        }

        self.rel_encoding, self.rel_mask = load_relation_data(
            os.path.join(self.data_path, '..', 'relation', self.relation_name,
                         self.market_name + rname_tail[self.relation_name]))
        print('relation encoding shape:', self.rel_encoding.shape)
        print('relation mask shape:', self.rel_mask.shape)

        self.embedding = np.load(
            os.path.join(self.data_path, '..', 'pretrain', emb_fname))
        print('embedding shape:', self.embedding.shape)

        self.parameters = copy.copy(parameters)
        self.steps = steps
        self.epochs = epochs
        self.flat = flat
        self.inner_prod = in_pro
        if batch_size is None:
            # Default to one batch covering every loaded ticker.
            self.batch_size = len(self.tickers)
        else:
            self.batch_size = batch_size

        # NOTE(review): 756 / 1008 look like offsets along the date axis
        # where the validation and test splits begin -- confirm with the
        # code that consumes them.
        self.valid_index = 756
        self.test_index = 1008
        # Second axis of the mask array returned by load_EOD_data.
        self.trade_dates = self.mask_data.shape[1]
        # NOTE(review): presumably the number of features per day in
        # eod_data -- confirm against load_EOD_data.
        self.fea_dim = 5

        self.gpu = gpu
Esempio n. 3
0
from load_data import load_EOD_data, load_relation_data

# Script: load end-of-day data for one market and start filling a tensor
# named ``corr`` with one (company x company) matrix per sliding window.
# NOTE(review): this snippet ends inside the innermost loop; the code that
# uses ``c1_movements``, ``iu`` and ``il`` is not visible here.

# params
data_path = '../data/2013-01-01'
market_name = 'NASDAQ'
tickers_fname = market_name + '_tickers_qualify_dr-0.98_min-5_smooth.csv'
steps = 1

# load stock data
# The tickers file is read from the parent directory of data_path.
tickers = np.genfromtxt(os.path.join(data_path, '..', tickers_fname),
                              dtype=str, delimiter='\t', skip_header=False)

print('#tickers selected:', len(tickers))
eod_data, mask_data, gt_data, price_data = \
    load_EOD_data(data_path, market_name, tickers, steps)

# corr tensor
# Window length, in steps along the second axis of gt_data.
corr_size = 30
# gt_data is unpacked as two-dimensional: (companies, timesteps).
num_companies, num_timesteps = gt_data.shape
# One num_companies x num_companies matrix per window start position.
correlation_matrix_shape = (num_timesteps - corr_size, num_companies, num_companies)
# Every entry starts at 1.0.
corr = np.ones(correlation_matrix_shape)
print(corr.shape)

# Index pairs of the strict upper triangle (k=1 excludes the diagonal) ...
iu = np.triu_indices(num_companies,k=1)
# ... and the same pairs with row/column swapped, i.e. the mirrored
# lower-triangle positions.
il = (iu[1],iu[0])

# t is the first timestep of each window.
for t in tqdm(range(num_timesteps - corr_size)):
    for c1 in tqdm(range(num_companies)):
        # NOTE(review): c2 runs from 1 to num_companies - c1 - 1, which
        # looks like an offset from c1 to a second company -- confirm
        # against the rest of the loop body.
        for c2 in range(1, num_companies - c1):
            # Values of company c1 over the window [t, t + corr_size).
            c1_movements = gt_data.T[t:t+corr_size][:,c1]
Esempio n. 4
0
    def __init__(self,
                 data_path,
                 market_name,
                 tickers_fname,
                 relation_name,
                 emb_fname,
                 parameters,
                 steps=1,
                 epochs=50,
                 batch_size=None,
                 flat=False,
                 gpu=False,
                 in_pro=False):
        """Seed the RNGs, load all data, and realign the embedding to the tickers.

        The pretrained embedding is indexed by the market's full
        ``*_tickers_qualify_dr-0.98_min-5_smooth.csv`` list. After loading,
        it is re-ordered to follow ``self.tickers``, and each row is trimmed
        to its last ``len(self.eod_data[i])`` entries.

        Args:
            data_path: Directory passed to ``load_EOD_data``. The tickers
                files, the ``relation`` folder and the ``pretrain`` folder
                are all resolved relative to its parent directory.
            market_name: Market identifier; also the prefix of the
                relation file name and of the embedding tickers file.
            tickers_fname: Name of the tab-delimited tickers file located
                in ``<data_path>/..``.
            relation_name: ``'sector_industry'`` or ``'wikidata'``. It
                selects both the relation sub-folder and the file suffix.
            emb_fname: File name inside ``<data_path>/../pretrain`` that
                is loaded with ``np.load``.
            parameters: Hyper-parameter container; a shallow copy is kept.
            steps: Forwarded to ``load_EOD_data``.
            epochs: Stored as ``self.epochs``.
            batch_size: Stored as ``self.batch_size``; when ``None`` the
                number of loaded tickers is used instead.
            flat: Stored as ``self.flat``.
            gpu: Stored as ``self.gpu``.
            in_pro: Stored as ``self.inner_prod``.

        Raises:
            KeyError: If ``relation_name`` is not one of the two supported
                relation types.
            IndexError: If a selected ticker is absent from the embedding
                tickers file.
        """
        # Fix every random source used here so runs are repeatable.
        seed = 123456789
        random.seed(seed)
        np.random.seed(seed)
        tf.set_random_seed(seed)

        self.data_path = data_path
        self.market_name = market_name
        self.tickers_fname = tickers_fname
        self.relation_name = relation_name

        # genfromtxt collapses a one-line file into a 0-d array, which has
        # no len() and cannot be iterated. atleast_1d keeps a single-ticker
        # file usable and leaves the usual 1-d result untouched.
        self.tickers = np.atleast_1d(
            np.genfromtxt(os.path.join(data_path, '..', tickers_fname),
                          dtype=str,
                          delimiter='\t',
                          skip_header=0))

        print('#tickers selected:', len(self.tickers))
        self.eod_data, self.mask_data, self.gt_data, self.price_data = \
            load_EOD_data(data_path, market_name, self.tickers, steps)

        # Relation data: file suffix for each supported relation type.
        rname_tail = {
            'sector_industry': '_industry_relation.npy',
            'wikidata': '_wiki_relation.npy'
        }

        self.rel_encoding, self.rel_mask = load_relation_data(
            os.path.join(self.data_path, '..', 'relation', self.relation_name,
                         self.market_name + rname_tail[self.relation_name]))
        print('relation encoding shape:', self.rel_encoding.shape)
        print('relation mask shape:', self.rel_mask.shape)

        self.embedding = np.load(
            os.path.join(self.data_path, '..', 'pretrain', emb_fname))
        print('embedding shape:', self.embedding.shape)

        # Ticker list that the rows of the pretrained embedding follow.
        self.emb_tickers = np.atleast_1d(
            np.genfromtxt(os.path.join(
                self.data_path, '..',
                self.market_name + '_tickers_qualify_dr-0.98_min-5_smooth.csv'),
                          dtype=str,
                          delimiter='\t',
                          skip_header=0))

        # Re-order the embedding to follow self.tickers and keep only the
        # trailing entries that line up with each ticker's EOD history.
        new_emb = []
        for i, ticker in enumerate(self.tickers):
            matches = np.where(self.emb_tickers == ticker)[0]
            if matches.size == 0:
                # Same exception type the bare [0][0] lookup used to raise,
                # but with a message that names the offending ticker.
                raise IndexError(
                    f"ticker {ticker} from {tickers_fname} has no row in "
                    f"the pretrained embedding ticker list")
            ind = matches[0]
            new_emb.append(self.embedding[ind][-len(self.eod_data[i]):])
        self.embedding = np.array(new_emb)

        self.parameters = copy.copy(parameters)
        self.steps = steps
        self.epochs = epochs
        self.flat = flat
        self.inner_prod = in_pro
        if batch_size is None:
            # Default to one batch covering every loaded ticker.
            self.batch_size = len(self.tickers)
        else:
            self.batch_size = batch_size

        # NOTE(review): 253 / 378 look like offsets along the date axis
        # where the validation and test splits begin -- confirm with the
        # code that consumes them.
        self.valid_index = 253
        self.test_index = 378
        print(f"-----\n{self.mask_data.shape}\n-----")
        # Second axis of the mask array returned by load_EOD_data.
        self.trade_dates = self.mask_data.shape[1]
        # NOTE(review): presumably the number of features per day in
        # eod_data -- confirm against load_EOD_data.
        self.fea_dim = 5

        self.gpu = gpu