Esempio n. 1
0
    def predict_on_stock(self, stock: Stock, mode='val') -> dict:
        """Predict every sample of *stock* and collect its raw data columns.

        Parameters
        ----------
        stock : Stock
            The stock whose samples are iterated (via ``self.iter_stock``).
        mode : str
            Dataset split passed to ``self.get_dataset`` / ``self.iter_stock``
            (e.g. 'train' or 'val').

        Returns
        -------
        dict
            {Open: [...], High: [...], low: [...], close: [...],
             volume: [...], date: [...], prediction: [...]} — one list entry
            per sample, in iteration order.
        """
        dataset = self.get_dataset(mode)
        # NOTE(review): augmentations are hard-coded to 'val' even when
        # mode != 'val' — presumably intentional (no augmentation at
        # prediction time), but confirm.
        generator = batch_generator(
            dataset,
            batch_size=self.experiment.trainer.batch_size,
            randomize=False,
            augmentations=self.experiment.trainer.augmentations['val'],
            ind_gen=self.iter_stock(stock, mode))

        predictions = []
        for batch_in, batch_out in generator:
            # TODO: predict all batches and extend `predictions` with one
            # entry per sample.
            raise NotImplementedError()
        # concat predictions

        data = {}
        # Re-create the generator: the first pass above consumed it.
        generator = batch_generator(
            dataset,
            batch_size=self.experiment.trainer.batch_size,
            randomize=False,
            augmentations=self.experiment.trainer.augmentations['val'],
            ind_gen=self.iter_stock(stock, mode))
        for i, p in zip(generator, predictions):
            raw_data = self.get_raw_data(
                i, dataset)  # dict with open, high low close, volume, date

            for key, val in raw_data.items():
                data.setdefault(key, []).append(val)

            # BUGFIX: `setdefault('prediction', p)` stored only the first
            # prediction and silently dropped the rest; accumulate a list,
            # matching the other columns and the documented return shape.
            data.setdefault('prediction', []).append(p)
        return data
Esempio n. 2
0
def predict_on_dataset(dataset, model, out_folder, batch_size=512):
    """Predict every sample of *dataset* with *model* and dump one CSV per stock.

    Parameters
    ----------
    dataset : sequence of sample dicts (must support len() and iteration);
        each sample carries at least a 'stock' key.
    model : object exposing ``predict_on_batch(batch_in)``.
    out_folder : str
        Directory where ``<stock_tckt>.csv`` files are written.
    batch_size : int
        Samples per prediction batch.
    """
    # Feed indices 0..len-1 in order so predictions align with dataset order.
    generator = batch_generator(dataset, batch_size=batch_size, randomize=False,
                                ind_gen=(i for i in range(len(dataset))))

    predictions = []
    for batch_in, batch_out in tqdm(generator):
        predictions.extend(list(model.predict_on_batch(batch_in)))

    # Every sample must have received exactly one prediction.
    assert len(predictions) == len(dataset), '{}  {}'.format(len(predictions), len(dataset))

    # Annotate each sample with its prediction and group samples by stock.
    csv_data = {}
    for pred, sample in zip(predictions, dataset):
        sample['prediction'] = pred
        csv_data.setdefault(sample['stock'], []).append(sample)
    csv_data = {key: flatten_dictionaries(val) for key, val in csv_data.items()}

    # FIX: loop variable was named `dict`, shadowing the builtin.
    for stock, stock_data in csv_data.items():
        dump_to_csv(os.path.join(out_folder, stock.stock_tckt + '.csv'), stock_data)
Esempio n. 3
0
    def test_batch_generator_random(self):
        """With default randomize, fresh generators should not always start
        with the same sample: drawing the first batch from three independent
        generators must produce at least one difference."""
        dataset = get_test_dataset()
        batch_size = 1

        previous_batch = None
        saw_difference = False
        for _ in range(3):
            first_batch = next(batch_generator(dataset, batch_size=batch_size))

            if previous_batch is not None:
                identical = all(
                    np.prod(prev_part == part)
                    for prev_part, part in zip(previous_batch, first_batch))
                if not identical:
                    saw_difference = True
                    break

            previous_batch = first_batch

        assert previous_batch is not None
        assert saw_difference
Esempio n. 4
0
    def test_batch_generator(self):
        """batch_generator driven by an explicit index generator must yield
        batches whose inputs/outputs match TEST_CASES, in order."""
        dataset = get_test_dataset()

        batch_size = 5

        def ind_gen(return_as_batch):
            # Enumerate every batch_size-tuple of test-case keys; yield whole
            # tuples for verification, or flatten them one index at a time to
            # feed batch_generator.
            keys = list(self.TEST_CASES)

            x = [keys] * batch_size
            for pair in product(*x):
                if return_as_batch:
                    yield pair
                else:
                    for i in pair:
                        yield i

        gen = batch_generator(dataset,
                              batch_size=batch_size,
                              randomize=False,
                              ind_gen=ind_gen(False))

        for batch_indices, (batch_inputs,
                            batch_outputs) in zip(ind_gen(True), gen):
            assert isinstance(batch_inputs, np.ndarray)
            assert isinstance(batch_outputs, np.ndarray)
            assert batch_outputs.shape[0] == batch_inputs.shape[0] == batch_size
            assert batch_inputs.shape[1] == dataset.time_sample_length
            assert batch_outputs.shape[
                1] == 2  # "rise" output default is size 2

            # Expected arrays built straight from the test cases.
            inputs = np.stack(
                [self.TEST_CASES[i]['inputs'] for i in batch_indices], axis=0)
            outputs = np.stack(
                [self.TEST_CASES[i]['output'] for i in batch_indices], axis=0)

            # BUGFIX: the failure messages used '%d' on `batch_indices`,
            # which is a tuple — formatting it raised TypeError and masked
            # the real assertion failure. '%s' renders the tuple correctly.
            assert np.prod(
                batch_inputs == inputs), '%s: %s' % (batch_indices, 'inputs')
            assert np.prod(
                batch_outputs == outputs), '%s: %s' % (batch_indices,
                                                       'outputs')
Esempio n. 5
0
 def get_batch_generator(self, mode):
     """Build a batch generator for the requested split.

     *mode* ('train' or 'val') selects both the dataset and the
     augmentation set; batch size and randomization come from the
     instance. Raises KeyError for any other mode.
     """
     datasets = {
         'train': self.experiment.train_dataset,
         'val': self.experiment.val_dataset,
     }
     return batch_generator(datasets[mode],
                            batch_size=self.batch_size,
                            randomize=self.randomize,
                            augmentations=self.augmentations[mode])