Example #1
0
    def _make_loop(self, nstep_data, shift_data):
        """
        Unbatches data to original format with extra 1-dimension at the batch dimension.
        Length of sequences has been shortened to account for shift of data for sequence to sequence modeling.
        Length of sequences has been potentially shortened to be evenly divided by nsteps:
            nsim = (nsim - shift) - (nsim % nsteps)

        :return: train_loop (dict str: 3-way np.array} Dictionary with values of shape nsim % X 1 X dim
                 dev_loop  see train_data
                 test_loop  see train_data
        """
        if self.name == 'openloop':
            if self.batch_type == 'mh':
                loop = self.unbatch_mh(nstep_data)
            else:
                loop = self.unbatch(nstep_data)

            for k in loop.keys():
                assert np.array_equal(loop[k].squeeze(1), shift_data[k][:loop[k].shape[0]]), \
                    f'Reshaped data {k} is not equal to truncated original data'
            plot.plot_traj({k: v.squeeze(1)
                            for k, v in loop.items()},
                           figname=os.path.join(self.savedir,
                                                f'{self.system}_open.png'))
            plt.close('all')

        return loop
Example #2
0
    def make_nstep(self, overwrite=False):
        """
        Build past/future shifted sequences and batch them into n-step chunks.

        :param overwrite: (bool) whether to rebuild a sequence pair that already
                          exists in self.shift_data.
        :return: train_data (dict str: 3-way torch.Tensor) values of shape
                 Nsteps X Nbatches X dim
                 dev_data  see train_data
                 test_data see train_data
        """
        # Batching strategy is fixed for the whole dataset; choose it once.
        batcher = batch_mh_data if self.batch_type == 'mh' else batch_data
        for key, series in self.data.items():
            past, future = key + 'p', key + 'f'
            if past in self.shift_data and not overwrite:
                continue
            self.dims[past] = series.shape
            self.dims[future] = series.shape
            # Past drops the trailing nsteps samples; future drops the leading ones.
            self.shift_data[past] = series[:-self.nsteps]
            self.shift_data[future] = series[self.nsteps:]
            self.nstep_data[past] = batcher(self.shift_data[past], self.nsteps)
            self.nstep_data[future] = batcher(self.shift_data[future], self.nsteps)
        plot.plot_traj(self.data,
                       figname=os.path.join(self.savedir,
                                            f'{self.system}.png'))
        plt.close('all')
        train_data, dev_data, test_data = self.split_train_test_dev(
            self.nstep_data)
        train_data.name = 'nstep_train'
        dev_data.name = 'nstep_dev'
        test_data.name = 'nstep_test'
        return train_data, dev_data, test_data
Example #3
0
    def __init__(self,
                 system='fsw_phase_2',
                 nsim=10000000,
                 ninit=0,
                 norm=None,
                 batch_type='batch',
                 nsteps=1,
                 device='cpu',
                 sequences=None,
                 name='openloop',
                 savedir='test',
                 split=None,
                 norm_type='zero-one'):
        """
        Load, normalize, batch, and split a system dataset.

        :param system: (str) identifier of the system whose data to load
        :param nsim: (int) number of simulation steps to load
        :param ninit: (int) initial time index offset
        :param norm: (list of str) keys of sequences to normalize; defaults to ['Y']
        :param batch_type: (str) 'batch' or 'mh' (moving horizon) batching
        :param nsteps: (int) prediction horizon for n-step batching
        :param device: (str) torch device the tensors are moved to
        :param sequences: (dict str: np.array) extra sequences merged into the
                          loaded data (overriding keys of the same name)
        :param name: (str) dataset flavor, e.g. 'openloop'
        :param savedir: (str) directory for diagnostic trajectory plots
        :param split: (2-tuple of float) first index is proportion of experiments
                      for train, second for dev, leftover for test;
                      or (dict {str: list}) with keys 'train', 'dev', 'test' and
                      values indicating experiment numbers in each split.
                      Defaults to {'train': [0], 'dev': [0], 'test': [0]}.
        :param norm_type: (str) key into norm_fns selecting the normalization

         returns: Dataset Object with public properties:
                    train_data: dict(str: Tensor)
                    dev_data: dict(str: Tensor)
                    test_data: dict(str: Tensor)
                    train_loop: dict(str: Tensor)
                    dev_loop: dict(str: Tensor)
                    test_loop: dict(str: Tensor)
                    dims: dict(str: tuple)
        """
        # Mutable default arguments (list/dict) are shared across all calls;
        # use None sentinels and create fresh per-call copies of the defaults.
        norm = ['Y'] if norm is None else norm
        sequences = dict() if sequences is None else sequences
        split = {'train': [0], 'dev': [0], 'test': [0]} if split is None else split
        assert not (system is None and len(sequences)
                    == 0), 'Trying to instantiate an empty dataset.'
        self.name = name
        self.norm_type = norm_type
        self.norm_fn = norm_fns[norm_type]
        self.savedir = savedir
        os.makedirs(self.savedir, exist_ok=True)
        self.system, self.nsim, self.ninit, self.norm, self.nsteps, self.device = system, nsim, ninit, norm, nsteps, device
        self.batch_type = batch_type
        self.min_max_norms = dict()
        self.data = self.load_data()
        # User-supplied sequences override loaded data on key collision.
        self.data = {**self.data, **sequences}
        self.data = self.norm_data(self.data, self.norm)
        plot.plot_traj(self.data,
                       figname=os.path.join(self.savedir,
                                            f'{self.system}.png'))
        plt.close('all')
        self.experiments = self.split_data_by_experiment()
        self.nstep_data, self.loop_data = self.make_nstep_loop()
        self.nstep_data, self.loop_data = self.to_tensor(
            self.nstep_data), self.to_tensor(self.loop_data)
        self.split_train_test_dev(split)
        self.train_data = self.listDict_to_dictTensor(self.train_data)
        self.dev_data = self.listDict_to_dictTensor(self.dev_data)
        self.test_data = self.listDict_to_dictTensor(self.test_data)
        self.dims = self.get_dims()
        self.name_data()
Example #4
0
 def eval(self, outputs):
     """
     Plot trajectories for outputs whose keys match any entry of self.plot_keys.

     :param outputs: (dict str: Tensor) batched model outputs
     :return: (dict) empty dictionary
     """
     selected = dict()
     for key, value in outputs.items():
         if not any(plot_key in key for plot_key in self.plot_keys):
             continue
         selected[key] = unbatch_data(value).squeeze(1).detach().cpu().numpy()
     for key, trajectory in selected.items():
         plot.plot_traj({key: trajectory}, figname=None)
     return dict()
Example #5
0
    def make_loop(self):
        """
        Unbatch data to original format with an extra 1-dimension at the batch
        dimension.

        Sequence length has been shortened to account for the shift used in
        sequence-to-sequence modeling, and potentially truncated to be evenly
        divisible by nsteps:
            nsim = (nsim - shift) - (nsim % nsteps)

        :return: train_loop (dict str: 3-way torch.Tensor) values of shape
                 nsim' X 1 X dim
                 dev_loop  see train_loop
                 test_loop see train_loop
        :raises ValueError: if self.name is neither 'openloop' nor 'closedloop'.
                            Previously an unknown name fell through and raised
                            UnboundLocalError at the final naming step.
        """
        if self.name == 'openloop':
            if self.batch_type == 'mh':
                train_loop = self.unbatch_mh(self.train_data)
                dev_loop = self.unbatch_mh(self.dev_data)
                test_loop = self.unbatch_mh(self.test_data)
            else:
                train_loop = self.unbatch(self.train_data)
                dev_loop = self.unbatch(self.dev_data)
                test_loop = self.unbatch(self.test_data)

            # Concatenate the splits back in order so the round trip can be
            # validated against the original (truncated) shifted data.
            all_loop = {
                k: np.concatenate([train_loop[k], dev_loop[k],
                                   test_loop[k]]).squeeze(1)
                for k in self.train_data.keys()
            }
            for k in self.train_data.keys():
                assert np.array_equal(all_loop[k], self.shift_data[k][:all_loop[k].shape[0]]), \
                    f'Reshaped data {k} is not equal to truncated original data'
            plot.plot_traj(all_loop,
                           figname=os.path.join(self.savedir,
                                                f'{self.system}_open.png'))
            plt.close('all')

        elif self.name == 'closedloop':
            # NOTE(review): this branch always uses batch_mh_data regardless of
            # self.batch_type — presumably intentional for closed loop; confirm.
            nstep_data = dict()
            for k, v in self.data.items():
                nstep_data[k + 'p'] = batch_mh_data(self.shift_data[k + 'p'],
                                                    self.nsteps)
                nstep_data[k + 'f'] = batch_mh_data(self.shift_data[k + 'f'],
                                                    self.nsteps)
            train_loop, dev_loop, test_loop = self.split_train_test_dev(
                nstep_data)

        else:
            # Guard: without this, train_loop below is referenced unassigned.
            raise ValueError(
                f"Unsupported dataset name {self.name!r}; expected 'openloop' or 'closedloop'")

        train_loop.name, dev_loop.name, test_loop.name = 'loop_train', 'loop_dev', 'loop_test'
        # Move every loop and n-step split to the target device as float32.
        for dset in train_loop, dev_loop, test_loop, self.train_data, self.dev_data, self.test_data:
            for k, v in dset.items():
                dset[k] = torch.tensor(v, dtype=torch.float32).to(self.device)
        return train_loop, dev_loop, test_loop