def _make_loop(self, nstep_data, shift_data):
    """
    Reverse the n-step batching of ``nstep_data`` back into long sequences
    with a singleton batch dimension, and verify the round trip against the
    truncated shifted originals.

    Only applies to open-loop datasets; for any other ``self.name`` the
    method does nothing and returns None.

    :param nstep_data: (dict str: array) n-step batched sequences
    :param shift_data: (dict str: array) shifted reference sequences
    :return: (dict str: 3-way np.array) values of shape nsim' x 1 x dim,
             or None when ``self.name`` is not 'openloop'
    """
    if self.name != 'openloop':
        return None
    # Pick the unbatcher matching how the data was batched.
    unbatcher = self.unbatch_mh if self.batch_type == 'mh' else self.unbatch
    loop = unbatcher(nstep_data)
    for key in loop:
        # Unbatching must reproduce the (possibly truncated) shifted data.
        assert np.array_equal(loop[key].squeeze(1), shift_data[key][:loop[key].shape[0]]), \
            f'Reshaped data {key} is not equal to truncated original data'
    flattened = {key: val.squeeze(1) for key, val in loop.items()}
    plot.plot_traj(flattened,
                   figname=os.path.join(self.savedir, f'{self.system}_open.png'))
    plt.close('all')
    return loop
def make_nstep(self, overwrite=False):
    """
    Build shifted past/future sequences and their n-step batched versions,
    then split them into train/dev/test sets.

    For each key ``k`` in ``self.data`` this creates ``k + 'p'`` (sequence
    without its last ``nsteps`` samples) and ``k + 'f'`` (sequence without
    its first ``nsteps`` samples), batched according to ``self.batch_type``.

    :param overwrite: (bool) Whether to overwrite a dataset sequence if it
                      already exists in the dataset.
    :return: train_data (dict str: 3-way torch.Tensor} Dictionary with values
             of shape Nsteps X Nbatches X dim
             dev_data see train_data
             test_data see train_data
    """
    # Choose the batching routine once, outside the loop.
    batcher = batch_mh_data if self.batch_type == 'mh' else batch_data
    for key, seq in self.data.items():
        past, future = key + 'p', key + 'f'
        if past in self.shift_data and not overwrite:
            continue
        self.dims[past], self.dims[future] = seq.shape, seq.shape
        self.shift_data[past] = seq[:-self.nsteps]
        self.shift_data[future] = seq[self.nsteps:]
        self.nstep_data[past] = batcher(self.shift_data[past], self.nsteps)
        self.nstep_data[future] = batcher(self.shift_data[future], self.nsteps)
    plot.plot_traj(self.data,
                   figname=os.path.join(self.savedir, f'{self.system}.png'))
    plt.close('all')
    train_data, dev_data, test_data = self.split_train_test_dev(self.nstep_data)
    train_data.name, dev_data.name, test_data.name = 'nstep_train', 'nstep_dev', 'nstep_test'
    return train_data, dev_data, test_data
def __init__(self, system='fsw_phase_2', nsim=10000000, ninit=0, norm=None,
             batch_type='batch', nsteps=1, device='cpu', sequences=None,
             name='openloop', savedir='test', split=None,
             norm_type='zero-one'):
    """
    Load, normalize, batch, and split a system dataset.

    :param system: (str) Identifier of the system whose data to load.
    :param nsim: (int) Number of simulation steps to load.
    :param ninit: (int) Initial simulation step offset.
    :param norm: (list of str) Keys of sequences to normalize; default ['Y'].
    :param batch_type: (str) 'mh' for moving-horizon batching, else plain batching.
    :param nsteps: (int) Prediction horizon used for n-step batching.
    :param device: (str) Torch device the tensors are moved to.
    :param sequences: (dict str: array) Extra sequences merged into the loaded data.
    :param name: (str) Dataset mode, e.g. 'openloop'.
    :param savedir: (str) Directory for diagnostic plots; created if missing.
    :param split: (2-tuple of float) First index is proportion of experiments
                  from train, second is proportion from dev, leftover are for test set.
                  or (dict {str:list}) with keys 'train', 'dev', 'test' and values
                  indicating experiment numbers to be included in each split.
                  Default {'train': [0], 'dev': [0], 'test': [0]}.
    :param norm_type: (str) Key into norm_fns selecting the normalization function.

    returns: Dataset Object with public properties:
             train_data: dict(str: Tensor)
             dev_data: dict(str: Tensor)
             test_data: dict(str: Tensor)
             train_loop: dict(str: Tensor)
             dev_loop: dict(str: Tensor)
             test_loop: dict(str: Tensor)
             dims: dict(str: tuple)
    """
    # Mutable defaults are resolved here rather than in the signature so a
    # single shared list/dict is never mutated across instances.
    norm = ['Y'] if norm is None else norm
    sequences = dict() if sequences is None else sequences
    split = {'train': [0], 'dev': [0], 'test': [0]} if split is None else split
    assert not (system is None and len(sequences) == 0), 'Trying to instantiate an empty dataset.'
    self.name = name
    self.norm_type = norm_type
    self.norm_fn = norm_fns[norm_type]
    self.savedir = savedir
    os.makedirs(self.savedir, exist_ok=True)
    self.system, self.nsim, self.ninit, self.norm, self.nsteps, self.device = system, nsim, ninit, norm, nsteps, device
    self.batch_type = batch_type
    self.min_max_norms = dict()
    # Load raw data, overlay any user-supplied sequences, then normalize.
    self.data = self.load_data()
    self.data = {**self.data, **sequences}
    self.data = self.norm_data(self.data, self.norm)
    plot.plot_traj(self.data,
                   figname=os.path.join(self.savedir, f'{self.system}.png'))
    plt.close('all')
    self.experiments = self.split_data_by_experiment()
    self.nstep_data, self.loop_data = self.make_nstep_loop()
    self.nstep_data, self.loop_data = self.to_tensor(
        self.nstep_data), self.to_tensor(self.loop_data)
    # NOTE(review): split_train_test_dev is called here for its side effects
    # (setting self.train_data/dev_data/test_data) -- confirm against its definition.
    self.split_train_test_dev(split)
    self.train_data = self.listDict_to_dictTensor(self.train_data)
    self.dev_data = self.listDict_to_dictTensor(self.dev_data)
    self.test_data = self.listDict_to_dictTensor(self.test_data)
    self.dims = self.get_dims()
    self.name_data()
def eval(self, outputs):
    """
    Plot the output trajectories whose keys match any entry of
    ``self.plot_keys``; each matching series is unbatched, flattened, and
    plotted on its own figure.

    :param outputs: (dict str: torch.Tensor) model outputs keyed by name
    :return: (dict) empty metrics dictionary
    """
    selected = {}
    for key, value in outputs.items():
        # Keep only series whose name contains one of the requested tags.
        if any(tag in key for tag in self.plot_keys):
            selected[key] = unbatch_data(value).squeeze(1).detach().cpu().numpy()
    for key, series in selected.items():
        plot.plot_traj({key: series}, figname=None)
    return {}
def make_loop(self):
    """
    Unbatches data to original format with extra 1-dimension at the batch dimension.
    Length of sequences has been shortened to account for shift of data for
    sequence to sequence modeling. Length of sequences has been potentially
    shortened to be evenly divided by nsteps: nsim = (nsim - shift) - (nsim % nsteps)

    :return: train_loop (dict str: 3-way torch.Tensor) values of shape nsim' X 1 X dim
             dev_loop see train_loop
             test_loop see train_loop
    """
    if self.name == 'openloop':
        # Unbatch each split with the routine matching how it was batched.
        if self.batch_type == 'mh':
            train_loop = self.unbatch_mh(self.train_data)
            dev_loop = self.unbatch_mh(self.dev_data)
            test_loop = self.unbatch_mh(self.test_data)
        else:
            train_loop = self.unbatch(self.train_data)
            dev_loop = self.unbatch(self.dev_data)
            test_loop = self.unbatch(self.test_data)
        # Re-concatenate the splits to check that unbatching round-trips the
        # (possibly truncated) shifted originals.
        all_loop = {
            k: np.concatenate([train_loop[k], dev_loop[k],
                               test_loop[k]]).squeeze(1)
            for k in self.train_data.keys()
        }
        for k in self.train_data.keys():
            assert np.array_equal(all_loop[k], self.shift_data[k][:all_loop[k].shape[0]]), \
                f'Reshaped data {k} is not equal to truncated original data'
        plot.plot_traj(all_loop,
                       figname=os.path.join(self.savedir, f'{self.system}_open.png'))
        plt.close('all')
    elif self.name == 'closedloop':
        # NOTE(review): this branch always uses batch_mh_data regardless of
        # self.batch_type -- confirm that is intentional for closed loop.
        nstep_data = dict()
        for k, v in self.data.items():
            nstep_data[k + 'p'] = batch_mh_data(self.shift_data[k + 'p'],
                                                self.nsteps)
            nstep_data[k + 'f'] = batch_mh_data(self.shift_data[k + 'f'],
                                                self.nsteps)
        train_loop, dev_loop, test_loop = self.split_train_test_dev(
            nstep_data)
    # NOTE(review): if self.name is neither 'openloop' nor 'closedloop',
    # train_loop/dev_loop/test_loop are unbound and the lines below raise
    # NameError -- confirm only these two names can occur.
    train_loop.name, dev_loop.name, test_loop.name = 'loop_train', 'loop_dev', 'loop_test'
    # Convert every loop split and the stored n-step splits to float32
    # tensors on the configured device, in place.
    for dset in train_loop, dev_loop, test_loop, self.train_data, self.dev_data, self.test_data:
        for k, v in dset.items():
            # assumes values are numpy arrays convertible to float32 -- TODO confirm
            dset[k] = torch.tensor(v, dtype=torch.float32).to(self.device)
    return train_loop, dev_loop, test_loop