def _get_dl(self, s):
    """Build the ``DataLoader`` for one split of the configured dataset.

    The contiguous arrays for split ``s`` come from the ``get_x_y_contig``
    function of the module matching ``self.dataset``. They are cut into
    windows of ``self.win_len`` samples by ``sliding_window_x_y`` and
    wrapped in a ``TensorDataset``.

    Args:
        s: Split name forwarded to ``get_x_y_contig``. ``"train"`` selects
            the training configuration; any other value is treated as an
            evaluation split.

    Returns:
        A ``DataLoader`` with batch size ``self.batch_size``; shuffling is
        enabled only when ``s == "train"``.

    Raises:
        ValueError: If ``self.dataset`` is not one of the supported names.

    Side effects:
        When ``s == "train"``, ``self.epoch_iters`` and ``self.data_spec``
        are (re)assigned.
    """
    dataset_name = self.dataset
    # Imports are deferred so only the selected dataset module gets loaded.
    if dataset_name == "opportunity":
        from filternet.datasets.opportunity import get_x_y_contig as load_contig
    elif dataset_name == "smartphone_hapt":
        from filternet.datasets.smartphone_hapt import (
            get_x_y_contig as load_contig,
        )
    elif dataset_name == "har":
        from filternet.datasets.har import get_x_y_contig as load_contig
    elif dataset_name == "intention_recognition":
        from filternet.datasets.intention_recognition import (
            get_x_y_contig as load_contig,
        )
    else:
        raise ValueError(f"Unknown dataset {dataset_name}")

    # Forward only the options that are actually set (truthy), so the
    # dataset module's own defaults apply otherwise.
    optional = (("y_cols", self.y_cols), ("sensor_subset", self.sensor_subset))
    loader_kwargs = {key: value for key, value in optional if value}
    contig_x, contig_ys, spec = load_contig(s, **loader_kwargs)

    is_train = s == "train"
    if is_train:
        # The epoch size is derived from the contiguous length and the
        # decimation factor, independently of how many windows the (possibly
        # small) train_step produces.
        self.epoch_iters = int(len(contig_x) / self.decimation)
        step = self.train_step
    else:
        # Val and test windows overlap by half, so each point is inferred
        # roughly twice.
        step = int(self.win_len / 2)

    # Windows are never shuffled at this stage; for training, shuffling is
    # left to the DataLoader below.
    windows_x, windows_ys = sliding_window_x_y(
        contig_x, contig_ys, win_len=self.win_len, step=step, shuffle=False
    )

    if is_train:
        # The training split defines the overall data spec; it can be
        # modified later if more info is needed.
        self.data_spec = spec

    label_tensors = [torch.Tensor(labels).long() for labels in windows_ys]
    tensor_dataset = TensorDataset(torch.Tensor(windows_x), *label_tensors)
    return DataLoader(
        tensor_dataset,
        batch_size=self.batch_size,
        shuffle=is_train,
    )
def x_y_dict():
    """Return windowed train/val/test tensors plus the window length.

    Each split is loaded through ``opp.get_x_y_contig`` and cut into
    64-sample windows with ``sliding_window_x_y``.

    Returns:
        A dict with, for every split in ``train``/``val``/``test``:
        ``"X_<split>"`` (the windows as a ``torch.Tensor``) and
        ``"ys_<split>"`` (a list of long tensors, one per label array),
        plus ``"win_len"`` holding the window length.

    Raises:
        AssertionError: If the windowed inputs do not have 113 entries on
            axis 1 and ``win_len`` entries on axis 2, or the first label
            array does not have ``win_len`` entries on axis 1.
    """
    window = 64
    bundle = {}
    for split in ("train", "val", "test"):
        contig_x, contig_ys, _spec = opp.get_x_y_contig(split)
        win_x, win_ys = sliding_window_x_y(contig_x, contig_ys, win_len=window)

        # Sanity-check the windowed layout before converting to tensors.
        assert win_x.shape[1] == 113
        assert win_x.shape[2] == window
        assert win_ys[0].shape[1] == window

        bundle.update(
            {
                f"X_{split}": torch.Tensor(win_x),
                f"ys_{split}": [torch.Tensor(labels).long() for labels in win_ys],
            }
        )

    bundle["win_len"] = window
    return bundle
def test_get_x_y(dfs_dict):
    """Check the window shapes and ``data_spec`` consistency for every split.

    For each of ``train``, ``train+val``, ``val`` and ``test`` the contiguous
    data from ``ds.get_x_y_contig`` is windowed and compared against the
    returned ``data_spec``. Finally the ``train`` and ``val`` contiguous
    lengths must add up to the ``train+val`` length.

    Args:
        dfs_dict: Passed through unchanged as the ``dfs_dict`` keyword of
            ``ds.get_x_y_contig``.
    """
    window = 128
    contig_lengths = {}

    for split in ("train", "train+val", "val", "test"):
        contig_x, contig_ys, spec = ds.get_x_y_contig(split, dfs_dict=dfs_dict)
        win_x, win_ys = sliding_window_x_y(contig_x, contig_ys, win_len=window)

        # Windowed inputs: axis 1 matches the declared channel count,
        # axis 2 matches the window length.
        assert win_x.shape[1] == spec["input_channels"]
        assert win_x.shape[2] == window
        # Every label array is windowed to the same length.
        assert all(labels.shape[1] == window for labels in win_ys)

        contig_lengths[split] = len(contig_x)

        # The spec must be internally consistent.
        assert len(spec["input_features"]) == spec["input_channels"]
        assert spec["n_outputs"] == len(spec["output_spec"])
        for output in spec["output_spec"]:
            assert "name" in output
            assert output["num_classes"] == len(output["classes"])
        assert "dataset_name" in spec

    # "train+val" must be exactly the concatenation of the two splits.
    combined = contig_lengths["train"] + contig_lengths["val"]
    assert combined == contig_lengths["train+val"]