def _load_data(self, dataset, train_split, batch_size):
    """Split *dataset* into train/dev partitions and build the loaders.

    Sets ``self._balanced_train_loader`` (class-balanced sampling over the
    train indices), ``self._unbalanced_train_loader`` (one shuffled batch of
    the whole train split) and ``self._dev_loader`` (one batch of dev).

    :param dataset: dataset exposing ``adjacency_matrix`` and ``ftr_vec``
    :param train_split: fraction of samples assigned to train, in (0, 1)
    :param batch_size: batch size for the balanced train loader
    """
    self._adjacency = dataset.adjacency_matrix
    self._ftr_vec = dataset.ftr_vec

    # partition sizes derived from the split fraction
    n_train = int(len(dataset) * train_split)
    n_dev = len(dataset) - n_train
    train_subset, dev_subset = random_split(dataset, (n_train, n_dev))

    # balanced loader: sampler draws only from the train indices,
    # rebalancing class frequencies
    balancing_sampler = ImbalancedDatasetSampler(
        train_subset.dataset,
        indices=train_subset.indices,
        num_samples=len(train_subset.indices),
    )
    self._balanced_train_loader = DataLoader(
        train_subset.dataset,
        batch_size=batch_size,
        sampler=balancing_sampler,
    )

    # unbalanced loader: the entire train split as a single shuffled batch
    self._unbalanced_train_loader = DataLoader(
        train_subset, batch_size=n_train, shuffle=True)

    # dev loader: the entire dev split as a single batch
    self._dev_loader = DataLoader(dev_subset, batch_size=n_dev)
def _load_data(self, train_dataset, dev_dataset, test_dataset, dev_split, test_split, batch_size):
    """Build train/dev/test loaders, splitting *train_dataset* as needed.

    When ``dev_dataset`` / ``test_dataset`` are provided they are used
    directly; otherwise the corresponding fraction (``dev_split`` /
    ``test_split``) is carved out of *train_dataset* via ``random_split``.

    Sets ``self._balanced_train_loader`` (class-rebalancing sampler over the
    train indices), ``self._unbalanced_train_loader`` (uniform random over
    the same indices), ``self._dev_loader`` and ``self._test_loader``.
    """
    # a split length of 0 means "use the externally supplied dataset instead"
    n_dev = 0 if dev_dataset else int(len(train_dataset) * dev_split)
    n_test = 0 if test_dataset else int(len(train_dataset) * test_split)
    n_train = len(train_dataset) - n_test - n_dev
    train_part, dev_part, test_part = random_split(
        train_dataset, (n_train, n_dev, n_test))

    base = train_part.dataset
    # NOTE: `.tolist()` assumes Subset.indices is a tensor — torch-version
    # dependent; newer torch returns a plain list. Verify against the pinned
    # torch version.
    train_indices = train_part.indices.tolist()

    # balanced loader: rebalance classes, restricted to the train indices
    self._balanced_train_loader = DataLoader(
        base,
        batch_size=batch_size,
        collate_fn=base.collate_fn,
        sampler=ImbalancedDatasetSampler(
            base,
            indices=train_part.indices.tolist(),
            num_samples=len(train_part.indices.tolist())),
    )

    # unbalanced loader: uniform random draw over the same train indices
    self._unbalanced_train_loader = DataLoader(
        base,
        batch_size=batch_size,
        collate_fn=base.collate_fn,
        sampler=SubsetRandomSampler(train_indices),
    )

    # dev loader: external dataset if given, otherwise the carved-out subset
    if dev_dataset:
        self._dev_loader = DataLoader(
            dev_dataset,
            batch_size=batch_size,
            collate_fn=dev_dataset.collate_fn,
        )
    else:
        self._dev_loader = DataLoader(
            dev_part,
            batch_size=batch_size,
            collate_fn=dev_part.dataset.collate_fn,
        )

    # test loader: same rule as dev
    if test_dataset:
        self._test_loader = DataLoader(
            test_dataset,
            batch_size=batch_size,
            collate_fn=test_dataset.collate_fn,
        )
    else:
        self._test_loader = DataLoader(
            test_part,
            batch_size=batch_size,
            collate_fn=test_part.dataset.collate_fn,
        )
def _load_data(self, train_dataset, dev_dataset, test_dataset, dev_split, test_split):
    """Build train/dev/test loaders (batch size 1), splitting as needed.

    When ``dev_dataset`` / ``test_dataset`` are provided they are used
    directly; otherwise the corresponding fraction (``dev_split`` /
    ``test_split``) is carved out of *train_dataset* via ``random_split``.

    Sets ``self._balanced_train_loader``, ``self._unbalanced_train_loader``,
    ``self._dev_loader`` and ``self._test_loader``.
    """
    # a split length of 0 means "use the externally supplied dataset instead"
    len_dev = 0 if dev_dataset else int(len(train_dataset) * dev_split)
    len_test = 0 if test_dataset else int(len(train_dataset) * test_split)
    len_train = len(train_dataset) - len_test - len_dev
    # split dataset
    train, dev, test = random_split(train_dataset, (len_train, len_dev, len_test))
    dev = dev_dataset if dev_dataset else dev
    test = test_dataset if test_dataset else test

    # list() handles both tensor and list Subset.indices across torch versions
    train_indices = list(train.indices)

    # BUG FIX: the sampler previously covered the FULL dataset
    # (ImbalancedDatasetSampler(train.dataset) with no indices), so dev/test
    # samples leaked into "balanced training". Restrict it to the train
    # indices, matching the sibling _load_data overloads.
    self._balanced_train_loader = DataLoader(
        train.dataset,
        batch_size=1,
        sampler=ImbalancedDatasetSampler(
            train.dataset,
            indices=train_indices,
            num_samples=len(train_indices)),
    )

    # BUG FIX: previously iterated train.dataset (the whole dataset,
    # including dev/test items); iterate only the train subset.
    self._unbalanced_train_loader = DataLoader(
        train,
        batch_size=1,
    )

    # set validation loader
    self._dev_loader = DataLoader(
        dev,
        batch_size=1,
    )

    # set test loader
    self._test_loader = DataLoader(
        test,
        batch_size=1,
    )
data[gnx_id] = (A, D, gnx_vec, embed_vec, self._labels[gnx_id]) data = self._z_score_all_data(data) pickle.dump((data, idx_to_name), open(pkl_path, "wb")) return data, idx_to_name def __getitem__(self, index): gnx_id = self._idx_to_name[index] A, D, x, embed, label = self._data[gnx_id] embed = 0 if embed is None else Tensor(embed).long() return Tensor(A.todense()), Tensor(D), Tensor(x), embed, label def __len__(self): return len(self._idx_to_name) if __name__ == "__main__": from params.protein_params import ProteinDatasetTrainParams from torch.utils.data import DataLoader from dataset.datset_sampler import ImbalancedDatasetSampler ds = BilinearDataset(ProteinDatasetTrainParams()) # ds = BilinearDataset(AidsDatasetTestParams()) dl = DataLoader(dataset=ds, batch_size=1, sampler=ImbalancedDatasetSampler(ds)) p = [] for i, (A, D, x, l) in enumerate(dl): print(i, A, D, x, l) p.append(l.item()) e = 0