Example #1
    # assumed imports (not shown in the snippet):
    # from torch.utils.data import DataLoader, random_split
    def _load_data(self, dataset, train_split, batch_size):
        self._adjacency = dataset.adjacency_matrix
        self._ftr_vec = dataset.ftr_vec
        # calculate lengths of train and dev according to split in (0, 1)
        len_train = int(len(dataset) * train_split)
        len_dev = len(dataset) - len_train
        # split dataset
        train, dev = random_split(dataset, (len_train, len_dev))

        # balanced train loader: the sampler draws from the full dataset but
        # is restricted to the train split's indices, oversampling rare classes
        self._balanced_train_loader = DataLoader(
            train.dataset,
            batch_size=batch_size,
            sampler=ImbalancedDatasetSampler(train.dataset,
                                             indices=train.indices,
                                             num_samples=len(train.indices)))
        # unbalanced train loader: the whole train split as one shuffled batch
        self._unbalanced_train_loader = DataLoader(train,
                                                   batch_size=len_train,
                                                   shuffle=True)

        # set validation loader
        self._dev_loader = DataLoader(
            dev,
            batch_size=len_dev,
        )
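The balanced loader relies on ImbalancedDatasetSampler, imported in these examples from the project's dataset.datset_sampler module and not shown here. A minimal sketch of the idea behind such a sampler, assuming labels are available as a plain sequence (all names below are illustrative, not the project's API):

import torch
from torch.utils.data import Sampler

class InverseFrequencySampler(Sampler):
    """Draws indices with probability inversely proportional to class
    frequency, so rare classes are oversampled."""

    def __init__(self, labels, num_samples=None):
        labels = list(labels)
        self._num_samples = num_samples or len(labels)
        counts = {}
        for y in labels:
            counts[y] = counts.get(y, 0) + 1
        # per-sample weight = 1 / frequency of its class
        self._weights = torch.tensor([1.0 / counts[y] for y in labels],
                                     dtype=torch.double)

    def __iter__(self):
        # sample with replacement so every class is drawn roughly
        # equally often in expectation
        picks = torch.multinomial(self._weights, self._num_samples,
                                  replacement=True)
        return iter(picks.tolist())

    def __len__(self):
        return self._num_samples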
Example #2
    # assumed imports (not shown in the snippet): DataLoader, random_split,
    # SubsetRandomSampler from torch.utils.data
    def _load_data(self, train_dataset, dev_dataset, test_dataset, dev_split,
                   test_split, batch_size):
        # derive dev/test lengths from the splits in (0, 1), unless
        # explicit datasets were passed in
        len_dev = 0 if dev_dataset else int(len(train_dataset) * dev_split)
        len_test = 0 if test_dataset else int(len(train_dataset) * test_split)
        len_train = len(train_dataset) - len_test - len_dev

        # split dataset
        train, dev, test = random_split(train_dataset,
                                        (len_train, len_dev, len_test))

        # balanced train loader (sampler and shuffle are mutually exclusive,
        # hence the commented-out shuffle)
        self._balanced_train_loader = DataLoader(
            train.dataset,
            batch_size=batch_size,
            collate_fn=train.dataset.collate_fn,
            sampler=ImbalancedDatasetSampler(train.dataset,
                                             indices=train.indices.tolist(),
                                             num_samples=len(train.indices))
            # shuffle=True
        )
        # unbalanced train loader: random order over the train indices only
        self._unbalanced_train_loader = DataLoader(
            train.dataset,
            batch_size=batch_size,
            collate_fn=train.dataset.collate_fn,
            sampler=SubsetRandomSampler(train.indices.tolist())
            # shuffle=True
        )

        # set validation loader
        self._dev_loader = DataLoader(
            dev_dataset,
            batch_size=batch_size,
            collate_fn=dev_dataset.collate_fn,
        ) if dev_dataset else DataLoader(
            dev,
            batch_size=batch_size,
            collate_fn=dev.dataset.collate_fn,
            # sampler=SubsetRandomSampler(dev.indices.tolist())
            # shuffle=True
        )

        # set test loader
        self._test_loader = DataLoader(
            test_dataset,
            batch_size=batch_size,
            collate_fn=test_dataset.collate_fn,
        ) if test_dataset else DataLoader(
            test,
            batch_size=batch_size,
            collate_fn=test.dataset.collate_fn,
            # sampler=SubsetRandomSampler(test.indices.tolist())
            # shuffle=True
        )
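One constraint with this pattern: random_split raises a ValueError when the lengths do not sum to the dataset length, which is why len_train is derived from the other two instead of being computed from its own ratio. A self-contained check:

import torch
from torch.utils.data import TensorDataset, random_split

ds = TensorDataset(torch.arange(100))
len_dev, len_test = 10, 20
len_train = len(ds) - len_dev - len_test  # guarantees the lengths sum up
train, dev, test = random_split(ds, (len_train, len_dev, len_test))
assert (len(train), len(dev), len(test)) == (70, 10, 20)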
Example #3
    def _load_data(self, train_dataset, dev_dataset, test_dataset, dev_split,
                   test_split):
        # derive dev/test lengths from the splits in (0, 1), unless
        # explicit datasets were passed in
        len_dev = 0 if dev_dataset else int(len(train_dataset) * dev_split)
        len_test = 0 if test_dataset else int(len(train_dataset) * test_split)
        len_train = len(train_dataset) - len_test - len_dev
        # split dataset
        train, dev, test = random_split(train_dataset,
                                        (len_train, len_dev, len_test))

        dev = dev_dataset if dev_dataset else dev
        test = test_dataset if test_dataset else test

        # balanced train loader (note: without indices, the sampler draws
        # from the full dataset rather than just the train split)
        self._balanced_train_loader = DataLoader(
            train.dataset,
            batch_size=1,
            sampler=ImbalancedDatasetSampler(train.dataset)
            # shuffle=True
        )
        # unbalanced train loader: sequential pass over the full dataset
        self._unbalanced_train_loader = DataLoader(
            train.dataset,
            batch_size=1,
            # sampler=ImbalancedDatasetSampler(train.dataset)
            # shuffle=True
        )
        # set validation loader
        self._dev_loader = DataLoader(
            dev,
            batch_size=1,
            # sampler=ImbalancedDatasetSampler(dev)
            # shuffle=True
        )
        # set test loader
        self._test_loader = DataLoader(
            test,
            batch_size=1,
            # sampler=ImbalancedDatasetSampler(test)
            # shuffle=True
        )
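The dev/test fallback above leans on random_split accepting a zero length: when an explicit dev or test set is passed in, the corresponding split comes back as an empty Subset and is simply replaced. A small sanity check:

import torch
from torch.utils.data import TensorDataset, random_split

ds = TensorDataset(torch.arange(10))
train, dev = random_split(ds, (10, 0))  # zero-length splits are allowed
assert len(train) == 10 and len(dev) == 0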
Example #4
            data[gnx_id] = (A, D, gnx_vec, embed_vec, self._labels[gnx_id])

        data = self._z_score_all_data(data)
        pickle.dump((data, idx_to_name), open(pkl_path, "wb"))
        return data, idx_to_name

    def __getitem__(self, index):
        gnx_id = self._idx_to_name[index]
        A, D, x, embed, label = self._data[gnx_id]
        # 0 acts as a sentinel for a missing embedding
        embed = 0 if embed is None else Tensor(embed).long()
        return Tensor(A.todense()), Tensor(D), Tensor(x), embed, label

    def __len__(self):
        return len(self._idx_to_name)


if __name__ == "__main__":
    from params.protein_params import ProteinDatasetTrainParams
    from torch.utils.data import DataLoader
    from dataset.datset_sampler import ImbalancedDatasetSampler
    ds = BilinearDataset(ProteinDatasetTrainParams())
    # ds = BilinearDataset(AidsDatasetTestParams())
    dl = DataLoader(dataset=ds,
                    batch_size=1,
                    sampler=ImbalancedDatasetSampler(ds))
    p = []
    # __getitem__ returns five items, so the batch must be unpacked as five
    for i, (A, D, x, embed, l) in enumerate(dl):
        print(i, A, D, x, embed, l)
        p.append(l.item())
    e = 0  # no-op; presumably left as a debugger breakpoint anchor
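A quick way to see the sampler's effect is to tally the labels collected in p over the epoch; with an imbalance-aware sampler the counts should come out roughly uniform across classes. A short continuation of the block above, assuming the labels are hashable scalars:

from collections import Counter

# roughly uniform counts indicate the sampler is balancing the classes
print(Counter(p))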