예제 #1
0
def create_dataloaders(X_train, eval_set, weights, batch_size, num_workers,
                       drop_last, pin_memory):
    """
    Create the training dataloader and one dataloader per evaluation set.

    The training loader is shuffled or sub-sampled according to the
    ``(need_shuffle, sampler)`` pair returned by
    ``create_sampler(weights, X_train)``. Evaluation loaders are always read
    sequentially and never receive that sampler, because it is derived from
    ``X_train`` and not from the evaluation data.

    Parameters
    ----------
    X_train : np.ndarray
        Training data
    eval_set : list of np.array
        List of eval sets
    weights : either 0, 1, dict or iterable
        if 0 (default) : no weights will be applied
        if 1 : classification only, will balanced class with inverse frequency
        if dict : keys are corresponding class values are sample weights
        if iterable : list or np array must be of length equal to nb elements
                      in the training set
    batch_size : int
        how many samples per batch to load
    num_workers : int
        how many subprocesses to use for data loading. 0 means that the data
        will be loaded in the main process
    drop_last : bool
        set to True to drop the last incomplete batch, if the dataset size is not
        divisible by the batch size. If False and the size of dataset is not
        divisible by the batch size, then the last batch will be smaller
    pin_memory : bool
        Whether to pin GPU memory during training

    Returns
    -------
    train_dataloader, valid_dataloaders : torch.DataLoader, list of torch.DataLoader
        Training dataloader and the list of validation dataloaders, one per
        entry of ``eval_set`` and in the same order
    """
    need_shuffle, sampler = create_sampler(weights, X_train)

    train_dataloader = DataLoader(
        PredictDataset(X_train),
        batch_size=batch_size,
        sampler=sampler,
        shuffle=need_shuffle,
        num_workers=num_workers,
        drop_last=drop_last,
        pin_memory=pin_memory,
    )

    # The sampler above was created from X_train; reusing it (or the training
    # shuffle flag) on an evaluation set would sample that set with settings
    # computed for different data, so evaluation loaders are sequential.
    # NOTE(review): drop_last is still forwarded to the evaluation loaders as
    # in the original code - confirm that dropping a trailing partial
    # validation batch is intended.
    valid_dataloaders = [
        DataLoader(
            PredictDataset(X),
            batch_size=batch_size,
            shuffle=False,
            num_workers=num_workers,
            drop_last=drop_last,
            pin_memory=pin_memory,
        )
        for X in eval_set
    ]

    return train_dataloader, valid_dataloaders
예제 #2
0
    def predict(self, X):
        """
        Run the network over X and collect its outputs and embedded inputs.

        Parameters
        ----------
        X : a :tensor: `torch.Tensor`
            Input data

        Returns
        -------
        res_output : np.ndarray
            First value returned by the network for each batch, stacked
            vertically in input order
        embedded_inputs : np.ndarray
            Second value returned by the network for each batch, stacked
            vertically in input order
        """
        self.network.eval()
        loader = DataLoader(
            PredictDataset(X),
            batch_size=self.batch_size,
            shuffle=False,
        )

        output_chunks = []
        embedding_chunks = []
        for batch in loader:
            batch = batch.to(self.device).float()
            # The network returns three values; the third one is not used here.
            output, embedded_x, _ = self.network(batch)
            output_chunks.append(output.cpu().detach().numpy())
            embedding_chunks.append(embedded_x.cpu().detach().numpy())

        return np.vstack(output_chunks), np.vstack(embedding_chunks)
예제 #3
0
    def predict_proba(self, X):
        """
        Predict class probabilities for the rows of X.

        Parameters
        ----------
        X : a :tensor: `torch.Tensor`
            Input data (wrapped in ``PredictDataset`` and read in batches of
            ``self.batch_size`` without shuffling)

        Returns
        -------
        res : np.ndarray
            Softmax over dimension 1 of the network output, with all batches
            stacked vertically in input order

        Raises
        ------
        ValueError
            If X produces no batch, because there is nothing to stack
        """
        self.network.eval()

        dataloader = DataLoader(PredictDataset(X),
                                batch_size=self.batch_size,
                                shuffle=False)

        softmax = torch.nn.Softmax(dim=1)
        results = []
        for data in dataloader:
            data = data.to(self.device).float()

            # The network returns four values; only the raw output is needed.
            output, _, _, _ = self.network(data)
            results.append(softmax(output).cpu().detach().numpy())

        # Stack once at the end instead of re-copying a growing array on
        # every batch.
        return np.vstack(results)
예제 #4
0
    def predict(self, X):
        """
        Make predictions for the rows of X.

        Parameters
        ----------
        X : a :tensor: `torch.Tensor`
            Input data (wrapped in ``PredictDataset`` and read in batches of
            ``self.batch_size`` without shuffling)

        Returns
        -------
        predictions : np.array
            Predictions of the regression problem: the network output of each
            batch flattened to one dimension and concatenated in input order

        Raises
        ------
        ValueError
            If X produces no batch, because there is nothing to concatenate
        """
        self.network.eval()
        dataloader = DataLoader(PredictDataset(X),
                                batch_size=self.batch_size,
                                shuffle=False)

        results = []
        for data in dataloader:
            data = data.to(self.device).float()

            # The network returns four values; only the raw output is needed.
            output, _, _, _ = self.network(data)
            results.append(output.cpu().detach().numpy().reshape(-1))

        # Concatenate once at the end instead of re-copying a growing array
        # on every batch.
        return np.hstack(results)
예제 #5
0
    def predict_proba(self, X):
        """
        Predict class probabilities for every task of a multi-task network.

        Parameters
        ----------
        X : a :tensor: `torch.Tensor`
            Input data

        Returns
        -------
        res : list of np.ndarray
            One array per task index in ``range(len(self.output_dim))``, each
            being the softmax (dimension 1) of that task's output with all
            batches stacked vertically. Empty list if X produces no batch.

        """
        self.network.eval()

        dataloader = DataLoader(
            PredictDataset(X),
            batch_size=self.batch_size,
            shuffle=False,
        )

        softmax = torch.nn.Softmax(dim=1)
        n_tasks = len(self.output_dim)
        results = {}
        for data in dataloader:
            data = data.to(self.device).float()
            # The network returns the per-task outputs plus a second value
            # that is not used here.
            output, _ = self.network(data)
            predictions = [
                softmax(task_output).cpu().detach().numpy()
                for task_output in output
            ]
            # Append in place; the previous `get(...) + [...]` form rebuilt
            # every per-task list on every batch.
            for task_idx in range(n_tasks):
                results.setdefault(task_idx, []).append(predictions[task_idx])
        return [np.vstack(task_res) for task_res in results.values()]
예제 #6
0
    def predict(self, X):
        """
        Predict the most probable class for the rows of X.

        Parameters
        ----------
        X : a :tensor: `torch.Tensor`
            Input data (wrapped in ``PredictDataset`` and read in batches of
            ``self.batch_size`` without shuffling)

        Returns
        -------
        predictions : np.array
            Predictions of the most probable class: the argmax over dimension
            1 of the softmaxed network output, translated element-wise
            through ``self.preds_mapper.get``

        Raises
        ------
        ValueError
            If X produces no batch, because there is nothing to concatenate
        """
        self.network.eval()
        dataloader = DataLoader(PredictDataset(X),
                                batch_size=self.batch_size,
                                shuffle=False)

        softmax = torch.nn.Softmax(dim=1)
        results = []
        for data in dataloader:
            data = data.to(self.device).float()
            # The network returns four values; only the raw output is needed.
            output, _, _, _ = self.network(data)
            class_idx = torch.argmax(softmax(output), dim=1)
            results.append(class_idx.cpu().detach().numpy().reshape(-1))

        # Concatenate once at the end instead of re-copying a growing array
        # on every batch.
        res = np.hstack(results)

        return np.vectorize(self.preds_mapper.get)(res)
예제 #7
0
    def predict_proba(self, X):
        """
        Predict class probabilities for the rows of X.

        Parameters
        ----------
        X : a :tensor: `torch.Tensor`
            Input data (wrapped in ``PredictDataset`` and read in batches of
            ``self.batch_size`` without shuffling, with pinned memory)

        Returns
        -------
        res : np.ndarray
            Softmax over dimension 1 of the network output, with all batches
            stacked vertically in input order
        """
        self.network.eval()

        loader = DataLoader(
            PredictDataset(X),
            batch_size=self.batch_size,
            shuffle=False,
            pin_memory=True,
        )

        chunks = []
        for batch in loader:
            batch = batch.to(self.device).float()

            # The network returns the output plus a second value that is not
            # used for prediction.
            output, _ = self.network(batch)
            probabilities = torch.nn.Softmax(dim=1)(output)
            chunks.append(probabilities.cpu().detach().numpy())

        return np.vstack(chunks)
예제 #8
0
    def explain(self, X):
        """
        Return local explanation

        The model is switched to eval mode for the computation and is always
        switched back to train mode on exit, including when an error occurs.

        Parameters
        ----------
        X : tensor: `torch.Tensor`
            Input data

        Returns
        -------
        M_explain : matrix
            Importance per sample, per columns: the explanation matrix of
            each batch multiplied by the reducing matrix, stacked vertically.
        masks : matrix
            Sparse matrix showing attention masks used by network. Returned
            as the mapping produced by ``forward_masks``, with each entry
            multiplied by the reducing matrix and stacked over all batches.
        """
        try:
            self.model.eval()

            loader = DataLoader(
                PredictDataset(X),
                batch_size=self.batch_size,
                shuffle=False,
            )

            explain_chunks = []
            reducing_matrix = create_explain_matrix(self.model.input_dim, 0,
                                                    [], self.model.input_dim)
            for batch_nb, batch in enumerate(loader):
                batch = batch.to(self.device).float()

                M_explain, masks = self.model.forward_masks(batch)

                # Reduce every mask in place; keys are unchanged, so updating
                # the mapping while walking a snapshot of its keys is safe.
                for key in list(masks):
                    dense_mask = masks[key].cpu().detach().numpy()
                    masks[key] = csc_matrix.dot(dense_mask, reducing_matrix)

                dense_explain = M_explain.cpu().detach().numpy()
                explain_chunks.append(
                    csc_matrix.dot(dense_explain, reducing_matrix))

                if batch_nb:
                    # Later batches are appended below the rows gathered so far.
                    for key, value in masks.items():
                        res_masks[key] = np.vstack([res_masks[key], value])
                else:
                    # The first batch seeds the accumulated masks.
                    res_masks = masks

            return np.vstack(explain_chunks), res_masks
        finally:
            self.model.train()
0
    def predict(self, X):
        """
        Predict the most probable class for every task of a multi-task network.

        Parameters
        ----------
        X : a :tensor: `torch.Tensor`
            Input data (wrapped in ``PredictDataset`` and read in batches of
            ``self.batch_size`` without shuffling, with pinned memory)

        Returns
        -------
        predictions : list of np.array
            One array per task index in ``range(len(self.output_dim))``:
            the argmax over dimension 1 of the softmaxed task output,
            concatenated over all batches and translated element-wise through
            ``self.preds_mapper[task_idx].get``. Empty list if X produces no
            batch.
        """
        self.network.eval()
        dataloader = DataLoader(
            PredictDataset(X),
            batch_size=self.batch_size,
            shuffle=False,
            pin_memory=True
        )

        softmax = torch.nn.Softmax(dim=1)
        n_tasks = len(self.output_dim)
        results = {}
        for data in dataloader:
            data = data.to(self.device).float()
            # The network returns the per-task outputs plus a second value
            # that is not used here.
            output, _ = self.network(data)
            predictions = [
                torch.argmax(softmax(task_output), dim=1)
                .cpu()
                .detach()
                .numpy()
                .reshape(-1)
                for task_output in output
            ]

            # Append in place; the previous `get(...) + [...]` form rebuilt
            # every per-task list on every batch.
            for task_idx in range(n_tasks):
                results.setdefault(task_idx, []).append(predictions[task_idx])
        # stack all task individually
        stacked = [np.hstack(task_res) for task_res in results.values()]
        # map all task individually
        return [
            np.vectorize(self.preds_mapper[task_idx].get)(task_res)
            for task_idx, task_res in enumerate(stacked)
        ]
예제 #10
0
    def explain(self, X):
        """
        Return local explanation

        Parameters
        ----------
        X : a :tensor: `torch.Tensor`
            Input data (wrapped in ``PredictDataset`` and read in batches of
            ``self.batch_size`` without shuffling)

        Returns
        -------
        M_explain : matrix
            Importance per sample, per columns: the explanation matrix of
            each batch multiplied by ``self.reducing_matrix``, stacked
            vertically.
        masks : dict
            Attention masks used by network. For each key of the mask
            mapping returned by the network, the masks of all batches
            multiplied by ``self.reducing_matrix`` and stacked vertically.

        Raises
        ------
        ValueError
            If X produces no batch, because there is nothing to stack
        """
        self.network.eval()

        dataloader = DataLoader(PredictDataset(X),
                                batch_size=self.batch_size,
                                shuffle=False)

        explain_chunks = []
        mask_chunks = {}
        for data in dataloader:
            data = data.to(self.device).float()

            # The network returns four values; only the explanation matrix
            # and the masks are needed here.
            _, _, M_explain, masks = self.network(data)

            explain_chunks.append(
                csc_matrix.dot(M_explain.cpu().detach().numpy(),
                               self.reducing_matrix))
            for key, value in masks.items():
                mask_chunks.setdefault(key, []).append(
                    csc_matrix.dot(value.cpu().detach().numpy(),
                                   self.reducing_matrix))

        # Stack once at the end instead of re-copying growing arrays on
        # every batch.
        res_explain = np.vstack(explain_chunks)
        res_masks = {
            key: np.vstack(chunks) for key, chunks in mask_chunks.items()
        }
        return res_explain, res_masks
예제 #11
0
    def predict(self, X):
        """
        Predict from the difference between two runs of the network.

        Each batch is passed through the network twice: once with an extra
        trailing column of zeros and once with an extra trailing column of
        ones. The softmax (dimension 1) of the zero-column output is
        subtracted from the softmax of the one-column output.
        NOTE(review): the extra column looks like a binary indicator feature
        (e.g. a treatment flag) - confirm against the training code.

        Parameters
        ----------
        X : a :tensor: `torch.Tensor`
            Input data, without the extra trailing column

        Returns
        -------
        predictions : np.array
            ``self.predict_func`` applied to the per-batch softmax
            differences stacked vertically in input order
        """
        self.network.eval()
        dataloader = DataLoader(
            PredictDataset(X),
            batch_size=self.batch_size,
            shuffle=False,
        )

        softmax = nn.Softmax(dim=1)
        results = []
        for data in dataloader:
            data = data.to(self.device).float()

            n_rows = data.shape[0]
            # Create the indicator columns on the same device and dtype as
            # the batch; CPU-created columns make torch.cat fail when the
            # batch lives on another device.
            zeros = torch.zeros(n_rows, 1, device=data.device,
                                dtype=data.dtype)
            ones = torch.ones(n_rows, 1, device=data.device,
                              dtype=data.dtype)

            # The second value returned by the network is not used here.
            output0, _ = self.network(torch.cat((data, zeros), dim=1))
            output1, _ = self.network(torch.cat((data, ones), dim=1))

            output = softmax(output1) - softmax(output0)
            results.append(output.cpu().detach().numpy())

        res = np.vstack(results)
        return self.predict_func(res)
예제 #12
0
    def predict_proba(self, X):
        """
        Return the raw network output for the rows of X.

        The loader is built with ``batch_size=X.shape[0]``, so the whole
        input is requested as a single batch. No activation is applied to
        the network output.

        Parameters
        ----------
        X : a :tensor: `torch.Tensor`
            Input data

        Returns
        -------
        res : np.ndarray
            First value returned by the network, converted to numpy and
            stacked vertically over the batches

        """
        self.network.eval()

        loader = DataLoader(
            PredictDataset(X),
            batch_size=X.shape[0],
            shuffle=False,
        )

        chunks = []
        for batch in loader:
            batch = batch.to(self.device).float()

            # The network returns four values; only the first is used.
            output, _, _, _ = self.network(batch)
            chunks.append(output.cpu().detach().numpy())

        return np.vstack(chunks)