def create_dataloaders(X_train, eval_set, weights, batch_size, num_workers,
                       drop_last, pin_memory):
    """
    Create dataloaders with or without subsampling depending on weights and balanced.

    Parameters
    ----------
    X_train : np.ndarray
        Training data
    eval_set : list of np.array
        List of eval sets
    weights : either 0, 1, dict or iterable
        if 0 (default) : no weights will be applied
        if 1 : classification only, will balanced class with inverse frequency
        if dict : keys are corresponding class values are sample weights
        if iterable : list or np array must be of length equal to nb elements
                      in the training set
    batch_size : int
        how many samples per batch to load
    num_workers : int
        how many subprocesses to use for data loading. 0 means that the data
        will be loaded in the main process
    drop_last : bool
        set to True to drop the last incomplete batch, if the dataset size is
        not divisible by the batch size. If False and the size of dataset is
        not divisible by the batch size, then the last batch will be smaller
        (applies to the training loader only)
    pin_memory : bool
        Whether to pin GPU memory during training

    Returns
    -------
    train_dataloader, valid_dataloaders : torch.DataLoader, list of torch.DataLoader
        Training dataloader and one validation dataloader per eval set
    """
    need_shuffle, sampler = create_sampler(weights, X_train)

    train_dataloader = DataLoader(
        PredictDataset(X_train),
        batch_size=batch_size,
        sampler=sampler,
        shuffle=need_shuffle,
        num_workers=num_workers,
        drop_last=drop_last,
        pin_memory=pin_memory,
    )

    # Evaluation loaders must see every sample exactly once, in order.
    # Reusing the training sampler/shuffle here would subsample or reorder
    # the eval sets (and a sampler built over X_train indices can index out
    # of range on a smaller eval set); dropping the last incomplete batch
    # would silently skip validation samples and distort metrics.
    valid_dataloaders = []
    for X in eval_set:
        valid_dataloaders.append(
            DataLoader(
                PredictDataset(X),
                batch_size=batch_size,
                shuffle=False,
                num_workers=num_workers,
                drop_last=False,
                pin_memory=pin_memory,
            )
        )

    return train_dataloader, valid_dataloaders
def predict(self, X):
    """
    Make predictions on a batch (valid)

    Parameters
    ----------
    X : a :tensor: `torch.Tensor`
        Input data

    Returns
    -------
    predictions : np.array
        Predictions of the regression problem
    """
    self.network.eval()

    loader = DataLoader(
        PredictDataset(X),
        batch_size=self.batch_size,
        shuffle=False,
    )

    # Collect per-batch outputs and embeddings, then stack once at the end.
    pred_chunks = []
    embedding_chunks = []
    for batch in loader:
        batch = batch.to(self.device).float()
        out, embedded, _ = self.network(batch)
        pred_chunks.append(out.cpu().detach().numpy())
        embedding_chunks.append(embedded.cpu().detach().numpy())

    return np.vstack(pred_chunks), np.vstack(embedding_chunks)
def predict_proba(self, X):
    """
    Make class-probability predictions on a batch (valid).

    Parameters
    ----------
    X : a :tensor: `torch.Tensor`
        Input data

    Returns
    -------
    res : np.ndarray
        Softmax probabilities, one row per input sample.
    """
    self.network.eval()

    dataloader = DataLoader(PredictDataset(X),
                            batch_size=self.batch_size, shuffle=False)

    # Accumulate per-batch chunks and stack once: vstack-ing inside the
    # loop is quadratic in the number of batches, and leaves `res`
    # undefined when the loader is empty.
    results = []
    for data in dataloader:
        data = data.to(self.device).float()
        output, M_loss, M_explain, masks = self.network(data)
        predictions = torch.nn.Softmax(dim=1)(output).cpu().detach().numpy()
        results.append(predictions)

    return np.vstack(results)
def predict(self, X):
    """
    Make predictions on a batch (valid).

    Parameters
    ----------
    X : a :tensor: `torch.Tensor`
        Input data

    Returns
    -------
    predictions : np.array
        Flat 1-D array of regression predictions, one per input sample.
    """
    self.network.eval()

    dataloader = DataLoader(PredictDataset(X),
                            batch_size=self.batch_size, shuffle=False)

    # Accumulate per-batch chunks and stack once: hstack-ing inside the
    # loop is quadratic in the number of batches, and leaves `res`
    # undefined when the loader is empty.
    results = []
    for data in dataloader:
        data = data.to(self.device).float()
        output, M_loss, M_explain, masks = self.network(data)
        results.append(output.cpu().detach().numpy().reshape(-1))

    return np.hstack(results)
def predict_proba(self, X):
    """
    Make predictions for classification on a batch (valid)

    Parameters
    ----------
    X : a :tensor: `torch.Tensor`
        Input data

    Returns
    -------
    res : list of np.ndarray
        One probability matrix per task.
    """
    self.network.eval()

    loader = DataLoader(
        PredictDataset(X),
        batch_size=self.batch_size,
        shuffle=False,
    )

    # One chunk list per task; chunks are stacked after the loop.
    n_tasks = len(self.output_dim)
    per_task_chunks = [[] for _ in range(n_tasks)]

    for batch in loader:
        batch = batch.to(self.device).float()
        outputs, _ = self.network(batch)
        for task_idx in range(n_tasks):
            probs = torch.nn.Softmax(dim=1)(outputs[task_idx])
            per_task_chunks[task_idx].append(probs.cpu().detach().numpy())

    return [np.vstack(chunks) for chunks in per_task_chunks]
def predict(self, X):
    """
    Make predictions on a batch (valid).

    Parameters
    ----------
    X : a :tensor: `torch.Tensor`
        Input data

    Returns
    -------
    predictions : np.array
        Predicted class for each sample, mapped back through
        ``self.preds_mapper``.
    """
    self.network.eval()

    dataloader = DataLoader(PredictDataset(X),
                            batch_size=self.batch_size, shuffle=False)

    # Accumulate per-batch chunks and stack once: hstack-ing inside the
    # loop is quadratic in the number of batches, and leaves `res`
    # undefined when the loader is empty.
    results = []
    for data in dataloader:
        data = data.to(self.device).float()
        output, M_loss, M_explain, masks = self.network(data)
        # Softmax is monotonic, so argmax over the raw logits yields the
        # same class indices without the extra computation.
        predictions = torch.argmax(output, dim=1)
        results.append(predictions.cpu().detach().numpy().reshape(-1))

    res = np.hstack(results)
    return np.vectorize(self.preds_mapper.get)(res)
def predict_proba(self, X):
    """
    Make predictions for classification on a batch (valid)

    Parameters
    ----------
    X : a :tensor: `torch.Tensor`
        Input data

    Returns
    -------
    res : np.ndarray
        Softmax probabilities, one row per input sample.
    """
    self.network.eval()

    loader = DataLoader(
        PredictDataset(X),
        batch_size=self.batch_size,
        shuffle=False,
        pin_memory=True,
    )

    softmax = torch.nn.Softmax(dim=1)
    chunks = []
    for batch in loader:
        batch = batch.to(self.device).float()
        logits, M_loss = self.network(batch)
        chunks.append(softmax(logits).cpu().detach().numpy())

    return np.vstack(chunks)
def explain(self, X):
    """ Return local explanation

    Parameters
    ----------
    X : tensor: `torch.Tensor`
        Input data

    Returns
    -------
    M_explain : matrix
        Importance per sample, per columns.
    masks : matrix
        Sparse matrix showing attention masks used by network.
    """
    try:
        self.model.eval()
        dataloader = DataLoader(
            PredictDataset(X),
            batch_size=self.batch_size,
            shuffle=False,
        )
        res_explain = []
        # Identity-style reducing matrix (no categorical embeddings:
        # cat_idxs is empty and post-embedding dim equals input_dim),
        # used to project per-feature importances back to input columns.
        reducing_matrix = create_explain_matrix(self.model.input_dim,
                                                0,
                                                [],
                                                self.model.input_dim)
        for batch_nb, data in enumerate(dataloader):
            data = data.to(self.device).float()
            M_explain, masks = self.model.forward_masks(data)
            # Overwrite each mask tensor in place with its reduced
            # numpy projection.
            for key, value in masks.items():
                masks[key] = csc_matrix.dot(value.cpu().detach().numpy(),
                                            reducing_matrix)
            res_explain.append(
                csc_matrix.dot(M_explain.cpu().detach().numpy(),
                               reducing_matrix))
            # First batch: res_masks aliases this batch's masks dict;
            # later batches stack row-wise onto each key.
            if batch_nb == 0:
                res_masks = masks
            else:
                for key, value in masks.items():
                    res_masks[key] = np.vstack([res_masks[key], value])
        res_explain = np.vstack(res_explain)
        return res_explain, res_masks
    finally:
        # Unconditionally restore training mode, even if explanation
        # fails mid-loop. NOTE(review): this assumes the model was in
        # train mode before the call — confirm against callers.
        self.model.train()
def predict(self, X):
    """
    Make predictions on a batch (valid)

    Parameters
    ----------
    X : a :tensor: `torch.Tensor`
        Input data

    Returns
    -------
    results : list of np.array
        Predicted, mapper-translated class labels, one array per task.
    """
    self.network.eval()

    loader = DataLoader(
        PredictDataset(X),
        batch_size=self.batch_size,
        shuffle=False,
        pin_memory=True
    )

    n_tasks = len(self.output_dim)
    # One chunk list per task; chunks are concatenated after the loop.
    per_task_chunks = [[] for _ in range(n_tasks)]

    for batch in loader:
        batch = batch.to(self.device).float()
        outputs, _ = self.network(batch)
        for task_idx in range(n_tasks):
            labels = torch.argmax(
                torch.nn.Softmax(dim=1)(outputs[task_idx]), dim=1
            )
            per_task_chunks[task_idx].append(
                labels.cpu().detach().numpy().reshape(-1)
            )

    # Stack each task's chunks, then translate indices through each
    # task's own preds mapper.
    stacked = [np.hstack(chunks) for chunks in per_task_chunks]
    return [
        np.vectorize(self.preds_mapper[task_idx].get)(task_preds)
        for task_idx, task_preds in enumerate(stacked)
    ]
def explain(self, X):
    """
    Return local explanation.

    Parameters
    ----------
    X : a :tensor: `torch.Tensor`
        Input data

    Returns
    -------
    M_explain : matrix
        Importance per sample, per columns.
    masks : dict of matrix
        Attention masks used by the network, one stacked matrix per step.
    """
    self.network.eval()

    dataloader = DataLoader(PredictDataset(X),
                            batch_size=self.batch_size, shuffle=False)

    # Accumulate per-batch chunks in lists and stack once at the end:
    # vstack-ing inside the loop (for the explanation matrix and for every
    # mask key) is quadratic in the number of batches.
    explain_chunks = []
    mask_chunks = {}
    for data in dataloader:
        data = data.to(self.device).float()
        output, M_loss, M_explain, masks = self.network(data)
        for key, value in masks.items():
            reduced = csc_matrix.dot(value.cpu().detach().numpy(),
                                     self.reducing_matrix)
            mask_chunks.setdefault(key, []).append(reduced)
        explain_chunks.append(
            csc_matrix.dot(M_explain.cpu().detach().numpy(),
                           self.reducing_matrix))

    res_explain = np.vstack(explain_chunks)
    res_masks = {key: np.vstack(chunks) for key, chunks in mask_chunks.items()}
    return res_explain, res_masks
def predict(self, X):
    """
    Make predictions on a batch (valid).

    Runs the network twice per batch — once with a treatment-indicator
    column of zeros appended, once with ones — and returns the softmax
    difference (an uplift-style estimate) passed through
    ``self.predict_func``.

    Parameters
    ----------
    X : a :tensor: `torch.Tensor`
        Input data

    Returns
    -------
    predictions : np.array
        ``self.predict_func`` applied to the stacked per-sample
        probability differences.
    """
    self.network.eval()

    dataloader = DataLoader(
        PredictDataset(X),
        batch_size=self.batch_size,
        shuffle=False,
    )

    softmax = nn.Softmax(dim=1)
    results = []
    for data in dataloader:
        data = data.to(self.device).float()
        # Build the indicator columns directly on the same device as the
        # batch: creating them on CPU breaks torch.cat when the model
        # runs on GPU.
        size = data.shape[0]
        zeros = torch.zeros(size, 1, device=data.device)
        ones = torch.ones(size, 1, device=data.device)

        output0, M_loss = self.network(torch.cat((data, zeros), dim=1))
        output1, M_loss = self.network(torch.cat((data, ones), dim=1))
        # Difference of class probabilities with vs. without treatment.
        output = softmax(output1) - softmax(output0)
        results.append(output.cpu().detach().numpy())

    res = np.vstack(results)
    return self.predict_func(res)
def predict_proba(self, X):
    """
    Make predictions for classification on a batch (valid).

    Parameters
    ----------
    X : a :tensor: `torch.Tensor`
        Input data

    Returns
    -------
    res : np.ndarray
        Raw network outputs stacked row-wise, one row per input sample.
    """
    self.network.eval()

    # Respect the configured batch size: batching the entire dataset as a
    # single batch (batch_size=X.shape[0]) ignores self.batch_size and
    # risks running out of memory on large inputs. Per-batch outputs are
    # stacked below, so the result is unchanged.
    dataloader = DataLoader(
        PredictDataset(X),
        batch_size=self.batch_size,
        shuffle=False,
    )

    results = []
    for data in dataloader:
        data = data.to(self.device).float()
        output, M_loss, mu1, mu0 = self.network(data)
        results.append(output.cpu().detach().numpy())

    return np.vstack(results)