def _fit(self, X: List[Config[EnsembleConfig]], y: npt.NDArray[np.float32]) -> None:
    """Fit one deep-set model per output variable.

    The config transformer is fitted first to determine the input
    dimensionality; then an independent model is trained (full-batch)
    for every column of ``y`` and collected in ``self.models_``.
    """
    # Fit the transformer so the feature dimensionality is known.
    encoded = self.config_transformer.fit_transform(X)
    features = np.concatenate(encoded)
    lengths = np.array([item.shape[0] for item in encoded])
    input_dim = len(self.config_transformer.feature_names_)
    output_dim = y.shape[1]

    # Map every distinct dataset to a group ID for the training data.
    dataset_ids = {d: i for i, d in enumerate({x.dataset for x in X})}
    group_tensor = torch.as_tensor(
        [dataset_ids[x.dataset] for x in X], dtype=torch.long
    )

    # These tensors do not depend on the output variable, so build them once.
    feature_tensor = torch.from_numpy(features).float()
    length_tensor = torch.from_numpy(lengths).long()

    # Train a separate model for each output variable.
    self.models_ = []
    for target in range(output_dim):
        model = self._init_model(input_dim)
        module = DeepSetLightningModule(model, self.loss, self.weight_decay)
        # Target column `target`, kept 2-D via a single-column slice.
        data = TensorDataset(
            feature_tensor,
            length_tensor,
            torch.from_numpy(y[:, target : target + 1]).float(),
            group_tensor,
        )
        loader = DataLoader(data, batch_size=len(data))
        self._trainer.fit(module, train_dataloaders=loader)
        self.models_.append(model)
def _predict(self, X: List[Config[ModelConfig]]) -> npt.NDArray[np.float32]:
    """Predict all output variables for the given configs.

    Each fitted model in ``self.models_`` contributes one output column;
    the per-model predictions are concatenated along the last axis.
    """
    # Encode the configs into a single feature matrix.
    encoded = self.config_transformer.transform(X)
    inputs = TensorDataset(
        torch.from_numpy(encoded).float(),
        torch.zeros(len(encoded)),  # dummy data due to PL bug
    )
    # A single full-size batch, so predict() yields exactly one tensor.
    loader = DataLoader(inputs, batch_size=len(inputs))

    outputs = []
    for model in self.models_:
        module = MLPLightningModule(model, self.loss)
        batches = cast(List[torch.Tensor], self._trainer.predict(module, loader))
        outputs.append(batches[0].numpy())
    return np.concatenate(outputs, axis=-1)
def _predict(self, X: List[Config[EnsembleConfig]]) -> npt.NDArray[np.float32]:
    """Predict all output variables for the given ensemble configs.

    Configs are encoded to variable-length feature blocks; each fitted
    deep-set model contributes one output column and the columns are
    concatenated along the last axis.
    """
    # Encode the configs; keep per-config block lengths for the set model.
    encoded = self.config_transformer.transform(X)
    features = np.concatenate(encoded)
    lengths = np.array([item.shape[0] for item in encoded])
    inputs = TensorDataset(
        torch.from_numpy(features).float(),
        torch.from_numpy(lengths).long(),
    )
    # A single full-size batch, so predict() yields exactly one tensor.
    loader = DataLoader(inputs, batch_size=len(inputs))

    outputs = []
    for model in self.models_:
        module = DeepSetLightningModule(model, self.loss)
        batches = cast(List[torch.Tensor], self._trainer.predict(module, loader))
        outputs.append(batches[0].numpy())
    return np.concatenate(outputs, axis=-1)