def fit(self, train: DataTuple) -> Tuple[PreAlgorithm, DataTuple]:
    """Fit the pre-processing model via the external script and transform the training data.

    The location of the fitted model is recorded in ``self.model_path``
    (``model_<name>.joblib`` inside ``self.model_dir``) and handed to the script.

    Args:
        train: training data

    Returns:
        a tuple of this algorithm instance and the transformed training data
    """
    self.model_path = self.model_dir / f"model_{self.name}.joblib"
    with TemporaryDirectory() as tmpdir:
        tmp_path = Path(tmpdir)
        # ================================ write data to files ================================
        train_path = tmp_path / "train.npz"
        train.to_npz(train_path)

        # ========================== generate commandline arguments ===========================
        transformed_train_path = tmp_path / "transformed_train.npz"
        cmd = self._fit_script_command(train_path, transformed_train_path, self.model_path)

        # ============================= run the generated command =============================
        # NOTE(review): the result file is read right after this call, so `_call_script`
        # is presumably blocking here — confirm against its definition.
        self._call_script(cmd + ["--mode", "fit"])

        # ================================== load results =====================================
        transformed_train = DataTuple.from_npz(transformed_train_path)

        # prefix the name of the algorithm to the dataset name
        transformed_train = transformed_train.replace(
            name=None if train.name is None else f"{self.name}: {train.name}"
        )
        return self, transformed_train
def main() -> None:
    """Entry point of the LFR (Learning Fair Representations) script.

    Learning fair representations is a pre-processing technique that finds a
    latent representation which encodes the data well but obfuscates information
    about protected attributes [2]_.

    The parsed ``mode`` argument selects what is done:

    * ``"run"``: load train and test data, train and transform, save both results.
    * ``"fit"``: fit a model on the training data, write the transformed training
      data to ``new_train`` and dump the model to ``model``.
    * ``"transform"``: load the model from ``model``, transform the test data and
      write it to ``new_test``.

    References:
        .. [2] R. Zemel, Y. Wu, K. Swersky, T. Pitassi, and C. Dwork, "Learning
           Fair Representations." International Conference on Machine Learning, 2013.

    Based on code from https://github.com/zjelveh/learning-fair-representations
    Which in turn, we've got from AIF360
    """
    args = ZemelArgs()
    args.parse_args()
    mode = args.mode

    if mode == "run":
        assert args.train is not None
        assert args.new_train is not None
        assert args.test is not None
        assert args.new_test is not None
        train, test = load_data_from_flags(args)
        transformed = train_and_transform(train, test, args)
        save_transformations(transformed, args)
        return

    if mode == "fit":
        assert args.model is not None
        assert args.train is not None
        assert args.new_train is not None
        train = DataTuple.from_npz(Path(args.train))
        model = fit(train, args)

        # split the features by the value of the first sensitive-attribute column
        sens_values = train.s[train.s.columns[0]]
        features_s0 = train.x.loc[sens_values == 0].to_numpy()
        features_s1 = train.x.loc[sens_values == 1].to_numpy()

        new_x = trans(model.prototypes, model.w, features_s1, features_s0, train)
        result = DataTuple(x=new_x, s=train.s, y=train.y, name=train.name)
        # the transformed data is written before the model is persisted
        result.to_npz(Path(args.new_train))
        dump(model, Path(args.model))
        return

    if mode == "transform":
        assert args.model is not None
        assert args.test is not None
        assert args.new_test is not None
        test = DataTuple.from_npz(Path(args.test))
        model = load(Path(args.model))
        new_test = transform(test, model.prototypes, model.w)
        new_test.to_npz(Path(args.new_test))
def fit(self: _IA, train: DataTuple) -> _IA:
    """Fit the algorithm on the given data by running the external script in fit mode.

    The model location (``model_<name>.joblib`` inside ``self.model_dir``) is
    stored in ``self.model_path`` and passed on to the script.

    Args:
        train: training data

    Returns:
        this algorithm instance
    """
    self.model_path = self.model_dir / f"model_{self.name}.joblib"

    with TemporaryDirectory() as scratch_dir:
        # the training data only needs to exist while the script runs
        train_file = Path(scratch_dir) / "train.npz"
        train.to_npz(train_file)

        fit_cmd = self._fit_script_command(train_file, self.model_path)
        fit_cmd = fit_cmd + ["--mode", "fit"]
        self._call_script(fit_cmd)  # wait for script to run

    return self
async def run_async(self, train: DataTuple, test: TestTuple) -> Prediction:
    """Run the algorithm's script on the given data and return its predictions.

    Train and test data are written to a temporary directory, the script command
    is awaited, and the predictions are read back from ``predictions.npz``.

    Args:
        train: training data
        test: test data

    Returns:
        predictions
    """
    with TemporaryDirectory() as scratch_dir:
        workdir = Path(scratch_dir)
        train_file, test_file, pred_file = (
            workdir / filename for filename in ("train.npz", "test.npz", "predictions.npz")
        )

        train.to_npz(train_file)
        test.to_npz(test_file)

        # wait for the script to finish before reading its output
        await self._call_script(self._script_command(train_file, test_file, pred_file))
        return Prediction.from_npz(pred_file)
async def run_async(self, train: DataTuple, test: TestTuple) -> Tuple[DataTuple, TestTuple]:
    """Generate fair features for the given data by awaiting the external script.

    Args:
        train: training data
        test: test data

    Returns:
        a tuple of the pre-processed training data and the test data
    """

    def _prefixed(dataset_name):
        # prefix the name of the algorithm to the dataset name (unnamed data stays unnamed)
        if dataset_name is None:
            return None
        return f"{self.name}: {dataset_name}"

    with TemporaryDirectory() as scratch_dir:
        workdir = Path(scratch_dir)

        # inputs handed to the script
        train_file = workdir / "train.npz"
        test_file = workdir / "test.npz"
        train.to_npz(train_file)
        test.to_npz(test_file)

        # outputs read back once the script has finished
        new_train_file = workdir / "transformed_train.npz"
        new_test_file = workdir / "transformed_test.npz"

        command = self._script_command(train_file, test_file, new_train_file, new_test_file)
        await self._call_script(command)

        new_train = DataTuple.from_npz(new_train_file)
        new_test = TestTuple.from_npz(new_test_file)

        new_train = new_train.replace(name=_prefixed(train.name))
        new_test = new_test.replace(name=_prefixed(test.name))
        return new_train, new_test