def main() -> None:
    """Parse ``BeutelArgs`` from the command line and dispatch on ``args.mode``.

    Modes:
        * ``"run"``: load train and test data with ``load_data_from_flags``,
          pass them to ``train_and_transform`` and save the result with
          ``save_transformations``.
        * ``"fit"``: load the training set from the ``.npz`` file at
          ``args.train``, call ``fit``, write the transformed training set to
          ``args.new_train`` and dump the fitted encoder to ``args.model``.
        * ``"transform"``: load the test set from the ``.npz`` file at
          ``args.test``, load the encoder from ``args.model``, call
          ``transform`` and write the result to ``args.new_test``.

    Raises:
        AssertionError: if a path required by the selected mode is ``None``.
        RuntimeError: if ``args.mode`` is not one of the modes listed above.
    """
    args = BeutelArgs().parse_args()

    if args.mode == "run":
        assert args.train is not None
        assert args.new_train is not None
        assert args.test is not None
        assert args.new_test is not None
        train, test = load_data_from_flags(args)
        save_transformations(train_and_transform(train, test, args), args)
    elif args.mode == "fit":
        assert args.model is not None
        assert args.train is not None
        assert args.new_train is not None
        train = DataTuple.from_npz(Path(args.train))
        transformed_train, enc = fit(train, args)
        transformed_train.to_npz(Path(args.new_train))
        dump(enc, Path(args.model))
    elif args.mode == "transform":
        assert args.model is not None
        assert args.test is not None
        assert args.new_test is not None
        test = DataTuple.from_npz(Path(args.test))
        model = load(Path(args.model))
        transformed_test = transform(test, model, args)
        transformed_test.to_npz(Path(args.new_test))
    else:
        # Fail loudly instead of exiting successfully without writing any output.
        raise RuntimeError(f"Unknown mode: {args.mode}")
def main() -> None:
    """Parse ``VfaeArgs``, seed the RNGs and dispatch on ``args.mode``.

    Modes:
        * ``"run"``: load train and test data with ``load_data_from_flags``,
          pass them to ``train_and_transform`` and save the result with
          ``save_transformations``.
        * ``"fit"``: load the training set from ``args.train``, fit an encoder,
          transform the training set with it, write the transformed data to
          ``args.new_train`` and dump the encoder to ``args.model``.
        * ``"transform"``: load the test set from ``args.test``, load the
          encoder from ``args.model``, transform the test set and write it to
          ``args.new_test``.

    Raises:
        AssertionError: if a path required by the selected mode is ``None``.
        RuntimeError: if ``args.mode`` is not one of the modes listed above.
    """
    args = VfaeArgs(explicit_bool=True).parse_args()
    set_seed(args.seed)

    if args.mode == "run":
        assert args.train is not None
        assert args.new_train is not None
        assert args.test is not None
        assert args.new_test is not None
        train, test = load_data_from_flags(args)
        save_transformations(train_and_transform(train, test, args), args)
    elif args.mode == "fit":
        assert args.model is not None
        assert args.train is not None
        assert args.new_train is not None
        train = DataTuple.from_npz(Path(args.train))
        enc = fit(train, args)
        # The fitted encoder is applied to the same data it was trained on.
        transformed_train = transform(enc, train, args)
        transformed_train.to_npz(Path(args.new_train))
        dump(enc, Path(args.model))
    elif args.mode == "transform":
        assert args.model is not None
        assert args.test is not None
        assert args.new_test is not None
        test = DataTuple.from_npz(Path(args.test))
        model = load(Path(args.model))
        transformed_test = transform(model, test, args)
        transformed_test.to_npz(Path(args.new_test))
    else:
        # Fail loudly instead of exiting successfully without writing any output.
        raise RuntimeError(f"Unknown mode: {args.mode}")
def main() -> None:
    """LFR Model.

    Learning fair representations is a pre-processing technique that finds a
    latent representation which encodes the data well but obfuscates information
    about protected attributes [2]_.

    The function parses ``ZemelArgs`` and dispatches on ``args.mode``:

    * ``"run"``: load train and test data with ``load_data_from_flags``, pass
      them to ``train_and_transform`` and save the result with
      ``save_transformations``.
    * ``"fit"``: load the training set from ``args.train``, fit the model,
      transform the training features, write them to ``args.new_train`` and
      dump the model to ``args.model``.
    * ``"transform"``: load the test set from ``args.test``, load the model from
      ``args.model``, transform the test set and write it to ``args.new_test``.

    Raises:
        AssertionError: if a path required by the selected mode is ``None``.
        RuntimeError: if ``args.mode`` is not one of the modes listed above.

    References:
        .. [2] R. Zemel, Y. Wu, K. Swersky, T. Pitassi, and C. Dwork, "Learning
           Fair Representations." International Conference on Machine Learning, 2013.

    Based on code from https://github.com/zjelveh/learning-fair-representations
    Which in turn, we've got from AIF360
    """
    args = ZemelArgs()
    args.parse_args()

    if args.mode == "run":
        assert args.train is not None
        assert args.new_train is not None
        assert args.test is not None
        assert args.new_test is not None
        train, test = load_data_from_flags(args)
        save_transformations(train_and_transform(train, test, args), args)
    elif args.mode == "fit":
        assert args.model is not None
        assert args.train is not None
        assert args.new_train is not None
        train = DataTuple.from_npz(Path(args.train))
        model = fit(train, args)

        # Split the features by the value of the first sensitive column:
        # rows with value 0 go to `training_sensitive`, rows with value 1 to
        # `training_nonsensitive`.
        sens_col = train.s.columns[0]
        training_sensitive = train.x.loc[train.s[sens_col] == 0].to_numpy()
        training_nonsensitive = train.x.loc[train.s[sens_col] == 1].to_numpy()
        train_transformed = trans(
            model.prototypes, model.w, training_nonsensitive, training_sensitive, train
        )

        # Only the features are replaced; s, y and the name are carried over.
        data = DataTuple(x=train_transformed, s=train.s, y=train.y, name=train.name)
        data.to_npz(Path(args.new_train))
        dump(model, Path(args.model))
    elif args.mode == "transform":
        assert args.model is not None
        assert args.test is not None
        assert args.new_test is not None
        test = DataTuple.from_npz(Path(args.test))
        model = load(Path(args.model))
        transformed_test = transform(test, model.prototypes, model.w)
        transformed_test.to_npz(Path(args.new_test))
    else:
        # Fail loudly instead of exiting successfully without writing any output.
        raise RuntimeError(f"Unknown mode: {args.mode}")
def main():
    """Run the Agarwal model as a standalone program.

    Loads the training and test sets from the ``.npz`` files named by
    ``args.train`` and ``args.test``, calls ``train_and_predict`` and writes the
    ``"preds"`` entry of its result, wrapped in a ``Prediction``, to
    ``args.predictions``.
    """
    args: AgarwalArgs = AgarwalArgs().parse_args()

    # Load the inputs.
    train_data = DataTuple.from_npz(Path(args.train))
    test_data = TestTuple.from_npz(Path(args.test))

    # Train, predict and wrap the predictions.
    result = train_and_predict(train_data, test_data, args)
    prediction = Prediction(hard=result["preds"])

    # Persist the predictions.
    prediction.to_npz(Path(args.predictions))
def fit(self, train: DataTuple) -> Tuple[PreAlgorithm, DataTuple]:
    """Fit the algorithm by running its script in ``fit`` mode.

    The training data is written to a temporary directory, the command built by
    ``_fit_script_command`` is run with ``--mode fit`` appended, and the
    transformed training data written by the script is loaded back. As a side
    effect, ``self.model_path`` is set to
    ``self.model_dir / f"model_{self.name}.joblib"`` and passed to the script
    as the model location.

    Args:
        train: training data

    Returns:
        a tuple of this algorithm instance and the transformed training data,
        whose name is prefixed with the name of the algorithm (or left as
        ``None`` if the input had no name)
    """
    self.model_path = self.model_dir / f"model_{self.name}.joblib"
    with TemporaryDirectory() as tmpdir:
        tmp_path = Path(tmpdir)
        # ================================ write data to files ================================
        train_path = tmp_path / "train.npz"
        train.to_npz(train_path)

        # ========================== generate commandline arguments ===========================
        transformed_train_path = tmp_path / "transformed_train.npz"
        cmd = self._fit_script_command(train_path, transformed_train_path, self.model_path)

        # ============================= run the generated command =============================
        self._call_script(cmd + ["--mode", "fit"])

        # ================================== load results =====================================
        transformed_train = DataTuple.from_npz(transformed_train_path)

        # prefix the name of the algorithm to the dataset name
        transformed_train = transformed_train.replace(
            name=None if train.name is None else f"{self.name}: {train.name}"
        )
        return self, transformed_train
def main() -> None:
    """Run the Agarwal model as a standalone program.

    After seeding ``random`` and ``numpy.random`` with ``args.seed``, the
    function dispatches on ``args.mode``:

    * ``"run"``: train on ``args.train``, predict on ``args.test`` and write the
      predictions to ``args.predictions``.
    * ``"fit"``: fit a model on ``args.train``, serialise it with cloudpickle
      and dump the serialised model to ``args.model``.
    * ``"predict"``: load and deserialise the model from ``args.model``, predict
      on ``args.test`` and write the predictions to ``args.predictions``.

    Raises:
        RuntimeError: if cloudpickle cannot be imported, or if ``args.mode`` is
            not one of the modes above.
        AssertionError: if a path required by the selected mode is ``None``.
    """
    args: AgarwalArgs = AgarwalArgs().parse_args()
    seed = args.seed
    random.seed(seed)
    np.random.seed(seed)

    try:
        import cloudpickle

        # Need to install cloudpickle for now. See https://github.com/fairlearn/fairlearn/issues/569
    except ImportError as e:
        raise RuntimeError(
            "In order to use Agarwal, install fairlearn and cloudpickle."
        ) from e

    mode = args.mode

    if mode == "run":
        assert args.train is not None
        assert args.test is not None
        assert args.predictions is not None
        train = DataTuple.from_npz(Path(args.train))
        test = TestTuple.from_npz(Path(args.test))
        hard_preds = train_and_predict(train, test, args)["preds"]
        Prediction(hard=hard_preds).to_npz(Path(args.predictions))
        return

    if mode == "fit":
        assert args.train is not None
        assert args.model is not None
        train_data = DataTuple.from_npz(Path(args.train))
        fitted = fit(train_data, args)
        # Serialisation happens inside the `working_dir` context.
        with working_dir(Path(args.model)):
            serialised = cloudpickle.dumps(fitted)
        dump(serialised, Path(args.model))
        return

    if mode == "predict":
        assert args.model is not None
        assert args.predictions is not None
        assert args.test is not None
        test_data = TestTuple.from_npz(Path(args.test))
        serialised = load(Path(args.model))
        # Deserialisation happens inside the `working_dir` context.
        with working_dir(Path(args.model)):
            fitted = cloudpickle.loads(serialised)
        hard_preds = predict(fitted, test_data)["preds"]
        Prediction(hard=hard_preds).to_npz(Path(args.predictions))
        return

    raise RuntimeError(f"Unknown mode: {args.mode}")
def main() -> None:
    """This function runs the FWD model as a standalone program on tabular data.

    NOTE(review): the arguments class is ``DroArgs`` while this summary says
    "FWD" -- confirm which model name is intended.

    The function parses ``DroArgs`` and dispatches on ``args.mode``:

    * ``"run"``: load train and test data with ``load_data_from_flags``, call
      ``train_and_predict`` and write its result to ``args.predictions``.
    * ``"fit"``: load the training set from ``args.train``, fit a model and
      dump it to ``args.model``.
    * ``"predict"``: load the test set from ``args.test``, load the model from
      ``args.model``, call ``predict`` and write its result to
      ``args.predictions``.

    Raises:
        AssertionError: if a path required by the selected mode is ``None``.
        RuntimeError: if ``args.mode`` is not one of the modes listed above.
    """
    args = DroArgs().parse_args()

    if args.mode == "run":
        assert args.train is not None
        assert args.test is not None
        assert args.predictions is not None
        train, test = load_data_from_flags(args)
        train_and_predict(train, test, args).to_npz(Path(args.predictions))
    elif args.mode == "fit":
        assert args.train is not None
        assert args.model is not None
        data = DataTuple.from_npz(Path(args.train))
        model = fit(data, args)
        dump(model, Path(args.model))
    elif args.mode == "predict":
        assert args.model is not None
        assert args.predictions is not None
        assert args.test is not None
        data = TestTuple.from_npz(Path(args.test))
        model = load(Path(args.model))
        predict(model, data, args).to_npz(Path(args.predictions))
    else:
        # Fail loudly instead of exiting successfully without writing any output.
        raise RuntimeError(f"Unknown mode: {args.mode}")
async def run_async(self, train: DataTuple, test: TestTuple) -> Tuple[DataTuple, TestTuple]:
    """Generate fair features with the given data asynchronously.

    Both datasets are written to a temporary directory, the command built by
    ``_script_command`` is awaited via ``_call_script``, and the transformed
    datasets written by the script are loaded back.

    Args:
        train: training data
        test: test data

    Returns:
        a tuple of the pre-processed training data and the test data; each
        name is prefixed with the name of the algorithm unless the
        corresponding input name was ``None``
    """
    with TemporaryDirectory() as scratch:
        workdir = Path(scratch)

        # Inputs handed to the script.
        train_in = workdir / "train.npz"
        test_in = workdir / "test.npz"
        train.to_npz(train_in)
        test.to_npz(test_in)

        # Locations the script is told to write its outputs to.
        train_out = workdir / "transformed_train.npz"
        test_out = workdir / "transformed_test.npz"

        # Build the command line and run it.
        await self._call_script(
            self._script_command(train_in, test_in, train_out, test_out)
        )

        # Read the outputs back.
        new_train = DataTuple.from_npz(train_out)
        new_test = TestTuple.from_npz(test_out)

        # Prefix each dataset name with the name of the algorithm.
        if train.name is None:
            train_name = None
        else:
            train_name = f"{self.name}: {train.name}"
        new_train = new_train.replace(name=train_name)

        if test.name is None:
            test_name = None
        else:
            test_name = f"{self.name}: {test.name}"
        new_test = new_test.replace(name=test_name)

        return new_train, new_test
def load_data_from_flags(args: AlgoArgs) -> Tuple[DataTuple, TestTuple]:
    """Load the train and test sets from the ``.npz`` paths held in ``args``.

    Args:
        args: parsed flags; ``args.train`` and ``args.test`` are used as paths.

    Returns:
        a tuple of the training data and the test data
    """
    train_data = DataTuple.from_npz(Path(args.train))
    test_data = TestTuple.from_npz(Path(args.test))
    return train_data, test_data