def fit(self, train: DataTuple) -> Tuple[PreAlgorithm, DataTuple]:
        """Fit the pre-processing script on the training data and transform that data.

        The training data is written to a temporary ``.npz`` file, the command
        built by ``_fit_script_command`` is run with ``--mode fit`` appended, and
        the transformed training data is read back from the output path that was
        handed to that command. The model path
        (``model_dir / "model_<name>.joblib"``) is stored on ``self.model_path``
        and passed to the command as well.

        Args:
            train: training data

        Returns:
            a tuple of this algorithm instance and the transformed training
            data; the transformed data's name is the original name prefixed with
            the algorithm's name, or ``None`` if the input had no name
        """
        self.model_path = self.model_dir / f"model_{self.name}.joblib"
        with TemporaryDirectory() as tmpdir:
            tmp_path = Path(tmpdir)
            # ================================ write data to files ================================
            train_path = tmp_path / "train.npz"
            train.to_npz(train_path)

            # ========================== generate commandline arguments ===========================
            transformed_train_path = tmp_path / "transformed_train.npz"
            cmd = self._fit_script_command(train_path, transformed_train_path, self.model_path)

            # ============================= run the generated command =============================
            # NOTE(review): the call is not awaited; this assumes `_call_script`
            # blocks until the script has finished -- confirm.
            self._call_script(cmd + ["--mode", "fit"])

            # ================================== load results =====================================
            # must happen inside the `with` block: the file lives in the temporary directory
            transformed_train = DataTuple.from_npz(transformed_train_path)

        # prefix the name of the algorithm to the dataset name
        transformed_train = transformed_train.replace(
            name=None if train.name is None else f"{self.name}: {train.name}"
        )
        return self, transformed_train
Beispiel #2
0
def main() -> None:
    """Entry point of the LFR (Learning Fair Representations) script.

    Learning fair representations is a pre-processing technique that finds a
    latent representation which encodes the data well but obfuscates information
    about protected attributes [2]_.

    The parsed ``mode`` argument selects what is done:

    * ``"run"``: load train and test data from the flags, train and transform
      in one go, and save the transformed data.
    * ``"fit"``: load the training data, fit a model, write the transformed
      training data to ``new_train`` and dump the model to ``model``.
    * ``"transform"``: load the test data and a previously dumped model, and
      write the transformed test data to ``new_test``.

    Any other mode value does nothing.

    References:
        .. [2] R. Zemel, Y. Wu, K. Swersky, T. Pitassi, and C. Dwork,  "Learning
           Fair Representations." International Conference on Machine Learning,
           2013.
    Based on code from https://github.com/zjelveh/learning-fair-representations
    Which in turn, we've got from AIF360
    """
    args = ZemelArgs()
    args.parse_args()
    mode = args.mode

    if mode == "run":
        # all four paths are required for a combined train-and-transform run
        assert args.train is not None
        assert args.new_train is not None
        assert args.test is not None
        assert args.new_test is not None
        train, test = load_data_from_flags(args)
        transformed = train_and_transform(train, test, args)
        save_transformations(transformed, args)
        return

    if mode == "fit":
        assert args.model is not None
        assert args.train is not None
        assert args.new_train is not None
        train = DataTuple.from_npz(Path(args.train))
        model = fit(train, args)

        # split the features by the value (0 or 1) in the first column of `s`
        sens_values = train.s[train.s.columns[0]]
        rows_s0 = train.x.loc[sens_values == 0].to_numpy()
        rows_s1 = train.x.loc[sens_values == 1].to_numpy()

        # the s == 1 rows are passed before the s == 0 rows
        train_transformed = trans(model.prototypes, model.w, rows_s1, rows_s0, train)

        result = DataTuple(x=train_transformed, s=train.s, y=train.y, name=train.name)
        result.to_npz(Path(args.new_train))
        dump(model, Path(args.model))
        return

    if mode == "transform":
        assert args.model is not None
        assert args.test is not None
        assert args.new_test is not None
        test = DataTuple.from_npz(Path(args.test))
        model = load(Path(args.model))
        transform(test, model.prototypes, model.w).to_npz(Path(args.new_test))
Beispiel #3
0
    def fit(self: _IA, train: DataTuple) -> _IA:
        """Fit the algorithm on the given training data by calling its script.

        The training data is written to a temporary ``.npz`` file and the
        command built by ``_fit_script_command`` is run with ``--mode fit``
        appended. The model path (``model_dir / "model_<name>.joblib"``) is
        stored on ``self.model_path`` and passed to that command.

        Args:
            train: training data

        Returns:
            this instance
        """
        self.model_path = self.model_dir / f"model_{self.name}.joblib"
        with TemporaryDirectory() as tmpdir:
            train_file = Path(tmpdir) / "train.npz"
            train.to_npz(train_file)
            base_cmd = self._fit_script_command(train_file, self.model_path)
            fit_cmd = base_cmd + ["--mode", "fit"]
            self._call_script(fit_cmd)  # wait for script to run
        return self
    async def run_async(self, train: DataTuple, test: TestTuple) -> Prediction:
        """Run the algorithm's script on the given data and return its predictions.

        Train and test data are written to ``.npz`` files in a temporary
        directory, the command built by ``_script_command`` is awaited, and the
        predictions are read from the output file before the directory is
        removed.

        Args:
            train: training data
            test: test data

        Returns:
            predictions
        """
        with TemporaryDirectory() as tmpdir:
            workdir = Path(tmpdir)
            train_file, test_file, pred_file = (
                workdir / "train.npz",
                workdir / "test.npz",
                workdir / "predictions.npz",
            )
            train.to_npz(train_file)
            test.to_npz(test_file)
            script_cmd = self._script_command(train_file, test_file, pred_file)
            await self._call_script(script_cmd)  # wait for script to run
            # load while the temporary directory still exists
            predictions = Prediction.from_npz(pred_file)
        return predictions
    async def run_async(self, train: DataTuple,
                        test: TestTuple) -> Tuple[DataTuple, TestTuple]:
        """Transform the given train and test data by calling the algorithm's script.

        Both inputs are written to ``.npz`` files in a temporary directory, the
        command built by ``_script_command`` is awaited, and the transformed
        data is read back from the output files. The names of the results are
        the input names prefixed with the algorithm's name (``None`` stays
        ``None``).

        Args:
            train: training data
            test: test data

        Returns:
            a tuple of the pre-processed training data and the test data
        """
        with TemporaryDirectory() as tmpdir:
            workdir = Path(tmpdir)

            # input files
            train_file = workdir / "train.npz"
            test_file = workdir / "test.npz"
            train.to_npz(train_file)
            test.to_npz(test_file)

            # output files, filled in by the script
            out_train_file = workdir / "transformed_train.npz"
            out_test_file = workdir / "transformed_test.npz"

            script_cmd = self._script_command(
                train_file, test_file, out_train_file, out_test_file
            )
            await self._call_script(script_cmd)

            # load while the temporary directory still exists
            new_train = DataTuple.from_npz(out_train_file)
            new_test = TestTuple.from_npz(out_test_file)

        # prefix the name of the algorithm to the dataset name
        train_name = None
        if train.name is not None:
            train_name = f"{self.name}: {train.name}"
        new_train = new_train.replace(name=train_name)

        test_name = None
        if test.name is not None:
            test_name = f"{self.name}: {test.name}"
        new_test = new_test.replace(name=test_name)

        return new_train, new_test