Example 1
def main() -> None:
    """Load data from feather files, pass it to `train_and_transform` and then save the result."""
    args = BeutelArgs().parse_args()
    if args.mode == "run":
        assert args.train is not None
        assert args.new_train is not None
        assert args.test is not None
        assert args.new_test is not None
        train, test = load_data_from_flags(args)
        save_transformations(train_and_transform(train, test, args), args)
    elif args.mode == "fit":
        assert args.model is not None
        assert args.train is not None
        assert args.new_train is not None
        train = DataTuple.from_npz(Path(args.train))
        transformed_train, enc = fit(train, args)
        transformed_train.to_npz(Path(args.new_train))
        dump(enc, Path(args.model))
    elif args.mode == "transform":
        assert args.model is not None
        assert args.test is not None
        assert args.new_test is not None
        test = DataTuple.from_npz(Path(args.test))
        model = load(Path(args.model))
        transformed_test = transform(test, model, args)
        transformed_test.to_npz(Path(args.new_test))
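All of these entry points follow the same pattern: `--mode run` fits and transforms in one process, while `--mode fit` and `--mode transform` split the work across two invocations. As a rough illustration (the script name and exact flag spellings below are assumptions; Tap-style parsers normally derive flags such as `--train` and `--new_train` from the attribute names asserted above), the fit/transform split could be driven like this:

# Illustrative only: drive a transformation script's "fit" and "transform" modes
# from Python. "beutel.py" and the flag spellings are assumptions.
import subprocess
import sys

fit_cmd = [
    sys.executable, "beutel.py",
    "--mode", "fit",
    "--train", "train.npz",
    "--new_train", "transformed_train.npz",
    "--model", "beutel_model.joblib",
]
subprocess.run(fit_cmd, check=True)

transform_cmd = [
    sys.executable, "beutel.py",
    "--mode", "transform",
    "--test", "test.npz",
    "--new_test", "transformed_test.npz",
    "--model", "beutel_model.joblib",
]
subprocess.run(transform_cmd, check=True)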
Example 2
def main() -> None:
    """Main method to run model."""
    args = VfaeArgs(explicit_bool=True).parse_args()
    set_seed(args.seed)
    if args.mode == "run":
        assert args.train is not None
        assert args.new_train is not None
        assert args.test is not None
        assert args.new_test is not None
        train, test = load_data_from_flags(args)
        save_transformations(train_and_transform(train, test, args), args)
    elif args.mode == "fit":
        assert args.model is not None
        assert args.train is not None
        assert args.new_train is not None
        train = DataTuple.from_npz(Path(args.train))
        enc = fit(train, args)
        transformed_train = transform(enc, train, args)
        transformed_train.to_npz(Path(args.new_train))
        dump(enc, Path(args.model))
    elif args.mode == "transform":
        assert args.model is not None
        assert args.test is not None
        assert args.new_test is not None
        test = DataTuple.from_npz(Path(args.test))
        model = load(Path(args.model))
        transformed_test = transform(model, test, args)
        transformed_test.to_npz(Path(args.new_test))
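`set_seed` is not defined in this snippet. A minimal sketch of what it plausibly does, assuming the VFAE implementation is NumPy/PyTorch based (an assumption, not confirmed by the snippet):

import random

import numpy as np
import torch


def set_seed(seed: int) -> None:
    """Seed every random number generator the model might touch (illustrative sketch)."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)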
Example 3
def main() -> None:
    """LFR Model.

    Learning fair representations is a pre-processing technique that finds a
    latent representation which encodes the data well but obfuscates information
    about protected attributes [2]_.

    References:
        .. [2] R. Zemel, Y. Wu, K. Swersky, T. Pitassi, and C. Dwork,  "Learning
           Fair Representations." International Conference on Machine Learning,
           2013.
    Based on code from https://github.com/zjelveh/learning-fair-representations,
    which was in turn adapted from AIF360.
    """
    args = ZemelArgs()
    args.parse_args()
    if args.mode == "run":
        assert args.train is not None
        assert args.new_train is not None
        assert args.test is not None
        assert args.new_test is not None
        train, test = load_data_from_flags(args)
        save_transformations(train_and_transform(train, test, args), args)
    elif args.mode == "fit":
        assert args.model is not None
        assert args.train is not None
        assert args.new_train is not None
        train = DataTuple.from_npz(Path(args.train))
        model = fit(train, args)
        sens_col = train.s.columns[0]
        training_sensitive = train.x.loc[train.s[sens_col] == 0].to_numpy()
        training_nonsensitive = train.x.loc[train.s[sens_col] == 1].to_numpy()
        train_transformed = trans(model.prototypes, model.w,
                                  training_nonsensitive, training_sensitive,
                                  train)
        data = DataTuple(x=train_transformed,
                         s=train.s,
                         y=train.y,
                         name=train.name)
        data.to_npz(Path(args.new_train))
        dump(model, Path(args.model))
    elif args.mode == "transform":
        assert args.model is not None
        assert args.test is not None
        assert args.new_test is not None
        test = DataTuple.from_npz(Path(args.test))
        model = load(Path(args.model))
        transformed_test = transform(test, model.prototypes, model.w)
        transformed_test.to_npz(Path(args.new_test))
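The `trans(model.prototypes, model.w, ...)` call applies the learned LFR mapping. In Zemel et al. [2], each sample is assigned a softmax distribution over K learned prototypes based on its distance to them, and the fair representation is the distribution-weighted combination of those prototypes. The sketch below illustrates that mapping only; the names and shapes, and the omission of `model.w`, are simplifications, and the repository's `trans` may differ:

import numpy as np


def map_to_prototypes(x: np.ndarray, prototypes: np.ndarray) -> np.ndarray:
    """Map samples to prototype-weighted reconstructions (illustrative sketch).

    x:          (n_samples, n_features) input features
    prototypes: (k, n_features) learned prototype locations
    """
    # Squared Euclidean distance from every sample to every prototype: (n_samples, k)
    dists = ((x[:, None, :] - prototypes[None, :, :]) ** 2).sum(axis=-1)
    # Softmax over negative distances gives each sample a distribution over prototypes.
    logits = -dists
    logits -= logits.max(axis=1, keepdims=True)  # numerical stability
    probs = np.exp(logits)
    probs /= probs.sum(axis=1, keepdims=True)
    # The fair representation is the probability-weighted mix of prototypes.
    return probs @ prototypes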
Example 4
def main() -> None:
    """This function runs the Agarwal model as a standalone program."""
    args: AgarwalArgs = AgarwalArgs().parse_args()
    train, test = DataTuple.from_npz(Path(args.train)), TestTuple.from_npz(
        Path(args.test))
    Prediction(hard=train_and_predict(train, test, args)["preds"]).to_npz(
        Path(args.predictions))
    def fit(self, train: DataTuple) -> Tuple[PreAlgorithm, DataTuple]:
        """Generate fair features with the given data asynchronously.

        Args:
            train: training data
            test: test data

        Returns:
            a tuple of the pre-processed training data and the test data
        """
        self.model_path = self.model_dir / f"model_{self.name}.joblib"
        with TemporaryDirectory() as tmpdir:
            tmp_path = Path(tmpdir)
            # ================================ write data to files ================================
            train_path, test_path = tmp_path / "train.npz", tmp_path / "test.npz"
            train.to_npz(train_path)

            # ========================== generate commandline arguments ===========================
            transformed_train_path = tmp_path / "transformed_train.npz"
            cmd = self._fit_script_command(train_path, transformed_train_path, self.model_path)

            # ============================= run the generated command =============================
            self._call_script(cmd + ["--mode", "fit"])

            # ================================== load results =====================================
            transformed_train = DataTuple.from_npz(transformed_train_path)

        # prefix the name of the algorithm to the dataset name
        transformed_train = transformed_train.replace(
            name=None if train.name is None else f"{self.name}: {train.name}"
        )
        return self, transformed_train
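The wrapper above never runs the model in-process: it writes the data to a temporary directory, builds a command line for a standalone script (such as the `main` functions in this section), and reads the transformed data back. `_call_script` is not shown; a minimal synchronous sketch of it, assuming it simply launches the script with the current interpreter, is:

import subprocess
import sys
from typing import List


def call_script(cmd_args: List[str]) -> None:
    """Run an interface script in a subprocess (illustrative stand-in for `_call_script`)."""
    # `cmd_args` starts with the script path and is followed by its flags,
    # e.g. the output of `_fit_script_command` plus ["--mode", "fit"].
    subprocess.run([sys.executable] + cmd_args, check=True)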
Example 6
def main() -> None:
    """This function runs the Agarwal model as a standalone program."""
    args: AgarwalArgs = AgarwalArgs().parse_args()
    random.seed(args.seed)
    np.random.seed(args.seed)
    try:
        import cloudpickle

        # Need to install cloudpickle for now. See https://github.com/fairlearn/fairlearn/issues/569
    except ImportError as e:
        raise RuntimeError(
            "In order to use Agarwal, install fairlearn and cloudpickle."
        ) from e

    if args.mode == "run":
        assert args.train is not None
        assert args.test is not None
        assert args.predictions is not None
        train, test = DataTuple.from_npz(Path(args.train)), TestTuple.from_npz(
            Path(args.test))
        Prediction(hard=train_and_predict(train, test, args)["preds"]).to_npz(
            Path(args.predictions))
    elif args.mode == "fit":
        assert args.train is not None
        assert args.model is not None
        data = DataTuple.from_npz(Path(args.train))
        model = fit(data, args)
        with working_dir(Path(args.model)):
            model_file = cloudpickle.dumps(model)
        dump(model_file, Path(args.model))
    elif args.mode == "predict":
        assert args.model is not None
        assert args.predictions is not None
        assert args.test is not None
        data = TestTuple.from_npz(Path(args.test))
        model_file = load(Path(args.model))
        with working_dir(Path(args.model)):
            model = cloudpickle.loads(model_file)
        Prediction(hard=predict(model, data)["preds"]).to_npz(
            Path(args.predictions))
    else:
        raise RuntimeError(f"Unknown mode: {args.mode}")
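The fit/predict branches persist the fairlearn reduction by first pickling it to bytes with cloudpickle and then writing those bytes with `dump` and reading them back with `load` (presumably joblib, given the `.joblib` model paths used elsewhere in this section). Stripped of the argument handling, the round trip is essentially:

# Illustrative round trip of the serialization used above, assuming `dump`/`load`
# come from joblib (suggested by the ".joblib" model paths in this section).
from pathlib import Path

import cloudpickle
from joblib import dump, load


def save_model(model: object, path: Path) -> None:
    dump(cloudpickle.dumps(model), path)  # pickle to bytes, then persist the bytes


def load_model(path: Path) -> object:
    return cloudpickle.loads(load(path))  # read the bytes back and un-pickle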
Example 7
def main() -> None:
    """This function runs the FWD model as a standalone program on tabular data."""
    args = DroArgs().parse_args()
    if args.mode == "run":
        assert args.train is not None
        assert args.test is not None
        assert args.predictions is not None
        train, test = load_data_from_flags(args)
        train_and_predict(train, test, args).to_npz(Path(args.predictions))
    elif args.mode == "fit":
        assert args.train is not None
        assert args.model is not None
        data = DataTuple.from_npz(Path(args.train))
        model = fit(data, args)
        dump(model, Path(args.model))
    elif args.mode == "predict":
        assert args.model is not None
        assert args.predictions is not None
        assert args.test is not None
        data = TestTuple.from_npz(Path(args.test))
        model = load(Path(args.model))
        predict(model, data, args).to_npz(Path(args.predictions))
    async def run_async(self, train: DataTuple,
                        test: TestTuple) -> Tuple[DataTuple, TestTuple]:
        """Generate fair features with the given data asynchronously.

        Args:
            train: training data
            test: test data

        Returns:
            a tuple of the pre-processed training data and the test data
        """
        with TemporaryDirectory() as tmpdir:
            tmp_path = Path(tmpdir)
            # ================================ write data to files ================================
            train_path, test_path = tmp_path / "train.npz", tmp_path / "test.npz"
            train.to_npz(train_path)
            test.to_npz(test_path)

            # ========================== generate commandline arguments ===========================
            transformed_train_path = tmp_path / "transformed_train.npz"
            transformed_test_path = tmp_path / "transformed_test.npz"
            cmd = self._script_command(train_path, test_path,
                                       transformed_train_path,
                                       transformed_test_path)

            # ============================= run the generated command =============================
            await self._call_script(cmd)

            # ================================== load results =====================================
            transformed_train = DataTuple.from_npz(transformed_train_path)
            transformed_test = TestTuple.from_npz(transformed_test_path)

        # prefix the name of the algorithm to the dataset name
        transformed_train = transformed_train.replace(
            name=None if train.name is None else f"{self.name}: {train.name}")
        transformed_test = transformed_test.replace(
            name=None if test.name is None else f"{self.name}: {test.name}")
        return transformed_train, transformed_test
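`run_async` awaits `self._call_script(cmd)`, so the script call itself must be non-blocking. A minimal sketch of such a helper using `asyncio` subprocesses (an assumption about how `_call_script` is implemented, not the repository's code):

import asyncio
import sys
from typing import List


async def call_script_async(cmd_args: List[str]) -> None:
    """Run an interface script without blocking the event loop (illustrative sketch)."""
    proc = await asyncio.create_subprocess_exec(sys.executable, *cmd_args)
    return_code = await proc.wait()
    if return_code != 0:
        raise RuntimeError(f"script exited with code {return_code}")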
Example 9
def load_data_from_flags(args: AlgoArgs) -> Tuple[DataTuple, TestTuple]:
    """Load data from the paths specified in the flags."""
    return DataTuple.from_npz(Path(args.train)), TestTuple.from_npz(
        Path(args.test))
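`save_transformations` is called from every `--mode run` branch in this section but never shown. Given that it receives the `(train, test)` pair produced by `train_and_transform` together with the parsed args, a plausible sketch (an assumption, not the repository's implementation) is:

from pathlib import Path
from typing import Tuple


def save_transformations(transforms: Tuple["DataTuple", "TestTuple"], args: "AlgoArgs") -> None:
    """Write the transformed train/test pair to the paths given in the flags (sketch)."""
    train, test = transforms
    assert args.new_train is not None
    assert args.new_test is not None
    train.to_npz(Path(args.new_train))
    test.to_npz(Path(args.new_test))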