Esempio n. 1
0
 def run(self, train: DataTuple, test: TestTuple) -> SoftPrediction:
     """Fit a logistic regression on ``train`` and score ``test``.

     The classifier uses the ``liblinear`` solver, the regularisation
     constant ``self.C`` and a ``RandomState`` seeded with ``self.seed``.

     Returns:
         A ``SoftPrediction`` whose ``soft`` series holds column 1 of
         ``predict_proba`` for every row of ``test.x`` (presumably the
         probability of the positive class -- confirm the label encoding).
     """
     classifier = LogisticRegression(
         solver="liblinear",
         random_state=np.random.RandomState(seed=self.seed),
         C=self.C,
         multi_class="auto",
     )
     # scikit-learn expects a flat label vector, so the label frame is raveled.
     flat_labels = train.y.to_numpy().ravel()
     classifier.fit(train.x, flat_labels)

     second_column_probs = classifier.predict_proba(test.x)[:, 1]
     return SoftPrediction(soft=pd.Series(second_column_probs))
def train_and_predict(train: DataTuple, test: TestTuple,
                      args: DroArgs) -> SoftPrediction:
    """Fit a ``DROClassifier`` on ``train`` and return its outputs for ``test``.

    Args:
        train: training data, wrapped in a ``CustomDataset``.
        test: data to predict on, wrapped in a ``TestDataset``.
        args: supplies ``batch_size``, ``network_size``, ``eta`` and ``epochs``.

    Returns:
        A ``SoftPrediction`` whose ``soft`` series is the flattened sequence of
        model outputs over all test batches, in loader order.
    """
    # Batched views over both splits.
    train_set = CustomDataset(train)
    train_batches = DataLoader(train_set, batch_size=args.batch_size)
    test_batches = DataLoader(TestDataset(test), batch_size=args.batch_size)

    # Model sized from the training set; kept on the CPU.
    net = DROClassifier(
        in_size=train_set.xdim,
        out_size=train_set.ydim,
        network_size=args.network_size,
        eta=args.eta,
    ).to("cpu")
    adam = optim.Adam(net.parameters(), lr=1e-3)

    # One call to train_model per epoch.
    n_epochs = int(args.epochs)
    for epoch_idx in range(n_epochs):
        train_model(epoch_idx, net, train_batches, adam)

    # Inference: evaluation mode, no gradient tracking.
    net.eval()
    flat_outputs: List[float] = []
    with torch.no_grad():
        for features, _ in test_batches:
            batch_rows = net.forward(features).data.tolist()
            # Each batch yields nested lists; concatenate the rows into one flat list.
            for row in batch_rows:
                flat_outputs.extend(row)

    return SoftPrediction(soft=pd.Series(flat_outputs))
Esempio n. 3
0
def metric_per_sensitive_attribute(
        prediction: Prediction,
        actual: DataTuple,
        metric: Metric,
        use_sens_name: bool = True) -> Dict[str, float]:
    """Score ``metric`` separately on each group sharing a sensitive-attribute value.

    For every column of ``actual.s`` and every distinct value in that column,
    the rows with that value are selected from ``actual`` and ``prediction``,
    re-indexed from zero, and passed to ``metric.score``.

    Args:
        prediction: predictions aligned row-for-row with ``actual``.
        actual: ground-truth data; ``actual.y`` must have exactly one column.
        metric: the metric to evaluate; must have ``apply_per_sensitive`` set.
        use_sens_name: if true, result keys start with the sensitive column's
            name; otherwise they start with the literal ``"S"``.

    Returns:
        A dict mapping ``"<prefix>_<value>"`` to the metric score of that group.

    Raises:
        MetricNotApplicable: if ``metric.apply_per_sensitive`` is falsy.
    """
    if not metric.apply_per_sensitive:
        raise MetricNotApplicable(
            f"Metric {metric.name} is not applicable per sensitive "
            f"attribute, apply to whole dataset instead")

    # All parts of the data and the prediction must describe the same rows.
    assert actual.s.shape[0] == actual.x.shape[0]
    assert actual.s.shape[0] == actual.y.shape[0]
    assert prediction.hard.shape[0] == actual.y.shape[0]

    sens_cols: List[str] = list(actual.s.columns)
    label_cols: List[str] = list(actual.y.columns)
    assert len(label_cols) == 1

    feature_cols = actual.x.columns
    is_soft = isinstance(prediction, SoftPrediction)
    scores: Dict[str, float] = {}

    for label_col in label_cols:
        for sens_col in sens_cols:
            key_prefix = sens_col if use_sens_name else "S"
            sens_values = actual.s[sens_col]

            for group_value in sens_values.unique():
                in_group: pd.Series = sens_values == group_value

                # Slice each frame down to the group's rows and restart the index.
                group_x = pd.DataFrame(
                    actual.x.loc[in_group][feature_cols],
                    columns=feature_cols,
                ).reset_index(drop=True)
                group_s = pd.DataFrame(
                    actual.s.loc[in_group][sens_col],
                    columns=[sens_col],
                ).reset_index(drop=True)
                group_y = pd.DataFrame(
                    actual.y.loc[in_group][label_col],
                    columns=[label_col],
                ).reset_index(drop=True)
                group_data = DataTuple(
                    x=group_x, s=group_s, y=group_y, name=actual.name)

                # Keep the prediction's flavour (soft vs. hard) for the subset.
                group_pred: Prediction
                if is_soft:
                    group_pred = SoftPrediction(
                        soft=prediction.soft.loc[in_group].reset_index(drop=True),
                        info=prediction.info)
                else:
                    group_pred = Prediction(
                        hard=prediction.hard.loc[in_group].reset_index(drop=True),
                        info=prediction.info)

                result_key = "_".join((key_prefix, str(group_value)))
                scores[result_key] = metric.score(group_pred, group_data)

    return scores
Esempio n. 4
0
def predict(model: DROClassifier, test: TestTuple,
            args: DroArgs) -> SoftPrediction:
    """Run ``model`` over ``test`` and return its outputs as soft predictions.

    No training happens here: the model is switched to evaluation mode and
    applied batch by batch with gradient tracking disabled.

    Args:
        model: the network to evaluate.
        test: data to predict on, wrapped in a ``TestDataset``.
        args: only ``batch_size`` is read, to size the test batches.

    Returns:
        A ``SoftPrediction`` whose ``soft`` series is the flattened sequence of
        model outputs over all test batches, in loader order.
    """
    test_batches = DataLoader(TestDataset(test), batch_size=args.batch_size)

    model.eval()
    flat_outputs: List[float] = []
    with torch.no_grad():
        for features, _ in test_batches:
            batch_rows = model.forward(features).data.tolist()
            # Each batch yields nested lists; concatenate the rows into one flat list.
            for row in batch_rows:
                flat_outputs.extend(row)

    return SoftPrediction(soft=pd.Series(flat_outputs))
Esempio n. 5
0
 def predict(self, test: TestTuple) -> Prediction:
     """Score ``test`` with the classifier stored in ``self.clf``.

     Returns:
         A ``SoftPrediction`` whose ``soft`` series holds column 1 of
         ``self.clf.predict_proba(test.x)`` (presumably the probability of
         the positive class -- confirm the label encoding).
     """
     class_probs = self.clf.predict_proba(test.x)
     second_column_probs = class_probs[:, 1]
     return SoftPrediction(soft=pd.Series(second_column_probs))