def run(self, train: DataTuple, test: TestTuple) -> SoftPrediction:
    """Fit a liblinear logistic-regression model on *train* and return soft predictions for *test*.

    The returned series holds the predicted probability of the positive class
    (column 1 of ``predict_proba``) for each test sample.
    """
    # Seeded RandomState keeps the solver reproducible across runs.
    rng = np.random.RandomState(seed=self.seed)
    classifier = LogisticRegression(
        solver="liblinear",
        random_state=rng,
        C=self.C,
        multi_class="auto",
    )
    labels = train.y.to_numpy().ravel()
    classifier.fit(train.x, labels)
    positive_class_probs = classifier.predict_proba(test.x)[:, 1]
    return SoftPrediction(soft=pd.Series(positive_class_probs))
def train_and_predict(train: DataTuple, test: TestTuple, args: DroArgs) -> SoftPrediction:
    """Train a DRO network on *train* and return soft predictions for *test*.

    Args:
        train: training features, sensitive attributes and labels.
        test: test features and sensitive attributes.
        args: hyperparameters (batch size, network size, eta, number of epochs).

    Returns:
        Soft (probability-like) predictions for the test set, flattened to a
        single ``pd.Series``.
    """
    # Set up the training data.
    train_data = CustomDataset(train)
    train_loader = DataLoader(train_data, batch_size=args.batch_size)

    # Build the network on CPU.
    model = DROClassifier(
        in_size=train_data.xdim,
        out_size=train_data.ydim,
        network_size=args.network_size,
        eta=args.eta,
    ).to("cpu")
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    # Training loop; `int()` guards against epochs arriving as a float.
    for epoch in range(int(args.epochs)):
        train_model(epoch, model, train_loader, optimizer)

    # Inference is identical to the standalone `predict` helper in this module;
    # delegate to it instead of duplicating the evaluation loop.
    return predict(model, test, args)
def metric_per_sensitive_attribute(
    prediction: Prediction, actual: DataTuple, metric: Metric, use_sens_name: bool = True
) -> Dict[str, float]:
    """Evaluate *metric* separately on each subgroup that shares a sensitive-attribute value.

    Args:
        prediction: predictions to score; soft predictions are masked via their
            ``soft`` series, hard ones via ``hard``.
        actual: the dataset the predictions refer to.
        metric: the metric to apply to each subgroup.
        use_sens_name: if True, key results by the sensitive column name,
            otherwise by the literal ``"S"``.

    Returns:
        Mapping from ``"<column-or-S>_<value>"`` to the metric score on that subgroup.

    Raises:
        MetricNotApplicable: if the metric cannot be applied per sensitive attribute.
    """
    if not metric.apply_per_sensitive:
        raise MetricNotApplicable(
            f"Metric {metric.name} is not applicable per sensitive "
            f"attribute, apply to whole dataset instead"
        )

    # All parts of the data must describe the same number of samples.
    assert actual.s.shape[0] == actual.x.shape[0]
    assert actual.s.shape[0] == actual.y.shape[0]
    assert prediction.hard.shape[0] == actual.y.shape[0]

    s_columns: List[str] = list(actual.s.columns)
    y_columns: List[str] = list(actual.y.columns)
    assert len(y_columns) == 1

    scores: Dict[str, float] = {}
    for y_col in y_columns:
        for s_col in s_columns:
            for s_value in actual.s[s_col].unique():
                # Boolean mask selecting the rows of this subgroup.
                mask: pd.Series = actual.s[s_col] == s_value
                subgroup = DataTuple(
                    x=pd.DataFrame(
                        actual.x.loc[mask][actual.x.columns], columns=actual.x.columns
                    ).reset_index(drop=True),
                    s=pd.DataFrame(
                        actual.s.loc[mask][s_col], columns=[s_col]
                    ).reset_index(drop=True),
                    y=pd.DataFrame(
                        actual.y.loc[mask][y_col], columns=[y_col]
                    ).reset_index(drop=True),
                    name=actual.name,
                )
                # Mask the prediction the same way, keeping its concrete type.
                masked_pred: Prediction
                if isinstance(prediction, SoftPrediction):
                    masked_pred = SoftPrediction(
                        soft=prediction.soft.loc[mask].reset_index(drop=True),
                        info=prediction.info,
                    )
                else:
                    masked_pred = Prediction(
                        hard=prediction.hard.loc[mask].reset_index(drop=True),
                        info=prediction.info,
                    )
                prefix = s_col if use_sens_name else "S"
                scores[prefix + "_" + str(s_value)] = metric.score(masked_pred, subgroup)
    return scores
def predict(model: DROClassifier, test: TestTuple, args: DroArgs) -> SoftPrediction:
    """Run *model* over *test* and return its outputs as soft predictions.

    Args:
        model: an already-trained classifier (this function does NOT train it;
            the previous docstring claiming it did was a copy-paste error).
        test: test features and sensitive attributes.
        args: hyperparameters; only ``batch_size`` is used here.

    Returns:
        Model outputs flattened into a single ``pd.Series``.
    """
    # Set up the data.
    test_data = TestDataset(test)
    test_loader = DataLoader(test_data, batch_size=args.batch_size)

    # Collect and flatten the model outputs.
    flat_outputs: List[float] = []
    model.eval()
    with torch.no_grad():
        for _x, _s in test_loader:
            # Call the module itself (not .forward) so any registered hooks run.
            out = model(_x)
            # Each batch is a list of per-sample lists; flatten as we go.
            flat_outputs.extend(value for row in out.data.tolist() for value in row)

    return SoftPrediction(soft=pd.Series(flat_outputs))
def predict(self, test: TestTuple) -> Prediction:
    """Return soft predictions (positive-class probabilities) for *test*."""
    # Column 1 of predict_proba is the probability of the positive class.
    positive_probs = self.clf.predict_proba(test.x)[:, 1]
    return SoftPrediction(soft=pd.Series(positive_probs))