Example #1
    def test_abstain_labels(self) -> None:
        # We abstain on the last example by convention (label=-1)
        golds = np.array([1, 0, 1, 0, -1])
        preds = np.array([1, 0, 1, 1, 0])
        probs = np.array([0.8, 0.6, 0.9, 0.7, 0.4])

        # Test no abstain
        scorer = Scorer(metrics=["accuracy"], abstain_label=None)
        results = scorer.score(golds, preds, probs)
        results_expected = dict(accuracy=0.6)
        self.assertEqual(results, results_expected)

        # Test abstain=-1 for gold
        scorer = Scorer(metrics=["accuracy"], abstain_label=-1)
        results = scorer.score(golds, preds, probs)
        results_expected = dict(accuracy=0.75)
        self.assertEqual(results, results_expected)

        # Test abstain=-1 for preds and gold
        abstain_preds = np.array([-1, -1, 1, 1, 0])
        results = scorer.score(golds, abstain_preds)
        results_expected = dict(accuracy=0.5)
        self.assertEqual(results, results_expected)

        # Test abstain set to different value
        scorer = Scorer(metrics=["accuracy"], abstain_label=10)
        results = scorer.score(golds, preds, probs)
        results_expected = dict(accuracy=0.6)
        self.assertEqual(results, results_expected)
Example #2
    def test_dict_metric(self) -> None:
        def dict_metric(golds, preds, probs):
            return dict(a=1, b=2)

        scorer = Scorer(custom_metric_funcs=dict(dict_metric=dict_metric))
        results = scorer.score(*self._get_labels())
        results_expected = dict(a=1, b=2)
        self.assertEqual(results, results_expected)
Example #3
    def test_scorer(self) -> None:
        def pred_sum(golds, preds, probs):
            return np.sum(preds)

        scorer = Scorer(metrics=["accuracy", "f1"],
                        custom_metric_funcs=dict(pred_sum=pred_sum))

        results = scorer.score(*self._get_labels())
        results_expected = dict(accuracy=0.6, f1=2 / 3, pred_sum=3)
        self.assertEqual(results, results_expected)
Example #4
    def score(
        self,
        L: np.ndarray,
        Y: np.ndarray,
        metrics: Optional[List[str]] = ["accuracy"],
        tie_break_policy: str = "abstain",
    ) -> Dict[str, float]:
        """Calculate one or more scores from user-specified and/or user-defined metrics.

        Parameters
        ----------
        L
            An [n,m] matrix with values in {-1,0,1,...,k-1}
        Y
            Gold labels associated with data points in L
        metrics
            A list of metric names
        tie_break_policy
            Policy to break ties when converting probabilistic labels to predictions


        Returns
        -------
        Dict[str, float]
            A dictionary mapping metric names to metric scores

        Example
        -------
        >>> L = np.array([[1, 1, -1], [0, 0, -1], [1, 1, -1]])
        >>> label_model = LabelModel(verbose=False)
        >>> label_model.fit(L)
        >>> label_model.score(L, Y=np.array([1, 1, 1]))
        {'accuracy': 0.6666666666666666}
        >>> label_model.score(L, Y=np.array([1, 1, 1]), metrics=["f1"])
        {'f1': 0.8}
        """
        if tie_break_policy == "abstain":  # pragma: no cover
            logging.warning(
                "Metrics calculated over data points with non-abstain labels only"
            )

        Y_pred, Y_prob = self.predict(L,
                                      return_probs=True,
                                      tie_break_policy=tie_break_policy)

        scorer = Scorer(metrics=metrics)
        results = scorer.score(Y, Y_pred, Y_prob)
        return results
Example #5
def create_model(resnet_cnn):
    # freeze the resnet weights
    for param in resnet_cnn.parameters():
        param.requires_grad = False

    # define input features
    in_features = resnet_cnn.fc.in_features
    feature_extractor = nn.Sequential(*list(resnet_cnn.children())[:-1])

    # initialize FC layer: maps 3 sets of image features to class logits
    WEMB_SIZE = 100
    fc = nn.Linear(in_features * 3 + 2 * WEMB_SIZE, 3)
    init_fc(fc)

    # define layers
    module_pool = nn.ModuleDict(
        {
            "feat_extractor": feature_extractor,
            "prediction_head": fc,
            "feat_concat": FlatConcat(),
            "word_emb": WordEmb(),
        }
    )

    # define task flow through modules
    op_sequence = get_op_sequence()
    pred_cls_task = Task(
        name="visual_relation_task",
        module_pool=module_pool,
        op_sequence=op_sequence,
        scorer=Scorer(metrics=["f1_micro"]),
    )
    return MultitaskClassifier([pred_cls_task])
Example #6
def create_task(task_name: str, module_suffixes: List[str]) -> Task:
    module1_name = f"linear1{module_suffixes[0]}"
    module2_name = f"linear2{module_suffixes[1]}"

    module_pool = nn.ModuleDict({
        module1_name:
        nn.Sequential(nn.Linear(2, 20), nn.ReLU()),
        module2_name:
        nn.Linear(20, 2),
    })

    op1 = Operation(module_name=module1_name,
                    inputs=[("_input_", "coordinates")])
    op2 = Operation(module_name=module2_name, inputs=[op1.name])

    op_sequence = [op1, op2]

    task = Task(
        name=task_name,
        module_pool=module_pool,
        op_sequence=op_sequence,
        scorer=Scorer(metrics=["accuracy"]),
    )

    return task
Example #7
    def __init__(
        self,
        base_architecture: nn.Module,
        head_dim: int,
        slice_names: List[str],
        input_data_key: str = DEFAULT_INPUT_DATA_KEY,
        task_name: str = DEFAULT_TASK_NAME,
        scorer: Scorer = Scorer(metrics=["accuracy", "f1"]),
        **multitask_kwargs: Any,
    ) -> None:

        # Initialize module_pool with 1) base_architecture and 2) prediction_head
        # Assuming `head_dim` can be used to map base_architecture to prediction_head
        module_pool = nn.ModuleDict({
            "base_architecture": base_architecture,
            "prediction_head": nn.Linear(head_dim, 2),
        })

        # Create op_sequence from base_architecture -> prediction_head
        op_sequence = [
            Operation(
                name="input_op",
                module_name="base_architecture",
                inputs=[("_input_", input_data_key)],
            ),
            Operation(name="head_op",
                      module_name="prediction_head",
                      inputs=["input_op"]),
        ]

        # Initialize base_task using specified base_architecture
        self.base_task = Task(
            name=task_name,
            module_pool=module_pool,
            op_sequence=op_sequence,
            scorer=scorer,
        )

        # Convert base_task to associated slice_tasks
        slice_tasks = convert_to_slice_tasks(self.base_task, slice_names)

        # Initialize a MultitaskClassifier with all slice_tasks
        model_name = f"{task_name}_slicing_classifier"
        super().__init__(tasks=slice_tasks,
                         name=model_name,
                         **multitask_kwargs)
        self.slice_names = slice_names
Example #8
    def __init__(
        self,
        name: str,
        module_pool: nn.ModuleDict,
        op_sequence: Sequence[Operation],
        scorer: Scorer = Scorer(metrics=["accuracy"]),
        loss_func: Optional[Callable[..., torch.Tensor]] = None,
        output_func: Optional[Callable[..., torch.Tensor]] = None,
    ) -> None:
        self.name = name
        self.module_pool = module_pool
        self.op_sequence = op_sequence
        self.loss_func = loss_func or F.cross_entropy
        self.output_func = output_func or partial(F.softmax, dim=1)
        self.scorer = scorer

        logging.info(f"Created task: {self.name}")
Example #9
    def test_score_slices(self):
        DATA = [5, 10, 19, 22, 25]

        @slicing_function()
        def sf(x):
            return x.num < 20

        # We expect 3/5 correct -> 0.6 accuracy
        golds = np.array([0, 1, 0, 1, 0])
        preds = np.array([0, 0, 0, 0, 0])
        probs = preds_to_probs(preds, 2)

        # In the slice, we expect the last 2 elements to be masked
        # We expect 2/3 correct -> 0.666 accuracy
        data = [SimpleNamespace(num=x) for x in DATA]
        S = SFApplier([sf]).apply(data)
        scorer = Scorer(metrics=["accuracy"])

        # Test normal score
        metrics = scorer.score(golds=golds, preds=preds, probs=probs)
        self.assertEqual(metrics["accuracy"], 0.6)

        # Test score_slices
        slice_metrics = scorer.score_slices(S=S,
                                            golds=golds,
                                            preds=preds,
                                            probs=probs)
        self.assertEqual(slice_metrics["overall"]["accuracy"], 0.6)
        self.assertEqual(slice_metrics["sf"]["accuracy"], 2.0 / 3.0)

        # Test as_dataframe=True
        metrics_df = scorer.score_slices(S=S,
                                         golds=golds,
                                         preds=preds,
                                         probs=probs,
                                         as_dataframe=True)
        self.assertTrue(isinstance(metrics_df, pd.DataFrame))
        self.assertEqual(metrics_df["accuracy"]["overall"], 0.6)
        self.assertEqual(metrics_df["accuracy"]["sf"], 2.0 / 3.0)

        # Test wrong shapes
        with self.assertRaisesRegex(ValueError,
                                    "must have the same number of elements"):
            scorer.score_slices(S=S,
                                golds=golds[:1],
                                preds=preds,
                                probs=probs,
                                as_dataframe=True)
Example #10
    def setUp(self):
        # Define S_matrix
        data_points = [SimpleNamespace(num=num) for num in DATA]
        applier = SFApplier([f, g])
        self.S = applier.apply(data_points, progress_bar=False)

        # Define base architecture
        self.hidden_dim = 10
        self.mlp = nn.Sequential(
            nn.Linear(2, self.hidden_dim),
            nn.Linear(self.hidden_dim, self.hidden_dim),
            nn.ReLU(),
        )

        # Define model parameters
        self.data_name = "test_data"
        self.task_name = "test_task"

        # Define datasets
        # Repeated data value for [N x 2] dim Tensor
        self.X = torch.FloatTensor([(x, x) for x in DATA])
        # Alternating labels
        self.Y = torch.LongTensor([int(i % 2 == 0) for i in range(len(DATA))])

        dataset_name = "test_dataset"
        splits = ["train", "valid"]
        self.datasets = [
            create_dataset(self.X, self.Y, split, dataset_name, self.data_name,
                           self.task_name) for split in splits
        ]

        self.slice_model = SliceAwareClassifier(
            base_architecture=self.mlp,
            head_dim=self.hidden_dim,
            slice_names=[sf.name for sf in sfs],
            input_data_key=self.data_name,
            task_name=self.task_name,
            scorer=Scorer(metrics=["f1"]),
        )
Example #11
# Other parameters
n_epochs = 100
lr = 0.01
sig = 0.05
policy = "new"

# Copy data from notebook
L_data_global = np.copy(L_alarms[:, :57])
Y_data_global = np.copy(Y_alarms)

# Set up Scorer
my_metrics = {
    "abstain rate":
    lambda golds, preds, probs: np.sum(preds == ABSTAIN) / len(preds)
}
scorer = Scorer(metrics=["accuracy", "f1"], custom_metric_funcs=my_metrics)

# Define the experiment
results_mtx = np.empty((n_exps, 4, n_iters), dtype=float)
results_mtx[:] = np.nan


def thread_experiment(exp, L_data, Y_data):
    for iter in range(n_iters):
        # Randomly sample J sets of K LFs
        subsets = np.random.choice(L_data.shape[1],
                                   size=(n_subsets, subset_size),
                                   replace=with_replacement)

        # Define a new LF for each of the J sets as the prediction of a dependency-informed Snorkel model with the K LFs
        L_train, L_dev = train_test_split(L_data, test_size=0.2, shuffle=True)
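
        # A minimal sketch of the step described in the comment above, assuming a
        # plain LabelModel (the dependency-informed variant is defined elsewhere),
        # cardinality=2, and `from snorkel.labeling.model import LabelModel` at the top:
        new_lf_preds = []
        for subset in subsets:
            lm = LabelModel(cardinality=2, verbose=False)
            lm.fit(L_train[:, subset])  # fit only on the K sampled LF columns
            # The fitted model's predictions act as a single new "meta" LF
            new_lf_preds.append(lm.predict(L_train[:, subset]))
        L_new = np.column_stack(new_lf_preds)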
Example #12
parser.add_argument('--max_seq_length',
                    default="512",
                    help='Max size of the input in tokens')
parser.add_argument('--batch_size',
                    default="32",
                    help='Batch size of every dataset')
args = parser.parse_args()

MAX_SEQ_LENGTH = int(args.max_seq_length)
BATCH_SIZE = int(args.batch_size)

task_type_function_mapping = {
    "Classification_Tasks": {
        "data_handler": Classification_Task_Data_Handler,
        "head_module": ClassificationLinearLayer,
        "loss_function": F.cross_entropy,
        "scorer": Scorer(metrics=["accuracy"])
    },
    "Tagging_Tasks": {
        "data_handler": Tagging_Task_Data_Handler,
        "head_module": TaggingLinearLayer,
        "loss_function": tagging_cross_entropy,
        "scorer":
        Scorer(custom_metric_funcs={"Tag_accuracy": tag_accuracy_scorer})
    }
}

# Get the absolute current working directory of the project
cwd = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))

# Create empty list to hold every Dataloader object
dataloaders = []
Example #13
    def test_no_probs(self) -> None:
        scorer = Scorer()
        golds, preds, probs = self._get_labels()
        self.assertEqual(scorer.score(golds, preds),
                         scorer.score(golds, preds, probs))
Example #14
    def test_no_labels(self) -> None:
        scorer = Scorer()
        with self.assertRaisesRegex(ValueError, "Cannot score"):
            scorer.score([], [], [])
Example #15
    def test_no_metrics(self) -> None:
        scorer = Scorer()
        self.assertEqual(scorer.score(*self._get_labels()), {})
Example #16
    def test_invalid_metric(self) -> None:
        with self.assertRaisesRegex(ValueError, "Unrecognized metric"):
            Scorer(metrics=["accuracy", "f2"])
Example #17
def slicing_evaluation(df_train, df_test, train_model=None):
    if train_model is None:
        train_model = "mlp"

    sfs = [
        SlicingFunction.short_comment, SlicingFunction.ind_keyword,
        SlicingFunction.cmp_re, SlicingFunction.industry_keyword
    ]

    slice_names = [sf.name for sf in sfs]
    scorer = Scorer(metrics=["f1"])

    ft = FT.load(f"{WORK_PATH}/snorkel_flow/sources/fasttext_name_model.bin")

    def get_ftr(text):
        return ft.get_sentence_vector(' '.join(
            [w for w in jieba.lcut(text.strip())]))

    X_train = np.array(list(df_train.text.apply(get_ftr).values))
    X_test = np.array(list(df_test.text.apply(get_ftr).values))
    Y_train = df_train.label.values
    Y_test = df_test.label.values

    if train_model == "lr":
        sklearn_model = LogisticRegression(C=0.001, solver="liblinear")
        sklearn_model.fit(X=X_train, y=Y_train)
        preds_test = sklearn_model.predict(X_test)
        probs_test = preds_to_probs(
            preds_test,
            len([c for c in dir(Polarity) if not c.startswith("__")]))
        print(f"Test set F1: {100 * f1_score(Y_test, preds_test):.1f}%")
        applier = PandasSFApplier(sfs)
        S_test = applier.apply(df_test)
        analysis = scorer.score_slices(S=S_test,
                                       golds=Y_test,
                                       preds=preds_test,
                                       probs=probs_test,
                                       as_dataframe=True)
        return analysis

    if train_model == "mlp":
        # Define model architecture
        bow_dim = X_train.shape[1]
        hidden_dim = bow_dim
        mlp = get_pytorch_mlp(hidden_dim=hidden_dim, num_layers=2)

        # Initialize slice model
        slice_model = SliceAwareClassifier(
            base_architecture=mlp,
            head_dim=hidden_dim,
            slice_names=slice_names,
            scorer=scorer,
        )

        # generate the remaining S matrices with the new set of slicing functions
        applier = PandasSFApplier(sfs)
        S_train = applier.apply(df_train)
        S_test = applier.apply(df_test)

        # add slice labels to an existing dataloader
        BATCH_SIZE = 64

        train_dl = create_dict_dataloader(X_train, Y_train, "train")
        train_dl_slice = slice_model.make_slice_dataloader(
            train_dl.dataset, S_train, shuffle=True, batch_size=BATCH_SIZE)
        test_dl = create_dict_dataloader(X_test, Y_test, "train")
        test_dl_slice = slice_model.make_slice_dataloader(
            test_dl.dataset, S_test, shuffle=False, batch_size=BATCH_SIZE)

        #  fit our classifier with the training set dataloader
        trainer = Trainer(n_epochs=2, lr=1e-4, progress_bar=True)
        trainer.fit(slice_model, [train_dl_slice])

        analysis = slice_model.score_slices([test_dl_slice], as_dataframe=True)
        return analysis
Example #18
# For our data format, we leverage the [`PandasSFApplier`](https://snorkel.readthedocs.io/en/master/packages/_autosummary/slicing/snorkel.slicing.PandasSFApplier.html#snorkel.slicing.PandasSFApplier).
# The output of the `applier` is an [`np.recarray`](https://docs.scipy.org/doc/numpy/reference/generated/numpy.recarray.html) which stores vectors in named fields indicating whether each of $n$ data points belongs to the corresponding slice.

# %% {"tags": ["md-exclude-output"]}
from snorkel.slicing import PandasSFApplier

applier = PandasSFApplier(sfs)
S_test = applier.apply(df_test)
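
# %% [markdown]
# As a quick look at that structure (a minimal sketch; it assumes a slicing function named `short_comment` is among `sfs`), the slice-membership vectors can be read out of the recarray by field name:

# %%
print(S_test.dtype.names)       # one field per slicing function
print(S_test["short_comment"])  # 1 if the data point belongs to the slice, 0 otherwise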

# %% [markdown]
# Now, we initialize a [`Scorer`](https://snorkel.readthedocs.io/en/master/packages/_autosummary/analysis/snorkel.analysis.Scorer.html#snorkel.analysis.Scorer) using the desired `metrics`.

# %%
from snorkel.analysis import Scorer

scorer = Scorer(metrics=["f1"])

# %% [markdown]
# Using the [`score_slices`](https://snorkel.readthedocs.io/en/master/packages/_autosummary/analysis/snorkel.analysis.Scorer.html#snorkel.analysis.Scorer.score_slices) method, we can see both `overall` and slice-specific performance.

# %%
scorer.score_slices(S=S_test,
                    golds=Y_test,
                    preds=preds_test,
                    probs=probs_test,
                    as_dataframe=True)

# %% [markdown]
# Despite high overall performance, the `short_comment` slice performs poorly here!

# %% [markdown]
Example #19
def convert_to_slice_tasks(base_task: Task, slice_names: List[str]) -> List[Task]:
    """Add slice labels to dataloader and creates new slice tasks (including base slice).

    Each slice will get two slice-specific heads:
    - an indicator head that learns to identify when DataPoints are in that slice
    - a predictor head that is trained on only members of that slice

    The base task's head is replaced by a master head that makes predictions based on
    a combination of the predictor heads' predictions that are weighted by the
    indicator heads' prediction confidences.

    WARNING: The current implementation pollutes the module_pool: the indicator task's
    module_pool includes predictor modules and vice versa, since both are modified in
    place. This does not affect the result because the op sequences dictate which modules
    get used, and those do not include the extra modules. An alternative would be to
    make separate copies of the module pool for each task, but that would waste time and
    memory on extra copies of (potentially very large) modules that would immediately be
    merged back together in the model since they share the same name. We leave resolution
    of this issue for a future release.


    Parameters
    ----------
    base_task
        Task for which we are adding slice tasks. As noted in the WARNING, this task's
        module_pool will currently be modified in place for efficiency purposes.
    slice_names
        List of slice names corresponding to the columns of the slice matrix.

    Returns
    -------
    List[Task]
        Contains the original base_task, pred/ind tasks for the base slice, and pred/ind
        tasks for each of the specified slice_names
    """

    if "base" not in slice_names:
        slice_names = slice_names + ["base"]

    slice_tasks: List[Task] = []

    # Keep track of all operations related to slice tasks
    slice_task_ops: List[Operation] = []

    # NOTE: We assume here that the last operation uses the head module
    # Identify base task head module
    head_module_op = base_task.op_sequence[-1]
    head_module = base_task.module_pool[head_module_op.module_name]
    original_loss_func = base_task.loss_func

    if isinstance(head_module, nn.DataParallel):
        head_module = head_module.module

    neck_size = head_module.in_features
    assert isinstance(neck_size, int)
    base_task_cardinality = head_module.out_features
    assert isinstance(base_task_cardinality, int)

    # Remove the slice-unaware head module from module pool and op sequence
    del base_task.module_pool[head_module_op.module_name]
    body_flow = base_task.op_sequence[:-1]

    # Create slice indicator tasks
    for slice_name in slice_names:

        ind_task_name = f"{base_task.name}_slice:{slice_name}_ind"
        ind_head_module_name = f"{ind_task_name}_head"
        # Indicator head always predicts "in the slice or not", so is always binary
        ind_head_module = nn.Linear(neck_size, 2)

        # Create module_pool
        ind_module_pool = base_task.module_pool
        ind_module_pool[ind_head_module_name] = ind_head_module

        # Define operations for task head
        ind_head_op = Operation(
            module_name=ind_head_module_name, inputs=head_module_op.inputs
        )
        ind_task_ops = [ind_head_op]
        slice_task_ops.extend(ind_task_ops)

        # Create op sequence
        ind_op_sequence = list(body_flow) + list(ind_task_ops)

        # Create ind task
        ind_task = Task(
            name=ind_task_name,
            module_pool=ind_module_pool,
            op_sequence=ind_op_sequence,
            # NOTE: F1 by default because indicator task is often class imbalanced
            scorer=Scorer(metrics=["f1"]),
        )
        slice_tasks.append(ind_task)

    # Create slice predictor tasks
    shared_pred_head_module = nn.Linear(neck_size, base_task_cardinality)
    for slice_name in slice_names:

        pred_task_name = f"{base_task.name}_slice:{slice_name}_pred"

        pred_head_module_name = f"{pred_task_name}_head"
        pred_transform_module_name = f"{pred_task_name}_transform"
        pred_transform_module = nn.Linear(neck_size, neck_size)

        # Create module_pool
        # NOTE: See the note in the docstring about module_pool pollution
        pred_module_pool = base_task.module_pool
        pred_module_pool[pred_transform_module_name] = pred_transform_module
        pred_module_pool[pred_head_module_name] = shared_pred_head_module

        # Define operations for task head
        pred_transform_op = Operation(
            module_name=pred_transform_module_name, inputs=head_module_op.inputs
        )
        pred_head_op = Operation(
            module_name=pred_head_module_name, inputs=[pred_transform_op.name]
        )
        pred_task_ops = [pred_transform_op, pred_head_op]
        slice_task_ops.extend(pred_task_ops)

        # Create op sequence
        pred_op_sequence = list(body_flow) + list(pred_task_ops)

        # Create pred task
        pred_task = Task(
            name=pred_task_name,
            module_pool=pred_module_pool,
            op_sequence=pred_op_sequence,
            scorer=base_task.scorer,
        )
        slice_tasks.append(pred_task)

    # Create master task
    master_task_name = base_task.name
    master_combiner_module_name = f"{base_task.name}_master_combiner"
    master_combiner_module = SliceCombinerModule()
    master_head_module_name = f"{base_task.name}_master_head"
    master_head_module = head_module

    # Create module_pool
    master_module_pool = nn.ModuleDict(
        {
            master_combiner_module_name: master_combiner_module,
            master_head_module_name: master_head_module,
        }
    )

    master_combiner_op = Operation(module_name=master_combiner_module_name, inputs=[])
    master_head_op = Operation(
        module_name=master_head_module_name, inputs=[master_combiner_op.name]
    )

    # NOTE: See the note in the docstring about module_pool pollution

    # Create op_sequence
    master_op_sequence = (
        list(body_flow) + list(slice_task_ops) + [master_combiner_op, master_head_op]
    )

    master_task = Task(
        name=master_task_name,
        module_pool=master_module_pool,
        op_sequence=master_op_sequence,
        scorer=base_task.scorer,
        loss_func=original_loss_func,
    )
    return slice_tasks + [master_task]
Example #20
# %%
from functools import partial

import torch.nn.functional as F

from snorkel.analysis import Scorer
from snorkel.classification import Task

circle_task = Task(
    name="circle_task",
    module_pool=module_pool,
    op_sequence=op_sequence,
    loss_func=F.cross_entropy,
    output_func=partial(F.softmax, dim=1),
    scorer=Scorer(metrics=["accuracy"]),
)

# %% [markdown]
# Note that `Task` objects are not dependent on a particular dataset; multiple datasets can be passed through the same modules for pre-training or co-training.

# %% [markdown]
# ### Again, but faster

# %% [markdown]
# We'll now define the square task, but more succinctly—for example, using the fact that the default name for an `Operation` is its `module_name` (since most tasks only use their modules once per forward pass).
#
# We'll also define the square task to share the first module in its task flow (`base_mlp`) with the circle task to demonstrate how to share modules. (Note that this is purely for illustrative purposes; for this toy task, it is quite possible that this is not the optimal arrangement of modules).
#
# Finally, the most common task definitions we see in practice are classification tasks with cross-entropy loss and softmax on the output of the last module, with accuracy as the primary metric of interest. Since these are all the default values, we can drop them here for brevity.
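
# %% [markdown]
# A minimal sketch of that more succinct definition, assuming the shared `base_mlp` module and a `square_data` field in the input dict from earlier in the tutorial (the head dimensions below are illustrative):

# %%
import torch.nn as nn

from snorkel.classification import Operation, Task

square_task = Task(
    name="square_task",
    # `base_mlp` is assumed to be the module shared with circle_task above
    module_pool=nn.ModuleDict({"base_mlp": base_mlp, "square_head": nn.Linear(4, 2)}),
    op_sequence=[
        # An Operation's default name is its module_name, so explicit names are omitted
        Operation(module_name="base_mlp", inputs=[("_input_", "square_data")]),
        Operation(module_name="square_head", inputs=["base_mlp"]),
    ],
)

# %% [markdown]
# Leaving `loss_func`, `output_func`, and `scorer` unset picks up the defaults noted above (cross-entropy, softmax, and accuracy).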
Example #21
# Extract parameters from arguments
n_epochs = int(sys.argv[1])
lr = float(sys.argv[2])
abstain_rate = float(sys.argv[3])   # if < 0 then no abstain rate requested

# Other parameters
n_folds = 5

# Extract relevant data
L_data_local = np.copy(L_data[:,:57])
Y_data_local = np.copy(Y_data)

# Set up Scorer
my_metrics = {"abstain rate": lambda golds, preds, probs: np.sum(preds == ABSTAIN) / len(preds)}
scorer = Scorer(metrics=["accuracy","f1"], custom_metric_funcs=my_metrics)

# Cross validation
all_scores = []
kf = KFold(n_splits=n_folds, shuffle=True)

for i, (train_idx, test_idx) in enumerate(kf.split(L_data_local)):
    # Define train dataset
    L_train = L_data_local[train_idx]
    Y_train = Y_data_local[train_idx]
    # Define test dataset
    L_test = L_data_local[test_idx]
    Y_test = Y_data_local[test_idx]

    # Evaluate a dependency-informed Snorkel model
    l_model = LabelModel(cardinality=2, verbose=False)