Ejemplo n.º 1
0
        key:pd.read_table(label_destinations[key]).to_sparse()
        for key in label_destinations
    }


# In[13]:


# Important note: Snorkel MeTaL uses a different label encoding than the
# label functions output (2 for negative instead of -1).
# This step corrects the mismatch by converting -1s to 2.

def _sorted_dense_int_labels(split):
    """Return the label matrix for ``split`` as a dense integer array.

    The frame stored under ``label_matricies[split]`` is ordered by
    ``candidate_id``; that column is then dropped and the remaining columns
    are converted with ``to_coo().toarray()`` and cast to ``int``.
    """
    ordered = label_matricies[split].sort_values("candidate_id")
    label_columns = ordered.drop("candidate_id", axis=1)
    return label_columns.to_coo().toarray().astype(int)


# Recode the train and dev label matrices with plusminus_to_categorical.
correct_L = plusminus_to_categorical(_sorted_dense_int_labels('train'))

correct_L_dev = plusminus_to_categorical(_sorted_dense_int_labels('dev'))

correct_L_test = plusminus_to_categorical(
    label_matricies['test']
    # Read every label file into a sparse frame, keeping the same keys as
    # label_destinations (assumed to be a dict of key -> file path).
    label_matricies = {
        split: pd.read_table(path).to_sparse()
        for split, path in label_destinations.items()
    }

# In[15]:

# Important note: Snorkel MeTaL uses a different label encoding than the
# label functions output (2 for negative instead of -1).
# This step corrects the mismatch by converting -1s to 2.

def _sorted_dense_labels(split, row_mask=None):
    """Return the label matrix for ``split`` as a dense array.

    The frame stored under ``label_matricies[split]`` is ordered by
    ``candidate_id``. When ``row_mask`` is given, the sorted frame is
    indexed with it before the ``candidate_id`` column is dropped and the
    remainder is converted with ``to_coo().toarray()``.
    """
    ordered = label_matricies[split].sort_values("candidate_id")
    if row_mask is not None:
        ordered = ordered[row_mask]
    return ordered.drop("candidate_id", axis=1).to_coo().toarray()


# Marks the rows of the train label matrix whose candidate_id also occurs
# in candidate_dfs['train'].
train_ids = label_matricies['train'].candidate_id.isin(
    candidate_dfs['train'].candidate_id
)

# correct_L keeps the train rows that are NOT in candidate_dfs['train'];
# correct_L_train keeps the ones that are.
# NOTE(review): the mask is computed on the unsorted frame and applied after
# sorting; presumably pandas aligns it on the index -- confirm.
# NOTE(review): `train_ids == False` is kept as written; `~train_ids` would
# be the idiomatic form if it is confirmed to behave the same here.
correct_L = plusminus_to_categorical(
    _sorted_dense_labels('train', row_mask=(train_ids == False))
)

correct_L_train = plusminus_to_categorical(
    _sorted_dense_labels('train', row_mask=train_ids)
)

correct_L_dev = plusminus_to_categorical(_sorted_dense_labels('dev'))

correct_L_test = plusminus_to_categorical(_sorted_dense_labels('test'))

# In[16]: