def get_negative_labeling_function(divisor: int) -> LabelingFunction:
    """Build an LF that votes 0 on qualifying points and abstains otherwise.

    The returned LF is named ``lf_neg_<divisor>``. It returns 0 only when
    ``x.x0`` is divisible by ``divisor`` *and* ``x.x1 <= x.x2 + 0.25``;
    in every other case it returns -1 (abstain).
    """

    def f(x):
        # Both conditions must hold for a vote; anything else abstains.
        if x.x0 % divisor == 0 and x.x1 <= x.x2 + 0.25:
            return 0
        return -1

    lf_name = f"lf_neg_{divisor}"
    return LabelingFunction(lf_name, f)
예제 #2
0
def make_keyword_lf_NL(keywords, label=NL):
    """Create a keyword LF whose name carries an ``_NL`` suffix.

    The LF runs ``keyword_lookup`` with ``keywords`` and ``label`` passed
    as resources. The first keyword is used in the LF name, so
    ``keywords`` must be non-empty.
    """
    lf_name = f"keyword_{keywords[0]}_NL"
    lf_resources = {"keywords": keywords, "label": label}
    # NOTE: a preprocessor hook (pre=[spacy_preproc]) is intentionally left disabled.
    return LabelingFunction(name=lf_name, f=keyword_lookup, resources=lf_resources)
예제 #3
0
def make_thresold_lf(thresh, col_name, next_threshold=sys.maxsize):
    """Create a threshold LF named ``more_than_<thresh>_<col_name>``.

    The LF runs ``more_than_treshold`` and receives ``thresh``,
    ``col_name`` and ``next_threshold`` as resources. When no
    ``next_threshold`` is given it defaults to ``sys.maxsize``.
    """
    lf_name = "more_than_%s_%s" % (thresh, col_name)
    lf_resources = {
        "thresh": thresh,
        "col_name": col_name,
        "next_threshold": next_threshold,
    }
    return LabelingFunction(name=lf_name, f=more_than_treshold, resources=lf_resources)
예제 #4
0
    def wrapper(func):
        """Turn ``func`` into a SnorkelLF carrying extra attributes.

        The result gets a ``uuid``, a copy of ``targets``, and -- when a
        label encoder is supplied by the enclosing scope -- a ``snorkel``
        attribute holding a Snorkel-style LF that maps the output of
        ``func`` through the encoder. Without an encoder both
        ``label_encoder`` and ``snorkel`` are set to None.
        """
        # The function's own name is the default LF name; caller kwargs override it.
        snorkel_kwargs = dict(name=func.__name__)
        snorkel_kwargs.update(kwargs)

        # This object is what the decorator hands back.
        lf = SnorkelLF(f=func, **snorkel_kwargs)

        # Extra bookkeeping attributes.
        lf.uuid = uuid.uuid1()
        lf.targets = targets[:]

        # No encoder: there is nothing to link on the Snorkel side.
        if not label_encoder:
            lf.label_encoder = None
            lf.snorkel = None
            return lf

        lf.label_encoder = label_encoder

        def snorkel_style_func(x):
            # The encoder is read from the LF at call time, not captured here.
            return lf.label_encoder[func(x)]

        make_snorkel_lf = snorkel_lf(**kwargs)
        lf.snorkel = make_snorkel_lf(snorkel_style_func)
        return lf
예제 #5
0
def make_worker_lf(worker_id,
                   x_id_field: str = "tweet_id") -> LabelingFunction:
    """Create an LF named ``worker_<worker_id>`` backed by that worker's dict.

    The LF looks up ``x[x_id_field]`` in ``worker_dicts[worker_id]`` and
    returns the stored value, or ``ABSTAIN`` when the key is absent.
    """

    def worker_lf(x, worker_dict):
        return worker_dict.get(x[x_id_field], ABSTAIN)

    votes = worker_dicts[worker_id]
    return LabelingFunction(
        f"worker_{worker_id}",
        f=worker_lf,
        resources=dict(worker_dict=votes),
    )
예제 #6
0
def createAnalysis(final_df, category_names):
    """Summarise the columns of ``final_df`` as if each were an LF.

    For every name in ``category_names`` the matching column is read,
    zeros are replaced by -1, and the columns are transposed into a
    per-row label matrix. Placeholder ``LabelingFunction`` objects (with
    ``f=None``) supply the names, and the result of
    ``LFAnalysis(...).lf_summary()`` is returned.
    """
    # One list per category, with 0 rewritten to -1.
    per_category = [
        [-1 if value == 0 else value for value in final_df[name].tolist()]
        for name in category_names
    ]

    # Transpose: one row per data point, one column per category.
    label_rows = [list(row) for row in zip(*per_category)]
    placeholder_lfs = [LabelingFunction(name=name, f=None) for name in category_names]

    analysis = LFAnalysis(L=np.array(label_rows), lfs=placeholder_lfs)
    return analysis.lf_summary()
예제 #7
0
def make_keyword_lf(keyword, label, neg_label, context_len, with_period):
    """Create a ``pattern_match`` LF for one keyword.

    The LF name is ``pattern_<keyword>_context:<context_len>`` with a
    ``_period`` suffix when ``with_period`` is truthy. All five arguments
    are forwarded to ``pattern_match`` as resources.
    """
    context_tag = "context:%d" % context_len
    period_tag = "_period" if with_period else ""
    lf_name = "pattern_%s_%s%s" % (keyword, context_tag, period_tag)

    lf_resources = {
        "keyword": keyword,
        "label": label,
        "neg_label": neg_label,
        "with_period": with_period,
        "context_len": context_len,
    }
    return LabelingFunction(name=lf_name, f=pattern_match, resources=lf_resources)
예제 #8
0
def make_keyword_lf(keywords, label=1):
    """Create an LF that votes ``label`` when any keyword occurs in ``x.text``.

    Matching is a substring test against the lower-cased text; the LF
    returns -1 when nothing matches. The LF is named after the first
    keyword, so ``keywords`` must be non-empty.
    """

    def keyword_lookup(x, keywords, label):
        for word in keywords:
            if word in x.text.lower():
                return label
        return -1

    lf_resources = {"keywords": keywords, "label": label}
    return LabelingFunction(
        name=f"keyword_{keywords[0]}",
        f=keyword_lookup,
        resources=lf_resources,
    )
예제 #9
0
def make_annotator_lf(worker_index, num_annotators=sys.maxsize):
    """Create an LF that replays one annotator's votes from a CSV file.

    The CSV file is selected by ``worker_index % num_annotators``, while
    the LF name always uses the unreduced ``worker_index``. The first row
    of the file is skipped; for every other row the last column is the
    lookup key and the vote is ``POS`` when the second column equals
    ``"checked"``, otherwise ``NEG``.

    :param worker_index: Index used for the LF name and, modulo
        ``num_annotators``, for the CSV file name.
    :param num_annotators: Modulus applied to ``worker_index`` when
        choosing the file; defaults to ``sys.maxsize``.
    :return: A ``LabelingFunction`` running ``worker_lf`` with the loaded
        ``worker_dict`` as its resource.
    """
    file_index = worker_index % num_annotators
    csv_path = "/home/tigunova/PycharmProjects/snorkel_labels/data/hobby/labeling_lf/%d.csv" % (file_index)

    # Use a context manager so the file handle is closed once the votes are read.
    with open(csv_path) as csv_file:
        reader = csv.reader(csv_file)
        next(reader)  # skip the first row
        worker_dict = {
            row[-1]: POS if row[1] == "checked" else NEG
            for row in reader
        }

    return LabelingFunction(
        name="worker_%d" % (worker_index),
        f=worker_lf,
        resources=dict(worker_dict=worker_dict),
    )
예제 #10
0
def make_keyword_lf(keywords: List[str], label: str, field: str = "text"):
    """Create an LF that votes ``label`` when a keyword occurs in ``x[field]``.

    Matching is a substring test against the lower-cased field value; the
    LF returns ``ABSTAIN`` when nothing matches. The LF is named after the
    first keyword, so ``keywords`` must be non-empty.
    """

    def keyword_lookup(x, keywords, label):
        for word in keywords:
            if word in x[field].lower():
                return label
        return ABSTAIN

    lf_resources = {"keywords": keywords, "label": label}
    return LabelingFunction(
        name=f"keyword_{keywords[0]}",
        f=keyword_lookup,
        resources=lf_resources,
    )
예제 #11
0
def make_annotator_lf(worker_index):
    """Create an LF that replays one annotator's votes from a CSV file.

    The first row of ``<worker_index>.csv`` is skipped; for every other
    row the last column is the lookup key and the vote is ``POS`` when the
    second column equals ``"checked"``, otherwise ``ABSTAIN``.

    :param worker_index: Integer used for both the CSV file name and the
        LF name (``worker_<worker_index>``).
    :return: A ``LabelingFunction`` running ``worker_lf`` with the loaded
        ``worker_dict`` as its resource.
    """
    csv_path = (
        "/home/tigunova/PycharmProjects/snorkel_labels/data/profession/labeling_lf/%d.csv"
        % (worker_index))

    # Use a context manager so the file handle is closed once the votes are read.
    with open(csv_path) as csv_file:
        reader = csv.reader(csv_file)
        next(reader)  # skip the first row
        worker_dict = {
            row[-1]: POS if row[1] == "checked" else ABSTAIN
            for row in reader
        }

    return LabelingFunction(
        name="worker_%d" % (worker_index),
        f=worker_lf,
        resources=dict(worker_dict=worker_dict),
    )
def make_keyword_lf(keywords: list, label: int = RELEVANT):
    """Build a keyword-based labeling function.

    :param keywords: Keywords handed to ``keyword_lookup``; the first one
        is used in the labeling function's name, so the list must be
        non-empty
    :type keywords: list
    :param label: Label handed to ``keyword_lookup``, defaults to RELEVANT
    :type label: int, optional
    :return: A labeling function named ``keyword_<first keyword>`` that
        runs ``keyword_lookup``
    :rtype: LabelingFunction
    """
    lf_name = f"keyword_{keywords[0]}"
    lf_resources = {"keywords": keywords, "label": label}
    return LabelingFunction(name=lf_name, f=keyword_lookup, resources=lf_resources)
예제 #13
0
    def make_keyword_lf(self, keywords: list, label: int, lf_name: str = None):
        """Generate a labeling function from a list of keywords

        :param keywords: A list of keywords which will be used to generate the labeling function
        :type keywords: list
        :param label: The label to assign to the labeling function
        :type label: int
        :param lf_name: A unique name for the labeling function; when omitted, the name is
            ``keyword_<first keyword>`` with surrounding whitespace stripped and spaces
            replaced by underscores
        :type lf_name: str, optional
        :return: returns a labeling function which implements `keyword_lookup`
        :rtype: LabelingFunction
        """
        # Honour an explicit name; previously ``lf_name`` was accepted but ignored.
        if lf_name is None:
            lf_name = f"keyword_{keywords[0].strip().replace(' ', '_')}"
        return LabelingFunction(name=lf_name,
                                f=self.keyword_lookup,
                                resources=dict(keywords=keywords, label=label))
예제 #14
0
    def test_lf_summary(self) -> None:
        """Check ``lf_summary`` output for three call variants.

        Covers: gold labels without weights, no gold labels, and named
        LFs with gold labels plus estimated weights.
        """
        # Columns expected regardless of whether gold labels are given.
        base_columns = {
            "Polarity": [[1, 2], [], [0, 2], [2], [0, 1], [0]],
            "Coverage": [3 / 6, 0, 3 / 6, 2 / 6, 2 / 6, 4 / 6],
            "Overlaps": [3 / 6, 0, 3 / 6, 1 / 6, 2 / 6, 4 / 6],
            "Conflicts": [3 / 6, 0, 2 / 6, 1 / 6, 2 / 6, 3 / 6],
        }
        # Columns that only appear when gold labels are supplied.
        accuracy_columns = {
            "Correct": [1, 0, 1, 1, 1, 2],
            "Incorrect": [2, 0, 2, 1, 1, 2],
            "Emp. Acc.": [1 / 3, 0, 1 / 3, 1 / 2, 1 / 2, 2 / 4],
        }

        # Variant 1: gold labels, no estimated weights.
        summary = self.lfa.lf_summary(self.Y, est_weights=None)
        expected = pd.DataFrame({**base_columns, **accuracy_columns})
        pd.testing.assert_frame_equal(summary.round(6), expected.round(6))

        # Variant 2: neither gold labels nor estimated weights.
        summary = self.lfa.lf_summary(Y=None, est_weights=None)
        expected = pd.DataFrame(dict(base_columns))
        pd.testing.assert_frame_equal(summary.round(6), expected.round(6))

        # Variant 3: named LFs, gold labels and estimated weights.
        est_weights = [1, 0, 1, 1, 1, 0.5]
        names = list("abcdef")
        named_lfs = [LabelingFunction(s, f) for s in names]
        named_lfa = LFAnalysis(np.array(L), named_lfs)
        summary = named_lfa.lf_summary(self.Y, est_weights=est_weights)
        expected = pd.DataFrame(
            {
                "j": [0, 1, 2, 3, 4, 5],
                **base_columns,
                **accuracy_columns,
                "Learned Weight": [1, 0, 1, 1, 1, 0.5],
            }
        ).set_index(pd.Index(names))
        pd.testing.assert_frame_equal(summary.round(6), expected.round(6))
예제 #15
0
def get_lfs(keywords):
    """Build one ``sent_context_lf`` labeling function per keyword.

    ``keywords`` maps each label to an iterable of raw keywords. Labels
    are numbered in iteration order; that index is what each LF receives
    as its ``label`` resource, together with the parsed keyword.

    The LF name depends on ``parsed_kw['first']``:
    ``'B'`` -> ``<text>...[<label>]``,
    ``'I'`` -> ``[<text>...<label>]``,
    ``'A'`` -> ``[<label>]...<text>``.

    :param keywords: Mapping of label -> iterable of keywords accepted by
        ``parse_kw``.
    :return: Tuple ``(lfs, idx_label_map)`` where ``idx_label_map`` maps
        each label index back to its label.
    :raises ValueError: If ``parsed_kw['first']`` is not one of ``'B'``,
        ``'I'`` or ``'A'``.
    """
    lfs = []
    idx_label_map = {}
    for i, (label, kws) in enumerate(keywords.items()):
        idx_label_map[i] = label
        for kw in kws:
            parsed_kw = parse_kw(kw)
            if parsed_kw['first'] == 'B':
                name = parsed_kw['text'] + f'...[{label}]'
            elif parsed_kw['first'] == 'I':
                name = f'[{parsed_kw["text"]}...{label}]'
            elif parsed_kw['first'] == 'A':
                name = f'[{label}]...{parsed_kw["text"]}'
            else:
                # Without this branch ``name`` would be unbound on the first
                # keyword or silently reuse the previous keyword's name.
                raise ValueError(
                    f"Unsupported keyword position {parsed_kw['first']!r} "
                    f"for keyword {kw!r}; expected 'B', 'I' or 'A'")
            lfs.append(
                LabelingFunction(name=name,
                                 f=sent_context_lf,
                                 resources=dict(label=i, parsed_kw=parsed_kw)))
    return lfs, idx_label_map
예제 #16
0

def pos(sample):
    """Return 1 if ``PWORDS`` matches anywhere in ``str(sample)``, else -1."""
    if re.search(PWORDS, str(sample)):
        return 1
    return -1


# Phrases that trigger a 0 vote; only the leading edge is anchored at a word boundary.
NWORDS = r"\b(hell yeah|bribery|not happy|less moral|impeach trump|impeach our president)"


def neg(sample):
    """Return 0 if ``NWORDS`` matches anywhere in ``str(sample)``, else -1."""
    if re.search(NWORDS, str(sample)) is None:
        return -1
    return 0


# In[8]:

# Wrap the `pos` matcher as a labeling function.
positive = LabelingFunction(name="positive", f=pos)

# In[9]:

# Wrap the `neg` matcher as a labeling function.
negative = LabelingFunction(name="negative", f=neg)

# In[10]:

# Training subset: the slice 0:450000 of `dt`.
df_train = dt[0:450000]

# In[11]:


# Create the labeling functions using the textblob sentiment analyzer
@preprocessor(memoize=True)
def textblob_polarity(x):
예제 #17
0
def make_keyword_lf(keywords, label=SPAM):
    """Create a ``keyword_lookup`` LF named after the first keyword.

    ``keywords`` and ``label`` (default ``SPAM``) are passed to
    ``keyword_lookup`` as resources. ``keywords`` must be non-empty.
    """
    lf_name = f"keyword_{keywords[0]}"
    lf_resources = {"keywords": keywords, "label": label}
    return LabelingFunction(name=lf_name, f=keyword_lookup, resources=lf_resources)
 def test_wrong_number_of_lfs(self) -> None:
     """Expect a ValueError mentioning "Number of LFs" for a two-LF list."""
     with self.assertRaisesRegex(ValueError, "Number of LFs"):
         label_matrix = np.array(L)
         # Only two LFs ("a" and "b") -- presumably fewer than L has columns.
         two_lfs = [LabelingFunction(s, f) for s in "ab"]
         LFAnalysis(label_matrix, two_lfs)
예제 #19
0
def make_keyword_lf(lf_name, keywords, label=IRRELEVANT):
    """Create a ``regex_keyword_lookup`` LF with an explicit name.

    ``keywords`` and ``label`` (default ``IRRELEVANT``) are passed to
    ``regex_keyword_lookup`` as resources.
    """
    lf_resources = {"keywords": keywords, "label": label}
    return LabelingFunction(name=lf_name, f=regex_keyword_lookup, resources=lf_resources)
 def make_abstract_lf(keywords, name, label=None):
     """Create an ``abstract_lookup`` LF named ``abstract_<name>``.

     ``keywords`` and ``label`` are passed to ``abstract_lookup`` as
     resources.
     """
     lf_name = f"abstract_{name}"
     lf_resources = {"keywords": keywords, "label": label}
     return LabelingFunction(name=lf_name, f=abstract_lookup, resources=lf_resources)
 def make_keyword_lf(keywords, virus, name, label=None):
     """Create a ``keyword_lookup`` LF named ``keyword_<name>``.

     ``keywords``, ``virus`` and ``label`` are passed to
     ``keyword_lookup`` as resources.
     """
     lf_name = f"keyword_{name}"
     lf_resources = {"keywords": keywords, "virus": virus, "label": label}
     return LabelingFunction(name=lf_name, f=keyword_lookup, resources=lf_resources)
예제 #22
0
def make_lfs_list(post_hoc_callables, GI_callables, rule_out_callables,
                  lab_callables):
    """Wrap every supplied callable in a ``LabelingFunction``.

    The callables are wrapped in a fixed order that differs from the
    parameter order: post-hoc first, then rule-out, then GI, then lab.
    Each labeling function is named after its callable's ``__name__``.

    :param post_hoc_callables: Iterable of callables, wrapped first.
    :param GI_callables: Iterable of callables, wrapped third.
    :param rule_out_callables: Iterable of callables, wrapped second.
    :param lab_callables: Iterable of callables, wrapped last.
    :return: List of ``LabelingFunction`` objects in the order above.
    """
    # NOTE: an earlier, commented-out variant that combined GI/lab callables
    # with rule-out checks was removed as dead code.
    ordered_callables = [
        *post_hoc_callables,
        *rule_out_callables,
        *GI_callables,
        *lab_callables,
    ]
    return [LabelingFunction(name=fn.__name__, f=fn) for fn in ordered_callables]
예제 #23
0
def make_lexicon_lf(thresh, pref="", previous_threshold=-sys.maxsize):
    """Create a threshold LF named ``<pref>_less_<thresh>``.

    The LF runs ``less_than_treshold`` and receives ``thresh`` and
    ``previous_threshold`` as resources. When no ``previous_threshold``
    is given it defaults to ``-sys.maxsize``.
    """
    lf_name = "%s_less_%s" % (pref, thresh)
    lf_resources = {"thresh": thresh, "previous_threshold": previous_threshold}
    return LabelingFunction(name=lf_name, f=less_than_treshold, resources=lf_resources)
def make_keyword_lf(name, keywords_pos, keywords_neg):
    """Create a ``keyword_lookup`` LF with an explicit name.

    ``keywords_pos`` and ``keywords_neg`` are passed to ``keyword_lookup``
    as resources.
    """
    lf_resources = {"keywords_pos": keywords_pos, "keywords_neg": keywords_neg}
    return LabelingFunction(name=name, f=keyword_lookup, resources=lf_resources)
def make_expression_lf(name, pre_pos, expression):
    """Create an ``expression_lookup`` LF with an explicit name.

    ``pre_pos`` and ``expression`` are passed to ``expression_lookup`` as
    resources.
    """
    lf_resources = {"pre_pos": pre_pos, "expression": expression}
    return LabelingFunction(name=name, f=expression_lookup, resources=lf_resources)
예제 #26
0
def make_worker_lf(worker_id):
    """Create a ``worker_lf`` LF named ``worker_<worker_id>``.

    The entry ``worker_dicts[worker_id]`` is passed to ``worker_lf`` as
    the ``worker_dict`` resource.
    """
    votes = worker_dicts[worker_id]
    return LabelingFunction(
        f"worker_{worker_id}",
        f=worker_lf,
        resources=dict(worker_dict=votes),
    )