예제 #1
0
    def get_source_target(cand: DataPoint) -> DataPoint:
        """Attach the first source and first target mentioned in the sentence.

        Scans ``cand.doc`` for tokens whose text is in the enclosing scope's
        ``sources`` and ``targets`` collections and stores the first match of
        each on ``cand.source_target`` as a ``(source, target)`` tuple. When
        the sentence lacks a source or a target, the tuple is
        ``(np.nan, np.nan)``.

        :param cand: Candidate whose ``doc`` is an iterable of tokens exposing
            a ``text`` attribute.
        :return: The same candidate, with ``source_target`` set.
        """
        source = [token.text for token in cand.doc if token.text in sources]
        target = [token.text for token in cand.doc if token.text in targets]

        if source and target:
            cand.source_target = (source[0], target[0])
        else:
            # At least one side of the pair is absent from this sentence.
            cand.source_target = (np.nan, np.nan)
        return cand
예제 #2
0
def get_left_tokens(cand: DataPoint, *, window: int = 3) -> DataPoint:
    """
    Returns tokens in a fixed-length window to the left of the person mentions

    For each mention, the tokens before the mention's first token
    (``personN_word_idx[0]``) are sliced with ``[-1 - window:-1]`` and stored
    on ``cand.person1_left_tokens`` / ``cand.person2_left_tokens``. Fewer than
    ``window`` tokens are returned when the mention sits near the start of
    the sentence.

    NOTE(review): the slice ends one token short of the mention, so the token
    immediately adjacent to the mention is not part of the window. That
    behavior is preserved here; confirm whether it is intended.

    :param cand: Candidate with ``tokens``, ``person1_word_idx`` and
        ``person2_word_idx`` attributes.
    :param window: Maximum number of tokens kept per mention (default 3).
    :return: The same candidate with both ``*_left_tokens`` attributes set.
    :raises ValueError: If ``window`` is negative.
    """
    if window < 0:
        raise ValueError("window must be non-negative, got {}".format(window))

    tokens = cand.tokens
    window_slice = slice(-1 - window, -1)
    cand.person1_left_tokens = tokens[0:cand.person1_word_idx[0]][window_slice]
    cand.person2_left_tokens = tokens[0:cand.person2_word_idx[0]][window_slice]
    return cand
예제 #3
0
def get_left_tokens(cand: DataPoint, *, window: int = 3) -> DataPoint:
    """
    Returns tokens in a fixed-length window to the left of entity mentions.

    The window is taken from the tokens that precede each mention's first
    token and is sliced as ``[-1 - window:-1]``, i.e. up to ``window`` tokens.
    (The previous ``[1 - window:-1]`` slice returned only ``window - 2``
    tokens, a single token for the default window of three.)

    NOTE(review): the slice ends one token short of the mention, so the token
    immediately adjacent to the mention is not included; this matches the
    sibling implementation of this function. Confirm whether it is intended.

    :param cand: A candidate DF.
    :param window: Maximum number of tokens kept per mention (default 3).
    :return: Candidate DF with two new columns, each a list of tokens to the left of entities.
    :raises ValueError: If ``window`` is negative.
    """
    if window < 0:
        raise ValueError("window must be non-negative, got {}".format(window))

    end = cand.person1_word_idx[0]
    cand.person1_left_tokens = cand.tokens[0:end][-1 - window:-1]
    end = cand.person2_word_idx[0]
    cand.person2_left_tokens = cand.tokens[0:end][-1 - window:-1]
    return cand
예제 #4
0
def get_text_between(cand: DataPoint) -> DataPoint:
    """
    Stores the space-joined tokens lying between the two person mentions.

    The span starts right after the last token of person 1
    (``person1_word_idx[1] + 1``) and stops before the first token of
    person 2 (``person2_word_idx[0]``). The result is written to
    ``cand.text_between`` and the candidate is returned.
    """
    first_after_person1 = cand.person1_word_idx[1] + 1
    person2_start = cand.person2_word_idx[0]
    tokens_between = cand.tokens[first_after_person1:person2_start]
    cand.text_between = " ".join(tokens_between)
    return cand
예제 #5
0
def get_text_between(cand: DataPoint) -> DataPoint:
    """
    Computes the text separating the two entity mentions.

    The tokens strictly after the end of the first mention and strictly
    before the start of the second mention are joined with single spaces.
    :param cand: A candidate DF.
    :return: Candidate DF with new column ``text_between``.
    """
    between = slice(cand.person1_word_idx[1] + 1, cand.person2_word_idx[0])
    cand.text_between = " ".join(cand.tokens[between])
    return cand
예제 #6
0
def get_person_last_names(cand: DataPoint) -> DataPoint:
    """
    Stores the last name of each of the two person mentions in candidate

    The mention texts come from ``get_person_text``. A last name is the text
    after the final space of a mention; a mention containing no space has no
    last name and yields ``None``. The two results are written, in order, to
    ``cand.person_lastnames``.
    """
    cand = get_person_text(cand)
    first_full, second_full = cand.person_names

    last_names = []
    for full_name in (first_full, second_full):
        parts = full_name.split(" ")
        last_names.append(parts[-1] if len(parts) > 1 else None)

    cand.person_lastnames = last_names
    return cand
예제 #7
0
def get_person_text(cand: DataPoint) -> DataPoint:
    """
    Stores the text of the two person mentions in candidate

    Each mention is described by an inclusive ``(start, end)`` token span
    under the keys ``person1_word_idx`` and ``person2_word_idx``; the covered
    tokens are joined with spaces. The two strings are written, in order, to
    ``cand.person_names``.
    """
    names = []
    for which in (1, 2):
        span = cand["person{}_word_idx".format(which)]
        first, past_last = span[0], span[1] + 1
        names.append(" ".join(cand["tokens"][first:past_last]))
    cand.person_names = names
    return cand
예제 #8
0
def get_persons_last_name(cand: DataPoint) -> DataPoint:
    """
    Extracts the last name of each entity mention.

    A mention's last name is whatever follows its final space; mentions
    without any space produce ``None``.
    :param cand: A candidate DF.
    :return: Candidate DF with a new column ``person_lastnames``, a list of last names.
    """
    cand = get_person_text(cand)
    first_mention, second_mention = cand.person_names

    last_names = []
    for mention in (first_mention, second_mention):
        # rpartition yields an empty separator when the mention has no space.
        _, space, tail = mention.rpartition(" ")
        last_names.append(tail if space else None)

    cand.person_lastnames = last_names
    return cand
예제 #9
0
def get_person_text(cand: DataPoint) -> DataPoint:
    """
    Collects the text of the two person mentions in the candidate sentence.

    Each ``person{j}_word_idx`` entry is read as an inclusive
    ``(start, end)`` token span, and the covered tokens are joined with spaces.
    :param cand: A candidate DF.
    :return: Candidate DF with new column ``person_names``, a list of entity names.
    """

    def mention_text(position):
        # Inclusive span, hence the + 1 on the upper bound.
        bounds = cand["person{j}_word_idx".format(j=position)]
        return " ".join(cand["tokens"][bounds[0]:bounds[1] + 1])

    cand.person_names = [mention_text(position) for position in (1, 2)]
    return cand
예제 #10
0
 def square(x: DataPoint) -> DataPoint:
     """Store ``square_hit_tracker(x.num)`` on ``x.num_squared``.

     Returns ``None`` instead of the data point when ``x.num`` equals 21; the
     tracker is still called and the attribute still set in that case.
     """
     x.num_squared = square_hit_tracker(x.num)
     return None if x.num == 21 else x
예제 #11
0
 def mapper_pre_2(x: DataPoint) -> DataPoint:
     """Set ``x.double_num_squared_plus_1`` to ``x.double_num_squared + 1`` and return ``x``."""
     incremented = x.double_num_squared + 1
     x.double_num_squared_plus_1 = incremented
     return x
예제 #12
0
def modify_in_place(x: DataPoint) -> DataPoint:
    """Set ``x.d["my_key"]`` to 0 in place and return a new ``Row``.

    The returned row carries ``num`` from the input and exposes the mutated
    ``x.d`` under both ``d`` and ``d_new``.
    """
    x.d["my_key"] = 0
    fields = {"num": x.num, "d": x.d, "d_new": x.d}
    return Row(**fields)
예제 #13
0
def square(x: DataPoint) -> DataPoint:
    """Return a new ``Row`` with the fields of ``x`` plus ``num_squared`` (``x.num ** 2``)."""
    return Row(**{**x.asDict(), "num_squared": x.num**2})
예제 #14
0
 def square(x: DataPoint) -> DataPoint:
     """Return a new ``Row`` with the fields of ``x`` plus ``num_squared``.

     ``num_squared`` holds whatever ``square_hit_tracker(x.num)`` returns.
     """
     updated = dict(x.asDict(), num_squared=square_hit_tracker(x.num))
     return Row(**updated)
예제 #15
0
def copy_features(x: DataPoint) -> DataPoint:
    """Store ``x2 + 0.25`` on ``x.x3`` so it can be compared directly to x1."""
    shifted = x.x2 + 0.25
    x.x3 = shifted
    return x
예제 #16
0
    def get_text_between(cand: DataPoint) -> DataPoint:
        """
        Attaches the text between a source-target pair and the text to the left of the source

        Token positions are collected for every token of ``cand.doc`` whose
        text is in the enclosing scope's ``sources`` / ``targets``. Among all
        (source, target) combinations where the source precedes the target,
        the last one in source-major order is used:

        * ``cand.text_between`` is ``cand.doc[source:target]`` (it starts at
          the source token itself and stops before the target token);
        * ``cand.text_to_source_left`` is ``cand.doc[:source]``.

        When no source precedes a target (including when either kind of
        mention is missing), or when slicing the document fails, both
        attributes are set to the string ``'NaN'``. Both attributes are
        therefore always set on return.
        """
        source_idx = [token.i for token in cand.doc if token.text in sources]
        target_idx = [token.i for token in cand.doc if token.text in targets]

        # Every combination in which the source comes first, enumerated
        # source-major; the final entry is the pair that wins.
        ordered_pairs = [
            (source_index, target_index)
            for source_index in source_idx
            for target_index in target_idx
            if source_index < target_index
        ]

        text_between = text_to_source_left = 'NaN'
        if ordered_pairs:
            source_index, target_index = ordered_pairs[-1]
            try:
                text_between = cand.doc[source_index:target_index]
                text_to_source_left = cand.doc[:source_index]
            except Exception:
                # Best effort: a document that cannot be sliced falls back to
                # the sentinel for both fields rather than aborting the run.
                text_between = text_to_source_left = 'NaN'

        cand.text_between = text_between
        cand.text_to_source_left = text_to_source_left
        return cand
예제 #17
0
def square(x: DataPoint) -> DataPoint:
    """Set ``x.num_squared`` to ``x.num ** 2`` and return the same data point."""
    squared = x.num ** 2
    x.num_squared = squared
    return x
예제 #18
0
 def square(x: DataPoint) -> DataPoint:
     """Store the result of ``square_hit_tracker(x.num)`` on ``x.num_squared`` and return ``x``."""
     tracked_square = square_hit_tracker(x.num)
     x.num_squared = tracked_square
     return x
예제 #19
0
 def mapper_pre(x: DataPoint) -> DataPoint:
     """Set ``x.double_num_squared`` to twice ``x.num_squared`` and return ``x``."""
     doubled = 2 * x.num_squared
     x.double_num_squared = doubled
     return x
def square_returns_none(x: DataPoint) -> DataPoint:
    """Square ``x.num`` in place, except that a point whose ``num`` is 2 is dropped.

    Returns ``None`` (leaving ``x`` untouched) when ``x.num == 2``; otherwise
    overwrites ``x.num`` with its square and returns ``x``.
    """
    if x.num != 2:
        x.num = x.num**2
        return x
    return None
예제 #21
0
def modify_in_place(x: DataPoint) -> DataPoint:
    """Set ``x.d["my_key"]`` to 0 in place, then expose ``x.d`` as ``x.d_new``.

    ``d_new`` is assigned whatever ``x.d`` evaluates to after the mutation
    (no copy is made here), and the same data point is returned.
    """
    x.d["my_key"] = 0
    setattr(x, "d_new", x.d)
    return x
예제 #22
0
def combine_text(x: DataPoint) -> DataPoint:
    """Store the title and article, separated by one space, on ``x.text`` and return ``x``."""
    combined = f"{x.title} {x.article}"
    x.text = combined
    return x