Exemple #1
0
def remove_wrong_structures(sentences, obj_a, obj_b):
    '''
    Removes sentences 1: in which no marker is between the objects
                      2: in which there are both positive and negative markers between the objects
                      3: in which neither of the two objects can be located

    The list is filtered in place and the very same list object is returned,
    so callers holding a reference to it see the reduced content.

    sentences:  list
                a mutable list of sentence objects; each one must expose its
                wording through a ``text`` attribute

    obj_a:      Argument
                the first object to be compared

    obj_b:      Argument
                the second object to be compared
    '''
    kept = []
    for sentence in sentences:
        a_pos = find_pos_in_sentence(obj_a.name, sentence.text)
        b_pos = find_pos_in_sentence(obj_b.name, sentence.text)
        # NOTE(review): only sentences where BOTH objects are missing are
        # dropped here. If exactly one is missing, pos_first below is -1 and
        # the marker search starts at the sentence start - confirm whether
        # this should be `or` instead of `and`.
        if a_pos == -1 and b_pos == -1:
            continue
        pos_first = min(a_pos, b_pos)
        pos_second = max(a_pos, b_pos)
        has_pos_marker = has_marker(sentence.text, pos_first, pos_second,
                                    POSITIVE_MARKERS)
        has_neg_marker = has_marker(sentence.text, pos_first, pos_second,
                                    NEGATIVE_MARKERS)
        # Keep the sentence only if exactly one polarity is present; both
        # polarities or none at all make the structure unusable.
        if bool(has_pos_marker) != bool(has_neg_marker):
            kept.append(sentence)
    # Single in-place replacement instead of one index()/del pair per rejected
    # sentence: linear time, and no reliance on sentence equality to find the
    # element to drop.
    sentences[:] = kept
    return sentences
def prepare_sentence_DF(sentences, obj_a, obj_b):
    '''
    Builds a pandas DataFrame with one row per sentence and the columns
    'object_a', 'object_b' and 'sentence'.

    For every sentence the two object names are ordered by the positions
    that find_pos_in_sentence reports: if obj_a's position is strictly
    smaller, obj_a fills 'object_a' and obj_b fills 'object_b'; in every
    other case (including equal positions) the two are swapped.

    NOTE(review): no special handling exists for an object that is not found
    in a sentence - whatever position find_pos_in_sentence reports in that
    case is compared as-is.

    sentences:  iterable
                sentence objects exposing their wording as ``text``

    obj_a:      Argument
                the first object; its ``name`` is looked up in each sentence

    obj_b:      Argument
                the second object; its ``name`` is looked up in each sentence
    '''
    rows = []
    for sentence in sentences:
        pos_a = find_pos_in_sentence(obj_a.name, sentence.text)
        pos_b = find_pos_in_sentence(obj_b.name, sentence.text)
        if pos_a < pos_b:
            rows.append([obj_a.name, obj_b.name, sentence.text])
        else:
            rows.append([obj_b.name, obj_a.name, sentence.text])

    return pd.DataFrame.from_records(
        rows, columns=['object_a', 'object_b', 'sentence'])
def prepare_sentence_DF(sentences, obj_a, obj_b):
    '''
    Builds a pandas DataFrame with one row per sentence and the columns
    'object_a', 'object_b' and 'sentence'.

    The two objects are ordered by the positions find_pos_in_sentence
    reports for them: if obj_a's position is strictly smaller, the row is
    (obj_a, obj_b, sentence), otherwise (obj_b, obj_a, sentence).

    If either position comes back as -1, the positions are replaced by
    0 and len(sentence) - 1 before comparing, so such a sentence keeps the
    (obj_a, obj_b) order whenever it is longer than one character.

    sentences:  iterable of str
                the sentences to put into the frame

    obj_a:      str
                the first object to look up in each sentence

    obj_b:      str
                the second object to look up in each sentence
    '''
    rows = []
    for sentence in sentences:
        pos_a = find_pos_in_sentence(obj_a, sentence)
        pos_b = find_pos_in_sentence(obj_b, sentence)
        if pos_a == -1 or pos_b == -1:
            # At least one object was not located: fall back to the outer
            # bounds of the sentence instead of comparing against -1.
            pos_a = 0
            pos_b = len(sentence) - 1
        if pos_a < pos_b:
            rows.append([obj_a, obj_b, sentence])
        else:
            rows.append([obj_b, obj_a, sentence])

    return pd.DataFrame.from_records(
        rows, columns=['object_a', 'object_b', 'sentence'])
Exemple #4
0
def marker_pos(sentence, pos_first, pos_second, marker):
    '''
    Looks up the marker in the sentence via find_pos_in_sentence and reports
    its position only if it lies strictly between pos_first and pos_second.

    Returns the marker position in that case, otherwise -1 (marker not found,
    or found outside the open interval between the two positions).
    '''
    found_at = find_pos_in_sentence(marker, sentence)
    if found_at == -1:
        # marker does not occur in the sentence at all
        return -1
    if pos_first < found_at < pos_second:
        # marker sits between the two objects
        return found_at
    return -1
Exemple #5
0
    def transform(self, dataframe):
        '''
        Runs `process` on every row of the given DataFrame and collects the
        outcomes in a list (one entry per row, in row order).

        Each row must provide the columns 'object_a', 'object_b' and
        'sentence'. Only the part of the sentence spanning from the start of
        the earlier object to the end of the later object is handed to
        `process`, together with both objects, self.processing and the
        replacement settings self.rep_a / self.rep_b.
        '''
        processed = []
        for _, record in dataframe.iterrows():
            first_obj = record['object_a']
            second_obj = record['object_b']
            sentence = record['sentence']

            pos_of_first = find_pos_in_sentence(first_obj, sentence)
            pos_of_second = find_pos_in_sentence(second_obj, sentence)

            # The span runs from whichever object is reported earlier up to
            # the end of the other one.
            if pos_of_first < pos_of_second:
                span = slice(pos_of_first, pos_of_second + len(second_obj))
            else:
                span = slice(pos_of_second, pos_of_first + len(first_obj))

            outcome = process(sentence[span],
                              first_obj,
                              second_obj,
                              self.processing,
                              rep_a=self.rep_a,
                              rep_b=self.rep_b)
            processed.append(outcome)

        return processed
Exemple #6
0
def what_is_better(sentence, obj_a, obj_b):
    '''
    Decides which of two objects wins the comparison made in a sentence.

    Returns a dict with two entries:
        'marker_cnt': the value get_marker_count reports for the marker set
                      that was used (positive if a positive marker lies
                      between the objects, otherwise negative)
        'winner':     obj_a or obj_b

    Only sentences of the following form are supported:
        ... object ... marker ... object ...

    sentence:   String
                the sentence to analyze; it is lower-cased before any lookup.
                Has to contain obj_a and obj_b.
    obj_a:      Argument
                the first object to be compared to the second.
    obj_b:      Argument
                the second object to be compared to the first.
    '''
    lowered = sentence.lower()

    pos_of_a = find_pos_in_sentence(obj_a.name, lowered)
    pos_of_b = find_pos_in_sentence(obj_b.name, lowered)
    left = min(pos_of_a, pos_of_b)
    right = max(pos_of_a, pos_of_b)

    opposite_at = get_marker_pos(lowered, left, right, OPPOSITE_MARKERS)
    negation_at = get_marker_pos(lowered, left, right, NEGATIONS)
    positive_at = get_marker_pos(lowered, left, right, POSITIVE_MARKERS)

    if positive_at == -1:
        # No positive marker between the objects, so a negative one is
        # assumed to be there (a sentence never carries both kinds).
        negative_at = get_marker_pos(lowered, left, right, NEGATIVE_MARKERS)
        count = get_marker_count(lowered, left, right, NEGATIVE_MARKERS)
        a_wins = obj_a_wins_sentence(left, pos_of_a, opposite_at,
                                     negation_at, negative_at)
        # With a negative marker the outcome is flipped: obj_b wins when the
        # helper reports a win for obj_a.
        return {'marker_cnt': count, 'winner': obj_b if a_wins else obj_a}

    # A positive marker was found between the objects.
    count = get_marker_count(lowered, left, right, POSITIVE_MARKERS)
    a_wins = obj_a_wins_sentence(left, pos_of_a, opposite_at, negation_at,
                                 positive_at)
    return {'marker_cnt': count, 'winner': obj_a if a_wins else obj_b}