def remove_wrong_structures(sentences, obj_a, obj_b):
    '''
    Removes, in place, every sentence that cannot be analysed:
    1: sentences in which at least one of the two objects is not found
    2: sentences in which no marker is between the objects
    3: sentences in which there are both positive and negative markers
       between the objects

    sentences:  list
                the sentences to filter; each element needs a `text`
                attribute. The list itself is modified.

    obj_a:  Argument
            the first object to be compared (its `name` is searched for)

    obj_b:  Argument
            the second object to be compared (its `name` is searched for)

    Returns the same list object that was passed in, now filtered.
    '''
    kept = []
    for sentence in sentences:
        a_pos = find_pos_in_sentence(obj_a.name, sentence.text)
        b_pos = find_pos_in_sentence(obj_b.name, sentence.text)
        # -1 is the "not found" sentinel. If either object is missing there
        # is no span "between the objects", and min() would otherwise feed
        # the sentinel into the marker search as a start position.
        if a_pos == -1 or b_pos == -1:
            continue
        pos_first = min(a_pos, b_pos)
        pos_second = max(a_pos, b_pos)
        has_pos_marker = has_marker(sentence.text, pos_first, pos_second,
                                    POSITIVE_MARKERS)
        has_neg_marker = has_marker(sentence.text, pos_first, pos_second,
                                    NEGATIVE_MARKERS)
        # Keep the sentence only when exactly one polarity is present.
        if bool(has_pos_marker) != bool(has_neg_marker):
            kept.append(sentence)
    # Slice assignment keeps the caller's list object (in-place semantics)
    # while avoiding one linear index()/del pair per removed sentence.
    sentences[:] = kept
    return sentences
def prepare_sentence_DF(sentences, obj_a, obj_b):
    '''
    Builds a DataFrame with the columns 'object_a', 'object_b' and
    'sentence', holding one row per given sentence.

    For each row the name of the object found at the smaller position in
    the sentence text is put into 'object_a' and the other name into
    'object_b'; if obj_a is not located strictly before obj_b, obj_b's
    name comes first.

    sentences:  iterable
                elements need a `text` attribute

    obj_a, obj_b:   objects with a `name` attribute
    '''
    def _ordered_row(sentence):
        # Positions of both object names inside this sentence's text.
        a_at = find_pos_in_sentence(obj_a.name, sentence.text)
        b_at = find_pos_in_sentence(obj_b.name, sentence.text)
        first, second = (obj_a, obj_b) if a_at < b_at else (obj_b, obj_a)
        return [first.name, second.name, sentence.text]

    rows = [_ordered_row(sentence) for sentence in sentences]
    return pd.DataFrame.from_records(
        rows, columns=['object_a', 'object_b', 'sentence'])
def prepare_sentence_DF(sentences, obj_a, obj_b):
    '''
    Builds a DataFrame with the columns 'object_a', 'object_b' and
    'sentence', holding one row per given sentence.

    For each row the object found at the smaller position in the sentence
    is put into 'object_a' and the other one into 'object_b'.

    sentences:  iterable
                the sentences themselves (presumably strings; they are
                passed to find_pos_in_sentence and len() directly)

    obj_a, obj_b:   the two objects as they should be searched for and
                    stored (presumably strings as well)
    '''
    rows = []
    for sentence in sentences:
        pos_a = find_pos_in_sentence(obj_a, sentence)
        pos_b = find_pos_in_sentence(obj_b, sentence)
        if pos_a == -1 or pos_b == -1:
            # At least one object was not found: fall back to the sentence
            # boundaries, which keeps the given a/b order for any sentence
            # longer than one element.
            pos_a, pos_b = 0, len(sentence) - 1
        if pos_a < pos_b:
            rows.append([obj_a, obj_b, sentence])
        else:
            rows.append([obj_b, obj_a, sentence])
    return pd.DataFrame.from_records(
        rows, columns=['object_a', 'object_b', 'sentence'])
def marker_pos(sentence, pos_first, pos_second, marker):
    '''
    Looks the marker up in the sentence and reports its position, but only
    if that position lies strictly between pos_first and pos_second.
    In every other case (marker not found, or found outside that range)
    -1 is returned.
    '''
    location = find_pos_in_sentence(marker, sentence)
    if location == -1:
        return -1  # marker does not occur in the sentence
    if pos_first < location and location < pos_second:
        return location  # marker sits between the two objects
    return -1
def transform(self, dataframe):
    '''
    Runs `process` on every row of the dataframe and returns the results
    as a list, in row order.

    Each row must provide the columns 'object_a', 'object_b' and
    'sentence'. Only the part of the sentence that starts at the object
    found first and ends after the object found second is handed to
    `process`, together with both objects, self.processing and the
    replacements self.rep_a / self.rep_b.

    NOTE(review): positions come straight from find_pos_in_sentence and
    are not checked for a not-found result before slicing - confirm the
    callers only pass sentences that contain both objects.
    '''
    processed = []
    for _, record in dataframe.iterrows():
        first_obj = record['object_a']
        second_obj = record['object_b']
        sentence = record['sentence']
        first_at = find_pos_in_sentence(first_obj, sentence)
        second_at = find_pos_in_sentence(second_obj, sentence)
        # The span runs from the start of the earlier object up to the end
        # of the later one.
        if first_at < second_at:
            span = slice(first_at, second_at + len(second_obj))
        else:
            span = slice(second_at, first_at + len(first_obj))
        outcome = process(sentence[span], first_obj, second_obj,
                          self.processing,
                          rep_a=self.rep_a, rep_b=self.rep_b)
        processed.append(outcome)
    return processed
def what_is_better(sentence, obj_a, obj_b):
    '''
    Decides which of two objects wins the comparison made in a sentence.
    Only sentences of the form
        ... object ... marker ... object ...
    are supported.

    sentence:   String
                the sentence to analyze (lower-cased before analysis).
                Has to contain obj_a and obj_b.

    obj_a:  Argument
            the first object to be compared to the second.

    obj_b:  Argument
            the second object to be compared to the first.

    Returns a dictionary with
        'marker_cnt':   the result of get_marker_count for the marker kind
                        that was evaluated (positive or negative)
        'winner':       obj_a or obj_b
    '''
    lowered = sentence.lower()

    pos_of_a = find_pos_in_sentence(obj_a.name, lowered)
    pos_of_b = find_pos_in_sentence(obj_b.name, lowered)
    left, right = sorted((pos_of_a, pos_of_b))

    opposite_at = get_marker_pos(lowered, left, right, OPPOSITE_MARKERS)
    negation_at = get_marker_pos(lowered, left, right, NEGATIONS)
    positive_at = get_marker_pos(lowered, left, right, POSITIVE_MARKERS)

    if positive_at == -1:
        # No positive marker between the objects, so the sentence is judged
        # by its negative marker; the winner is the inverse of the positive
        # case.
        negative_at = get_marker_pos(lowered, left, right, NEGATIVE_MARKERS)
        count = get_marker_count(lowered, left, right, NEGATIVE_MARKERS)
        a_wins = obj_a_wins_sentence(
            left, pos_of_a, opposite_at, negation_at, negative_at)
        return {'marker_cnt': count, 'winner': obj_b if a_wins else obj_a}

    # A positive marker was found; positive and negative markers are not
    # expected to occur together, so the negative ones are not consulted.
    count = get_marker_count(lowered, left, right, POSITIVE_MARKERS)
    a_wins = obj_a_wins_sentence(
        left, pos_of_a, opposite_at, negation_at, positive_at)
    return {'marker_cnt': count, 'winner': obj_a if a_wins else obj_b}