def from_file(cls, filepath, synonyms):
    """
    Build an instance of `cls` from a file of per-pair opinion counts.

    Each record yielded by `cls.iter_line_params` is unpacked as
    (pos_count, neg_count, source_id, target_id, <ignored>). Both ids are
    converted with int() and resolved via `synonyms.get_group_by_index`;
    the first value of every group becomes the corresponding opinion end.
    A positive and a negative opinion are always constructed, but each one
    is registered only when its count is greater than zero.

    filepath: str, path to the file to read.
    synonyms: SynonymsCollection used to resolve group indices.
    returns: the populated instance.
    """
    assert isinstance(filepath, str)
    assert isinstance(synonyms, SynonymsCollection)

    result = cls(synonyms=synonyms)

    with open(filepath, 'r') as source:
        records = cls.iter_line_params(source)
        for record in tqdm(records, desc="Init BasePrinter from file"):
            pos_count, neg_count, source_id, target_id, _ = record

            source_group = synonyms.get_group_by_index(int(source_id))
            left = source_group[0]
            target_group = synonyms.get_group_by_index(int(target_id))
            right = target_group[0]

            # Positive first, then negative -- the same order in which the
            # opinions are registered below.
            candidates = [
                (pos_count, Opinion(value_left=left,
                                    value_right=right,
                                    sentiment=PositiveLabel())),
                (neg_count, Opinion(value_left=left,
                                    value_right=right,
                                    sentiment=NegativeLabel())),
            ]

            for count, opinion in candidates:
                if count > 0:
                    result.register_extracted_opinion(opinion, count=count)

    return result
def create_opinions_by_relation_and_label(extracted_relation, label):
    """
    Create the forward and the backward opinion for a relation.

    The forward opinion goes from the left entity value to the right one and
    carries `label.Forward`; the backward opinion has its ends swapped and
    carries `label.Backward`.

    extracted_relation: ExtractedRelation providing the entity values.
    label: LabelPair providing both sentiments.
    returns: list [forward_opinion, backward_opinion].
    """
    assert isinstance(extracted_relation, ExtractedRelation)
    assert isinstance(label, LabelPair)

    left = extracted_relation.LeftEntityValue
    right = extracted_relation.RightEntityValue

    forward = Opinion(value_left=left,
                      value_right=right,
                      sentiment=label.Forward)
    backward = Opinion(value_left=right,
                       value_right=left,
                       sentiment=label.Backward)

    return [forward, backward]
def read_opinions(filepath, synonyms, custom_opin_ends_iter=None,
                  read_sentiment=True, skip_non_added=True):
    """
    Read opinions into a new OpinionCollection.

    Opinion ends come from `custom_opin_ends_iter(read_sentiment)` when that
    callable is given, otherwise from
    `__iter_opinion_end_values(filepath, read_sentiment)`. Every item is
    unpacked as (left_value, right_value, sentiment) and the sentiment is
    converted with `Label.from_int`.

    filepath: path passed to the default iterator.
    synonyms: SynonymsCollection of the resulting collection.
    custom_opin_ends_iter: optional callable producing the opinion ends.
    read_sentiment: bool, forwarded to the iterator in use.
    skip_non_added: bool; when `try_add_opinion` returns False the warning
        is printed if True, and raised as Exception if False.
    returns: the filled OpinionCollection.
    """
    assert isinstance(synonyms, SynonymsCollection)
    assert callable(custom_opin_ends_iter) or custom_opin_ends_iter is None
    assert isinstance(read_sentiment, bool)
    assert isinstance(skip_non_added, bool)

    collection = OpinionCollection(opinions=[], synonyms=synonyms)

    if custom_opin_ends_iter is None:
        ends_iter = __iter_opinion_end_values(filepath, read_sentiment)
    else:
        ends_iter = custom_opin_ends_iter(read_sentiment)

    for left_value, right_value, sentiment in tqdm(ends_iter, "Reading opinions:"):
        opinion = Opinion(value_left=left_value,
                          value_right=right_value,
                          sentiment=Label.from_int(sentiment))

        was_added = collection.try_add_opinion(opinion)

        warning = "Warning: opinion '{}->{}' was skipped!".format(
            opinion.value_left, opinion.value_right)

        # Only an explicit False counts as a rejected opinion.
        if was_added is not False:
            continue

        if skip_non_added:
            print(warning)
        else:
            raise Exception(warning)

    return collection
def iter_relevant_file_ids(source_filepath, opinions):
    """
    Yield ids of files that mention an opinion known to `opinions`.

    The source file is scanned line by line:
      * a line containing 'File:' sets the current file id to the stripped
        text between its first and second ':' and re-enables processing;
      * a line containing 'Attitude:' is expected to hold two
        single-quoted values (source, then target). When a neutral opinion
        built from them has a synonymous opinion in `opinions`, the current
        file id is yielded and the remaining attitudes are skipped until
        the next 'File:' line.

    source_filepath: path to the file to scan.
    opinions: OpinionCollection to match against.
    raises: ValueError (from str.index) when an attitude line lacks the
        expected quotes; the line is printed first if the closing quote of
        the target value is missing.
    """
    assert isinstance(opinions, OpinionCollection)

    with open(source_filepath, 'r') as source:
        file_id = None
        doc_done = False

        for line in tqdm(source.readlines(), desc=source_filepath):
            if 'File:' in line:
                file_id = line.split(':')[1].strip()
                doc_done = False

            if 'Attitude:' not in line or doc_done:
                continue

            # First quoted value: the opinion source.
            src_open = line.index(u"'")
            src_close = line.index(u"'", src_open + 1)
            source_value = line[src_open + 1:src_close]

            # Second quoted value: the opinion target.
            tgt_open = line.index(u"'", src_close + 1)
            if "'" not in line[tgt_open + 1:]:
                # Show the offending line before index() raises below.
                print(line)
            tgt_close = line.index(u"'", tgt_open + 1)
            target_value = line[tgt_open + 1:tgt_close]

            candidate = Opinion(value_left=source_value,
                                value_right=target_value,
                                sentiment=NeutralLabel())

            if not opinions.has_synonymous_opinion(candidate):
                continue

            yield file_id
            doc_done = True
def __find_or_create_reversed_opinion(opinion, opinion_collections):
    """
    Return the reversed counterpart of `opinion`.

    A neutral opinion with swapped ends is built first. The collections are
    then checked in order, and the first one that has a synonymous opinion
    provides the result via `get_synonymous_opinion`. If none of them has
    one, the freshly built neutral reversed opinion is returned.

    opinion: Opinion to reverse.
    opinion_collections: iterable of collections exposing
        `has_synonymous_opinion` / `get_synonymous_opinion`.
    returns: the existing synonymous reversed opinion, or a new neutral one.
    """
    # The `collections.Iterable` alias was removed in Python 3.10; take the
    # ABC from `collections.abc` and fall back for interpreters without it.
    try:
        from collections.abc import Iterable
    except ImportError:
        from collections import Iterable

    assert isinstance(opinion, Opinion)
    assert isinstance(opinion_collections, Iterable)

    reversed_opinion = Opinion(opinion.value_right,
                               opinion.value_left,
                               NeutralLabel())

    for collection in opinion_collections:
        if collection.has_synonymous_opinion(reversed_opinion):
            return collection.get_synonymous_opinion(reversed_opinion)

    return reversed_opinion
def decide_label_of_pair_in_title_optional(self, i, j, title_objects, title_frames):
    """
    Look up the expected sentiment for a pair of title objects.

    A neutral opinion is built from the values of objects `i` and `j` and
    searched for in the expected opinions of this instance.

    i, j: indices passed to `title_objects.get_object`.
    title_objects: collection providing the objects.
    title_frames: not used by this implementation.
    returns: sentiment of the synonymous expected opinion, or None when
        there is no such opinion.
    """
    left_obj = title_objects.get_object(i)
    right_obj = title_objects.get_object(j)

    probe = Opinion(value_left=left_obj.get_value(),
                    value_right=right_obj.get_value(),
                    sentiment=NeutralLabel())

    if self.__expected_opinions.has_synonymous_opinion(probe):
        matched = self.__expected_opinions.get_synonymous_opinion(probe)
        return matched.sentiment

    return None
def __clone_with_different_label(self, opinions, label):
    """
    Copy `opinions` into a new collection, replacing every sentiment.

    opinions: OpinionCollection to copy from.
    label: Label assigned to every copied opinion.
    returns: new OpinionCollection (built with `self.Synonyms`) holding
        opinions with the same ends and `label` as sentiment.
    """
    assert isinstance(opinions, OpinionCollection)
    assert isinstance(label, Label)

    relabeled = OpinionCollection(opinions=[], synonyms=self.Synonyms)

    for source in opinions:
        assert isinstance(source, Opinion)
        relabeled.add_opinion(Opinion(value_left=source.value_left,
                                      value_right=source.value_right,
                                      sentiment=label))

    return relabeled
def __check_ref_opin_in_collection(self, ref_opinion, opinions, text_objects, is_same):
    """
    Check a referenced opinion against a synonymous one in `opinions`.

    ref_opinion: RefOpinion whose indices select objects in `text_objects`.
    opinions: OpinionCollection to search in.
    text_objects: TextObjectsCollection resolving the indices.
    is_same: bool; when True the sentiments are required to be equal,
        when False they are required to differ.
    returns: False when `opinions` has no synonymous opinion, otherwise the
        outcome of the sentiment comparison selected by `is_same`.
    """
    assert isinstance(ref_opinion, RefOpinion)
    assert isinstance(opinions, OpinionCollection)
    assert isinstance(text_objects, TextObjectsCollection)
    assert isinstance(is_same, bool)

    left_obj = text_objects.get_object(ref_opinion.LeftIndex)
    right_obj = text_objects.get_object(ref_opinion.RightIndex)

    probe = Opinion(value_left=left_obj.get_value(),
                    value_right=right_obj.get_value(),
                    sentiment=ref_opinion.Sentiment)

    if not opinions.has_synonymous_opinion(probe):
        return False

    existed = opinions.get_synonymous_opinion(probe)

    # `!=` and `==` are both spelled out on purpose: the sentiment type may
    # define them independently.
    return ((existed.sentiment != probe.sentiment and not is_same) or
            (existed.sentiment == probe.sentiment and is_same))
def __extract_sentence_opinion_refs(text_objects_collection, title_opinions, synonyms):
    """
    Find object pairs of a text that repeat an opinion from the title.

    Every ordered pair of distinct objects (by `CollectionInd`) is turned
    into a neutral probe opinion. A pair is accepted when `title_opinions`
    has a synonymous opinion and no synonymous opinion was accepted before;
    the title opinion then supplies the sentiment.

    text_objects_collection: TextObjectsCollection to pair up.
    title_opinions: collection of opinions extracted from the title.
    synonyms: synonyms used for tag setup and for duplicate tracking.
    returns: (opinion_refs, opinion_list) -- RefOpinion instances and the
        matching title opinions, in the same order.
    """
    assert isinstance(text_objects_collection, TextObjectsCollection)

    opinion_list = []
    opinion_refs = []
    accepted = OpinionCollection(opinions=None, synonyms=synonyms)

    TextProcessor.__setup_tags(text_objects_collection=text_objects_collection,
                               synonyms=synonyms)

    for left_obj in text_objects_collection:
        for right_obj in text_objects_collection:

            if left_obj.CollectionInd == right_obj.CollectionInd:
                continue

            probe = Opinion(value_left=left_obj.get_value(),
                            value_right=right_obj.get_value(),
                            sentiment=NeutralLabel())

            known_in_title = title_opinions.has_synonymous_opinion(probe)
            seen_before = accepted.has_synonymous_opinion(probe)

            if not known_in_title or seen_before:
                continue

            title_opinion = title_opinions.get_synonymous_opinion(probe)

            opinion_refs.append(RefOpinion(left_index=left_obj.CollectionInd,
                                           right_index=right_obj.CollectionInd,
                                           sentiment=title_opinion.sentiment))
            opinion_list.append(title_opinion)

            # Kept as a separate statement so the opinion is still added
            # when assertions are disabled.
            add_result = accepted.try_add_opinion(title_opinion)
            assert add_result

    return opinion_refs, opinion_list
def opinions_between_entities(E, diff, news, synonyms, sentiment_opins=None):
    """
    Relations that had the same difference

    Collect neutral opinions between entity pairs (i, j) for which
    `E[i][j] == diff`. For every such pair both directions are considered.
    Pairs are skipped when either entity is ignored or when both entities
    belong to the same synonym group.

    E: 2-d structure exposing `shape` and `E[i][j]` indexing.
    diff: value an element of `E` must equal for the pair to be considered.
    news: object whose `entities.get_entity_by_index` resolves indices.
    synonyms: synonyms collection; values of considered entities that are
        missing from it are added (the collection is modified).
    sentiment_opins: optional collection; opinions having a synonymous one
        here are not added.
    returns: OpinionCollection with the collected neutral opinions.
    """

    def try_add_opinion(o, added, neutral_opins):
        assert isinstance(o, Opinion)
        assert isinstance(neutral_opins, OpinionCollection)

        # Filter if there is a sentiment relation
        if sentiment_opins is not None:
            if sentiment_opins.has_opinion_by_synonyms(o):
                return

        if neutral_opins.has_opinion_by_synonyms(o):
            return

        added.add(o.create_value_id())
        neutral_opins.add_opinion(o)

    def is_ignored(entity):
        # TODO. Move ignored entities into core.
        return env.stemmer.lemmatize_to_str(entity.value) in IGNORED_ENTITIES

    def get_entity_synonyms(entity):
        return synonyms.get_synonyms_list(entity.value), \
               synonyms.get_synonym_group_index(entity.value)

    # NOTE: `added` is only written to; it is neither read nor returned by
    # this function.
    added = set()
    c = OpinionCollection(opinions=None, synonyms=synonyms)

    for i in range(E.shape[0]):
        for j in range(E.shape[1]):

            if E[i][j] != diff:
                continue

            e1 = news.entities.get_entity_by_index(i)
            e2 = news.entities.get_entity_by_index(j)

            if is_ignored(e1) or is_ignored(e2):
                continue

            if not synonyms.has_synonym(e1.value):
                synonyms.add_synonym(e1.value)

            if not synonyms.has_synonym(e2.value):
                synonyms.add_synonym(e2.value)

            sl1, g1 = get_entity_synonyms(e1)
            sl2, g2 = get_entity_synonyms(e2)

            r_left = sl1[0]
            r_right = sl2[0]

            # Filter the same groups: both entities are a part of the same
            # synonym group. The message that used to be formatted here was
            # a discarded expression (never printed or logged) whose
            # encode() calls could only raise, so it has been dropped.
            if g1 == g2:
                continue

            try_add_opinion(Opinion(r_left, r_right, NeutralLabel()), added, c)
            try_add_opinion(Opinion(r_right, r_left, NeutralLabel()), added, c)

    return c
def _extract_opinions_from_title(self, title_terms, title_objects, title_frames, synonyms):
    """
    Extract opinions between pairs of objects of a title.

    Pairs are taken so that the left object's bound has a smaller
    `TermIndex` than the right one's. A pair is dropped when the authority
    check fails, when no label is decided for it, when the (optional)
    preposition check rejects it, when synonyms presence cannot be
    guaranteed for either end, or when both ends fall into the same synonym
    group. The debug counters of the instance are updated along the way.

    title_terms: list of title terms, used by the preposition check.
    title_objects: TextObjectsCollection with the objects of the title.
    title_frames: TextFrameVariantsCollection forwarded to label decision.
    synonyms: SynonymsCollection used for grouping and for the result.
    returns: (opinion_refs, title_opinions) -- a RefOpinion for every
        surviving pair and the collection of distinct (by synonyms)
        opinions.
    """
    assert isinstance(title_terms, list)
    assert isinstance(title_objects, TextObjectsCollection)
    assert isinstance(title_frames, TextFrameVariantsCollection)
    assert isinstance(synonyms, SynonymsCollection)

    refs = []
    title_opinions = OpinionCollection(opinions=None, synonyms=synonyms)

    TextProcessor.__setup_tags(text_objects_collection=title_objects,
                               synonyms=synonyms)

    for left_obj in title_objects:
        for right_obj in title_objects:

            left_bound = left_obj.get_bound()
            right_bound = right_obj.get_bound()

            # Keep only pairs where the left object goes strictly first.
            if left_bound.TermIndex == right_bound.TermIndex:
                continue
            if left_bound.TermIndex >= right_bound.TermIndex:
                continue

            i = left_obj.CollectionInd
            j = right_obj.CollectionInd

            if not self.__check_auth_correctness(i=i, j=j, objects=title_objects):
                continue

            label = self.decide_label_of_pair_in_title_optional(
                i=i, j=j,
                title_objects=title_objects,
                title_frames=title_frames)

            if label is None:
                # Considered by pair-base processor
                continue

            opinion = Opinion(value_left=left_obj.get_value(),
                              value_right=right_obj.get_value(),
                              sentiment=label)

            self.__debug_opinions_created += 1

            if self.__check_obj_preposition_in_title and \
                    self.__reject_by_russian_prepositions(l_obj=left_obj,
                                                          r_obj=right_obj,
                                                          title_terms=title_terms):
                self.__debug_opinions_rejected_by_preps += 1
                continue

            # The right end is checked only when the left end has passed.
            both_ends_known = (
                self.__guarantee_synonyms_presence(synonyms=synonyms,
                                                   obj_value=opinion.value_left) and
                self.__guarantee_synonyms_presence(synonyms=synonyms,
                                                   obj_value=opinion.value_right))
            if not both_ends_known:
                self.__debug_opinions_with_missed_synonyms += 1
                continue

            left_group = synonyms.get_synonym_group_index(opinion.value_left)
            right_group = synonyms.get_synonym_group_index(opinion.value_right)

            if left_group == right_group:
                self.__debug_opinions_looped += 1
                continue

            if title_opinions.has_synonymous_opinion(opinion):
                self.__debug_opinions_title_synonymous_existed += 1
            else:
                # OK, adding
                self.__debug_opinions_total_extracted_from_titles += 1
                add_result = title_opinions.try_add_opinion(opinion)
                assert add_result

            refs.append(RefOpinion(left_index=i,
                                   right_index=j,
                                   sentiment=opinion.sentiment))

    return refs, title_opinions
def create_opinion(self):
    """
    Create an Opinion from this object's entity values and label.

    returns: Opinion built positionally from `left_entity_value`,
        `right_entity_value` and `label`.
    """
    left = self.left_entity_value
    right = self.right_entity_value
    return Opinion(left, right, self.label)