Exemple #1
0
    def from_file(cls, filepath, synonyms):
        """Build an instance from a file of per-pair opinion counts.

        Each record produced by ``cls.iter_line_params`` carries a positive
        count, a negative count and the synonym-group indices of the source
        and the target (a fifth field is ignored). Element ``[0]`` of every
        referenced group is used as the opinion end. A positive and/or a
        negative opinion is registered only when its count is above zero.

        :param filepath: str, path of the file to read.
        :param synonyms: SynonymsCollection used to resolve group indices.
        :return: the populated instance of ``cls``.
        """
        assert (isinstance(filepath, str))
        assert (isinstance(synonyms, SynonymsCollection))

        result = cls(synonyms=synonyms)

        with open(filepath, 'r') as in_file:
            records = tqdm(cls.iter_line_params(in_file),
                           desc="Init BasePrinter from file")

            for pos_count, neg_count, source_id, target_id, _ in records:

                source_value = synonyms.get_group_by_index(int(source_id))[0]
                target_value = synonyms.get_group_by_index(int(target_id))[0]

                # Both opinions are built up front; only those with a
                # non-zero count end up registered.
                counted_opinions = [
                    (pos_count, Opinion(value_left=source_value,
                                        value_right=target_value,
                                        sentiment=PositiveLabel())),
                    (neg_count, Opinion(value_left=source_value,
                                        value_right=target_value,
                                        sentiment=NegativeLabel())),
                ]

                for count, opinion in counted_opinions:
                    if count > 0:
                        result.register_extracted_opinion(opinion,
                                                          count=count)

        return result
Exemple #2
0
    def create_opinions_by_relation_and_label(extracted_relation, label):
        """Create the forward and the backward opinion of an extracted relation.

        :param extracted_relation: ExtractedRelation providing the left and
            right entity values.
        :param label: LabelPair providing the Forward and Backward sentiments.
        :return: list ``[forward_opinion, backward_opinion]``; the backward
            opinion has the entity values swapped.
        """
        assert (isinstance(extracted_relation, ExtractedRelation))
        assert (isinstance(label, LabelPair))

        left = extracted_relation.LeftEntityValue
        right = extracted_relation.RightEntityValue

        # (left end, right end, sentiment) for each direction, forward first.
        directions = (
            (left, right, label.Forward),
            (right, left, label.Backward),
        )

        return [Opinion(value_left=l_value,
                        value_right=r_value,
                        sentiment=sentiment)
                for l_value, r_value, sentiment in directions]
Exemple #3
0
def read_opinions(filepath,
                  synonyms,
                  custom_opin_ends_iter=None,
                  read_sentiment=True,
                  skip_non_added=True):
    """Read opinions into a new OpinionCollection.

    :param filepath: path handed to the default opinion reader; not used
        when ``custom_opin_ends_iter`` is provided.
    :param synonyms: SynonymsCollection the resulting collection is built on.
    :param custom_opin_ends_iter: optional callable that receives
        ``read_sentiment`` and returns an iterable of
        ``(left_value, right_value, sentiment)`` triples.
    :param read_sentiment: bool, forwarded to the opinion source.
    :param skip_non_added: bool; when True, an opinion the collection refuses
        to add is reported with ``print`` and skipped, otherwise an exception
        is raised.
    :return: the filled OpinionCollection.
    :raises Exception: when an opinion could not be added and
        ``skip_non_added`` is False.
    """
    assert (isinstance(synonyms, SynonymsCollection))
    assert (callable(custom_opin_ends_iter) or custom_opin_ends_iter is None)
    assert (isinstance(read_sentiment, bool))
    assert (isinstance(skip_non_added, bool))

    opinions = OpinionCollection(opinions=[], synonyms=synonyms)

    if custom_opin_ends_iter is None:
        it = __iter_opinion_end_values(filepath, read_sentiment)
    else:
        it = custom_opin_ends_iter(read_sentiment)

    for left_value, right_value, sentiment in tqdm(it, "Reading opinions:"):

        o = Opinion(value_left=left_value,
                    value_right=right_value,
                    sentiment=Label.from_int(sentiment))

        if opinions.try_add_opinion(o) is not False:
            continue

        # The warning is only needed for rejected opinions, so it is built
        # here rather than once per iteration.
        msg = "Warning: opinion '{}->{}' was skipped!".format(
            o.value_left, o.value_right)

        if not skip_non_added:
            raise Exception(msg)

        print(msg)

    return opinions
Exemple #4
0
def iter_relevant_file_ids(source_filepath, opinions):
    """Yield names of documents that contain an attitude present in `opinions`.

    The source file is scanned line by line. A line containing 'File:'
    starts a new document; its name is the second ':'-separated field of
    that line, stripped. On a line containing 'Attitude:' the first two
    single-quoted values are read as the source and the target of a neutral
    opinion. When `opinions` has a synonymous opinion, the current document
    name is yielded and the remaining attitude lines are ignored until the
    next 'File:' line.

    :param source_filepath: path of the file to scan.
    :param opinions: OpinionCollection to match attitudes against.
    """
    assert (isinstance(opinions, OpinionCollection))

    with open(source_filepath, 'r') as source:

        doc_name = None
        doc_reported = False

        for text in tqdm(source.readlines(), desc=source_filepath):

            if 'File:' in text:
                doc_name = text.split(':')[1].strip()
                doc_reported = False

            if doc_reported or 'Attitude:' not in text:
                continue

            # Source value: between the first pair of single quotes.
            src_open = text.index(u"'")
            src_close = text.index(u"'", src_open + 1)

            # Target value: between the next pair of single quotes.
            tgt_open = text.index(u"'", src_close + 1)

            if "'" not in text[tgt_open + 1:]:
                # Show the offending line before index() below fails on it.
                print(text)

            tgt_close = text.index(u"'", tgt_open + 1)

            candidate = Opinion(value_left=text[src_open + 1:src_close],
                                value_right=text[tgt_open + 1:tgt_close],
                                sentiment=NeutralLabel())

            if opinions.has_synonymous_opinion(candidate):
                yield doc_name
                doc_reported = True
    def __find_or_create_reversed_opinion(opinion, opinion_collections):
        """Return the reversed counterpart of `opinion`.

        A neutral opinion with swapped ends is created and looked up in
        every collection in order; the first synonymous opinion found is
        returned. When no collection has one, the newly created neutral
        opinion is returned instead.

        :param opinion: Opinion whose ends are swapped.
        :param opinion_collections: iterable of collections providing
            ``has_synonymous_opinion`` / ``get_synonymous_opinion``.
        :return: an existing synonymous reversed opinion, or the new one.
        """
        # `collections.Iterable` was removed in Python 3.10; the ABC lives in
        # `collections.abc`. Fall back to the old location on Python 2.
        try:
            from collections.abc import Iterable
        except ImportError:
            from collections import Iterable

        assert(isinstance(opinion, Opinion))
        assert(isinstance(opinion_collections, Iterable))

        reversed_opinion = Opinion(opinion.value_right, opinion.value_left, NeutralLabel())

        for collection in opinion_collections:
            if collection.has_synonymous_opinion(reversed_opinion):
                return collection.get_synonymous_opinion(reversed_opinion)

        return reversed_opinion
    def decide_label_of_pair_in_title_optional(self, i, j, title_objects, title_frames):
        """Look up the expected sentiment for the pair of title objects (i, j).

        A neutral probe opinion is built from the values of the two objects
        and matched against the expected opinions.

        :param i: index of the left object in `title_objects`.
        :param j: index of the right object in `title_objects`.
        :param title_objects: collection providing ``get_object``.
        :param title_frames: not used by this implementation.
        :return: sentiment of the synonymous expected opinion, or None when
            there is no such opinion.
        """
        left_object = title_objects.get_object(i)
        right_object = title_objects.get_object(j)

        probe = Opinion(value_left=left_object.get_value(),
                        value_right=right_object.get_value(),
                        sentiment=NeutralLabel())

        expected = self.__expected_opinions

        if expected.has_synonymous_opinion(probe):
            return expected.get_synonymous_opinion(probe).sentiment

        return None
Exemple #7
0
    def __clone_with_different_label(self, opinions, label):
        """Copy `opinions` into a new collection, giving every copy `label`.

        :param opinions: OpinionCollection to copy the opinion ends from.
        :param label: Label assigned as sentiment of every copied opinion.
        :return: a new OpinionCollection built on ``self.Synonyms``.
        """
        assert(isinstance(opinions, OpinionCollection))
        assert(isinstance(label, Label))

        relabeled = OpinionCollection(opinions=[],
                                      synonyms=self.Synonyms)

        for source in opinions:
            assert(isinstance(source, Opinion))
            relabeled.add_opinion(Opinion(value_left=source.value_left,
                                          value_right=source.value_right,
                                          sentiment=label))

        return relabeled
    def __check_ref_opin_in_collection(self, ref_opinion, opinions, text_objects, is_same):
        """Compare the sentiment of `ref_opinion` with its match in `opinions`.

        The referenced objects are resolved through `text_objects` and turned
        into an Opinion carrying the sentiment of `ref_opinion`.

        :param ref_opinion: RefOpinion with LeftIndex, RightIndex, Sentiment.
        :param opinions: OpinionCollection searched for a synonymous opinion.
        :param text_objects: TextObjectsCollection resolving the indices.
        :param is_same: bool; True checks that the sentiments are equal,
            False checks that they differ.
        :return: result of the sentiment check, or False when `opinions` has
            no synonymous opinion.
        """
        assert(isinstance(ref_opinion, RefOpinion))
        assert(isinstance(opinions, OpinionCollection))
        assert(isinstance(text_objects, TextObjectsCollection))
        assert(isinstance(is_same, bool))

        left_object = text_objects.get_object(ref_opinion.LeftIndex)
        right_object = text_objects.get_object(ref_opinion.RightIndex)

        probe = Opinion(value_left=left_object.get_value(),
                        value_right=right_object.get_value(),
                        sentiment=ref_opinion.Sentiment)

        if not opinions.has_synonymous_opinion(probe):
            return False

        existed = opinions.get_synonymous_opinion(probe)

        if is_same:
            return existed.sentiment == probe.sentiment

        return existed.sentiment != probe.sentiment
Exemple #9
0
    def __extract_sentence_opinion_refs(text_objects_collection, title_opinions, synonyms):
        """Find object pairs of a sentence that match opinions of the title.

        Every ordered pair of distinct objects (by CollectionInd) is turned
        into a neutral probe opinion. A pair is kept when `title_opinions`
        has a synonymous opinion and no synonymous opinion was kept earlier.

        :param text_objects_collection: TextObjectsCollection of the sentence.
        :param title_opinions: collection of opinions to match against.
        :param synonyms: synonyms used for tagging and for tracking the
            opinions already kept.
        :return: tuple ``(opinion_refs, opinion_list)``: RefOpinion objects
            for the kept pairs and the matching title opinions, index-aligned.
        """
        assert(isinstance(text_objects_collection, TextObjectsCollection))

        matched_opinions = []
        refs = []
        seen = OpinionCollection(opinions=None, synonyms=synonyms)

        TextProcessor.__setup_tags(text_objects_collection=text_objects_collection,
                                   synonyms=synonyms)

        for left in text_objects_collection:
            for right in text_objects_collection:

                left_ind = left.CollectionInd
                right_ind = right.CollectionInd

                if left_ind == right_ind:
                    continue

                probe = Opinion(value_left=left.get_value(),
                                value_right=right.get_value(),
                                sentiment=NeutralLabel())

                # Both lookups are performed unconditionally.
                in_title = title_opinions.has_synonymous_opinion(probe)
                already_seen = seen.has_synonymous_opinion(probe)

                if not in_title or already_seen:
                    continue

                title_opinion = title_opinions.get_synonymous_opinion(probe)

                refs.append(RefOpinion(left_index=left_ind,
                                       right_index=right_ind,
                                       sentiment=title_opinion.sentiment))
                matched_opinions.append(title_opinion)

                was_added = seen.try_add_opinion(title_opinion)
                assert(was_added)

        return refs, matched_opinions
Exemple #10
0
def opinions_between_entities(E, diff, news, synonyms, sentiment_opins=None):
    """Collect neutral opinions between entities whose matrix value is `diff`.

    For every cell with ``E[i][j] == diff`` the entities with indices ``i``
    and ``j`` are taken from ``news.entities``. Pairs with an ignored entity
    or with both entities in the same synonym group are skipped. For the
    remaining pairs a neutral opinion is considered in both directions,
    using the first synonym of each entity as the opinion end.

    Note: entity values missing from `synonyms` are added to it.

    :param E: 2-D array-like exposing ``shape`` and ``E[i][j]`` indexing.
    :param diff: cell value selecting the pairs.
    :param news: object whose ``entities`` provides ``get_entity_by_index``.
    :param synonyms: synonyms collection (may be extended, see above).
    :param sentiment_opins: optional collection; opinions it already has
        (by synonyms) are not added to the result.
    :return: OpinionCollection with the collected neutral opinions.
    """
    def try_add_opinion(o, added, neutral_opins):
        assert (isinstance(o, Opinion))
        assert (isinstance(neutral_opins, OpinionCollection))

        # Filter if there is a sentiment relation
        if sentiment_opins is not None:
            if sentiment_opins.has_opinion_by_synonyms(o):
                return

        if neutral_opins.has_opinion_by_synonyms(o):
            return

        added.add(o.create_value_id())
        neutral_opins.add_opinion(o)

    def is_ignored(entity):
        # TODO. Move ignored entities into core.
        return env.stemmer.lemmatize_to_str(entity.value) in IGNORED_ENTITIES

    def get_entity_synonyms(entity):
        return synonyms.get_synonyms_list(entity.value), \
               synonyms.get_synonym_group_index(entity.value)

    # NOTE(review): `added` is only written to and never read or returned
    # by this function; kept to leave the create_value_id() calls intact.
    added = set()
    c = OpinionCollection(opinions=None, synonyms=synonyms)

    for i in range(E.shape[0]):
        for j in range(E.shape[1]):

            if E[i][j] != diff:
                continue

            e1 = news.entities.get_entity_by_index(i)
            e2 = news.entities.get_entity_by_index(j)

            if is_ignored(e1) or is_ignored(e2):
                continue

            if not synonyms.has_synonym(e1.value):
                synonyms.add_synonym(e1.value)

            if not synonyms.has_synonym(e2.value):
                synonyms.add_synonym(e2.value)

            sl1, g1 = get_entity_synonyms(e1)
            sl2, g2 = get_entity_synonyms(e2)

            r_left = sl1[0]
            r_right = sl2[0]

            # Filter the same groups: both entities are a part of the same
            # synonym group. (A message used to be formatted here and then
            # discarded; that dead expression has been removed.)
            if g1 == g2:
                continue

            try_add_opinion(Opinion(r_left, r_right, NeutralLabel()), added, c)
            try_add_opinion(Opinion(r_right, r_left, NeutralLabel()), added, c)

    return c
Exemple #11
0
    def _extract_opinions_from_title(self, title_terms, title_objects, title_frames, synonyms):
        """Extract opinions between pairs of objects found in a title.

        Every pair of title objects whose left bound TermIndex is strictly
        smaller than the right one is passed through a chain of filters;
        pairs that survive produce an Opinion (added to the resulting
        collection unless a synonymous one is already there) and a
        RefOpinion pointing at the two objects. Debug counters on `self`
        are updated along the way.

        :param title_terms: list of title terms, forwarded to the
            preposition-based rejection check.
        :param title_objects: TextObjectsCollection of the title.
        :param title_frames: TextFrameVariantsCollection, forwarded to
            ``decide_label_of_pair_in_title_optional``.
        :param synonyms: SynonymsCollection used for tagging, for the
            synonym presence checks and for group lookups.
        :return: tuple ``(opinion_refs, title_opinions)``: list of
            RefOpinion and the OpinionCollection of extracted opinions.
        """
        assert(isinstance(title_terms, list))
        assert(isinstance(title_objects, TextObjectsCollection))
        assert(isinstance(title_frames, TextFrameVariantsCollection))
        assert(isinstance(synonyms, SynonymsCollection))

        opinion_refs = []
        title_opinions = OpinionCollection(opinions=None, synonyms=synonyms)

        TextProcessor.__setup_tags(text_objects_collection=title_objects,
                                   synonyms=synonyms)

        for l_obj in title_objects:
            for r_obj in title_objects:

                l_bound = l_obj.get_bound()
                r_bound = r_obj.get_bound()

                # Skip pairs that start at the same term ...
                if l_bound.TermIndex == r_bound.TermIndex:
                    continue

                # ... and pairs where the left object does not precede the
                # right one, so only one ordering of each pair is processed.
                if l_bound.TermIndex >= r_bound.TermIndex:
                    continue

                i = l_obj.CollectionInd
                j = r_obj.CollectionInd

                # NOTE(review): the exact rule checked here is defined
                # outside of this block.
                if not self.__check_auth_correctness(i=i, j=j, objects=title_objects):
                    continue

                label = self.decide_label_of_pair_in_title_optional(
                    i=i, j=j,
                    title_objects=title_objects,
                    title_frames=title_frames)

                if label is None:
                    # Considered by pair-base processor
                    continue

                opinion = Opinion(value_left=l_obj.get_value(),
                                  value_right=r_obj.get_value(),
                                  sentiment=label)

                # Counted before the remaining filters, so this counter also
                # includes opinions that are rejected below.
                self.__debug_opinions_created += 1

                # Optional rejection based on prepositions in the title.
                if self.__check_obj_preposition_in_title:
                    if self.__reject_by_russian_prepositions(l_obj=l_obj, r_obj=r_obj, title_terms=title_terms):
                        self.__debug_opinions_rejected_by_preps += 1
                        continue

                # Both opinion ends must pass the synonyms presence check.
                if not self.__guarantee_synonyms_presence(synonyms=synonyms, obj_value=opinion.value_left):
                    self.__debug_opinions_with_missed_synonyms += 1
                    continue

                if not self.__guarantee_synonyms_presence(synonyms=synonyms, obj_value=opinion.value_right):
                    self.__debug_opinions_with_missed_synonyms += 1
                    continue

                lg_ind = synonyms.get_synonym_group_index(opinion.value_left)
                rg_ind = synonyms.get_synonym_group_index(opinion.value_right)

                # Both ends belong to the same synonym group: skip the pair.
                if lg_ind == rg_ind:
                    self.__debug_opinions_looped += 1
                    continue

                if not title_opinions.has_synonymous_opinion(opinion):
                    # OK, adding
                    self.__debug_opinions_total_extracted_from_titles += 1
                    add_result = title_opinions.try_add_opinion(opinion)
                    assert(add_result)
                else:
                    self.__debug_opinions_title_synonymous_existed += 1

                # A reference is recorded in both cases: for a newly added
                # opinion and for one that already had a synonymous opinion.
                opinion_ref = RefOpinion(left_index=i, right_index=j, sentiment=opinion.sentiment)
                opinion_refs.append(opinion_ref)

        return opinion_refs, title_opinions
Exemple #12
0
 def create_opinion(self):
     """Build an Opinion from this object's entity values and label."""
     left_value = self.left_entity_value
     right_value = self.right_entity_value
     return Opinion(left_value, right_value, self.label)