Ejemplo n.º 1
0
    def __parse_opinion(line, objects_list):
        """Build a RefOpinion from one serialized opinion line.

        The first ``len(ContextsReader.OPINION_KEY)`` characters of ``line``
        are dropped; the remainder must contain three delimited fields:

        * ``b:(<int>)`` -- integer handed to ``Label.from_int``;
        * ``oi:[<left>,<right>]`` -- two comma-separated integer indices;
        * ``si:{<tag>}`` -- text stored on the opinion through ``set_tag``.

        :param line: serialized opinion text.
        :param objects_list: list passed to RefOpinion as ``owner``.
        :return: the RefOpinion with its tag set.
        :raises ValueError: if a field marker is missing, the index field does
            not hold exactly two items, or a numeric field is not an integer.
        """
        assert (isinstance(objects_list, list))

        def field(text, opening, closing):
            # Text enclosed by `opening` and the first `closing` located at or
            # after the position where `opening` starts.
            begin = text.index(opening)
            end = text.index(closing, begin)
            return text[begin + len(opening):end]

        payload = line[len(ContextsReader.OPINION_KEY):]

        sentiment = Label.from_int(int(field(payload, u'b:(', u')')))

        raw_left, raw_right = field(payload, u'oi:[', u']').split(u',')
        left_index = int(raw_left)
        right_index = int(raw_right)

        opinion = RefOpinion(left_index=left_index,
                             right_index=right_index,
                             sentiment=sentiment,
                             owner=objects_list)

        # The tag is parsed last, after the opinion has been constructed.
        opinion.set_tag(field(payload, u'si:{', u'}'))

        return opinion
Ejemplo n.º 2
0
def read_opinions(filepath,
                  synonyms,
                  custom_opin_ends_iter=None,
                  read_sentiment=True,
                  skip_non_added=True):
    """Read opinions into a new OpinionCollection.

    Triples ``(left_value, right_value, sentiment)`` come from
    ``custom_opin_ends_iter(read_sentiment)`` when that callable is supplied,
    otherwise from ``__iter_opinion_end_values(filepath, read_sentiment)``.

    :param filepath: source path; only passed to the default iterator.
    :param synonyms: SynonymsCollection the resulting collection is built on.
    :param custom_opin_ends_iter: optional callable that receives
        ``read_sentiment`` and returns an iterable of triples.
    :param read_sentiment: bool forwarded to whichever iterator is used.
    :param skip_non_added: when True, an opinion rejected by
        ``try_add_opinion`` is reported with a printed warning; when False,
        an Exception carrying the same message is raised instead.
    :return: the populated OpinionCollection.
    :raises Exception: if an opinion is rejected and ``skip_non_added`` is
        False.
    """
    assert (isinstance(synonyms, SynonymsCollection))
    assert (callable(custom_opin_ends_iter) or custom_opin_ends_iter is None)
    assert (isinstance(read_sentiment, bool))
    assert (isinstance(skip_non_added, bool))

    opinions = OpinionCollection(opinions=[], synonyms=synonyms)

    it = __iter_opinion_end_values(filepath, read_sentiment) if custom_opin_ends_iter is None \
        else custom_opin_ends_iter(read_sentiment)

    for left_value, right_value, sentiment in tqdm(it, "Reading opinions:"):

        o = Opinion(value_left=left_value,
                    value_right=right_value,
                    sentiment=Label.from_int(sentiment))

        # Only an explicit False result counts as a rejection.
        if opinions.try_add_opinion(o) is not False:
            continue

        # The warning is formatted only for rejected opinions, so opinions
        # that were added never pay for (or fail in) message formatting.
        msg = "Warning: opinion '{}->{}' was skipped!".format(
            o.value_left, o.value_right)

        if not skip_non_added:
            raise Exception(msg)

        print(msg)

    return opinions
Ejemplo n.º 3
0
def create_test_opinions(test_collections, labels, synonyms_filepath, stemmer):
    """Turn predicted labels into one OpinionCollection per test collection.

    ``labels`` is consumed sequentially: the k-th opinion vector met while
    walking ``test_collections`` in order receives ``labels[k]``. The label is
    stored on the vector via ``set_label`` and an opinion is created from the
    vector's values. Opinions with a NeutralLabel are neither added nor
    reported; any other opinion is added unless the collection already has
    it by synonyms, in which case a "Failed for ..." line is printed.

    :param test_collections: list of iterables of opinion vectors.
    :param labels: numpy array with one integer label per opinion vector.
    :param synonyms_filepath: path given to ``SynonymsCollection.from_file``.
    :param stemmer: Stemmer used for the synonyms and for each collection.
    :return: list of OpinionCollection, one per item of ``test_collections``.
    """
    assert (isinstance(test_collections, list))
    assert (isinstance(labels, np.ndarray))
    assert (isinstance(stemmer, Stemmer))

    synonyms = SynonymsCollection.from_file(synonyms_filepath, stemmer=stemmer)
    result = []
    position = 0  # index of the next unused entry of `labels`

    for collection in test_collections:
        collected = OpinionCollection(None, synonyms, stemmer)

        for vector in collection:
            predicted = Label.from_int(int(labels[position]))
            position += 1

            vector.set_label(predicted)
            candidate = collected.create_opinion(vector.value_left,
                                                 vector.value_right,
                                                 vector.label)

            # The duplicate lookup is performed for every opinion, neutral
            # ones included, before the label kind is inspected.
            is_duplicate = collected.has_opinion_by_synonyms(candidate)

            if isinstance(predicted, NeutralLabel):
                continue

            if is_duplicate:
                print("Failed for o={}".format(candidate.to_unicode().encode('utf-8')))
            else:
                collected.add_opinion(candidate)

        result.append(collected)

    return result
Ejemplo n.º 4
0
    def decide_label_of_pair_in_title_optional(self, i, j, title_objects,
                                               title_frames):
        """Try to derive a label for the title objects at indices i and j.

        Debug counters on ``self`` are updated along the way. ``None`` is
        returned when:

        * either object is rejected by the NER types limitation;
        * no frame variants are selected for the span between the bounds;
        * the number of polarities differs from the number of frame variants;
        * the resulting value lies strictly between -1 and 1.

        Otherwise the label is -1 if any frame label is -1, else the mean of
        the (optionally inverted) frame labels, converted with
        ``Label.from_int``.

        :param i: index of the left object in ``title_objects``.
        :param j: index of the right object in ``title_objects``.
        :param title_objects: collection providing ``get_object(index)``.
        :param title_frames: frame variants passed to ``__get_frames_within``.
        :return: a Label, or None when no label could be decided.
        """
        self.__debug_title_opinions_checked += 1

        # Objects are fetched and validated one at a time, left first, so the
        # right object is never requested when the left one is rejected.
        pair = []
        for obj_index in (i, j):
            obj = title_objects.get_object(obj_index)
            if not self.__ner_types_limitation.is_auth(obj):
                self.__debug_title_opinions_with_objs_non_valid_by_type += 1
                return None
            pair.append(obj)
        left_obj, right_obj = pair

        left_bound = left_obj.get_bound()
        right_bound = right_obj.get_bound()

        frames_between = self.__get_frames_within(
            left_in=left_bound.TermIndex + left_bound.Length,
            right_in=right_bound.TermIndex - 1,
            text_frame_variants=title_frames)

        polarities, inversions = get_frames_polarities(
            text_frame_variants=frames_between, frames=self.Settings.Frames)

        # Counted before the emptiness check, so empty results are included.
        self.__debug_title_opinions_processed_by_frames += 1

        found = len(frames_between)

        if found == 0:
            self.__debug_title_opinions_with_empty_frames += 1
            return None

        if found != len(polarities):
            self.__debug_title_opinions_with_polarities_missed += 1
            return None

        labels = []
        for position, polarity in enumerate(polarities):
            flipped = optional_invert_label(polarity.Label, inversions[position])
            labels.append(flipped.to_int())

        averaged = mean(labels)

        # A single negative frame forces the whole pair to negative.
        label = -1 if -1 in labels else averaged

        if -1 < label < 1:
            self.__debug_title_opinions_with_unknown_label += 1
            return None

        self.__debug_valid += 1

        return Label.from_int(int(label))
Ejemplo n.º 5
0
    def create_label_from_relations(relation_labels, label_creation_mode):
        """Combine the labels of several relations into a single label.

        :param relation_labels: list of relation labels; the AVERAGE mode
            reads ``Forward`` and ``Backward`` labels from each item.
        :param label_creation_mode: unicode value compared against the
            LabelCalculationMode constants.
        :return: the first item of ``relation_labels`` for FIRST_APPEARED; for
            AVERAGE, a LabelPair whose forward/backward labels are the sign of
            the summed integer labels in that direction; None for any other
            mode.
        :raises IndexError: for FIRST_APPEARED when ``relation_labels`` is
            empty.
        """
        assert (isinstance(relation_labels, list))
        assert (isinstance(label_creation_mode, unicode))

        label = None
        if label_creation_mode == LabelCalculationMode.FIRST_APPEARED:
            label = relation_labels[0]
        elif label_creation_mode == LabelCalculationMode.AVERAGE:
            forward_sum = sum(l.Forward.to_int() for l in relation_labels)
            backward_sum = sum(l.Backward.to_int() for l in relation_labels)
            # np.sign returns a NumPy scalar; convert it to a plain int before
            # handing it to Label.from_int.
            label = LabelPair(forward=Label.from_int(int(np.sign(forward_sum))),
                              backward=Label.from_int(int(np.sign(backward_sum))))

        if DebugKeys.PredictLabel:
            print([l.to_int() for l in relation_labels])
            # `label` stays None when no mode matched; report that instead of
            # failing inside a debug-only statement.
            result = None if label is None else label.to_int()
            print("Result: {}".format(result))

        # TODO: Correct label

        return label
Ejemplo n.º 6
0
        def calculate_label(relation_labels):
            """Combine several relation labels into one label.

            The mode is read from ``self.Settings.RelationLabelCalculationMode``
            (``self`` comes from the enclosing scope).

            :param relation_labels: list of labels exposing ``to_int()``.
            :return: the first item for FIRST_APPEARED; for AVERAGE, the Label
                built from the sign of the summed integer labels; None for any
                other mode.
            :raises IndexError: for FIRST_APPEARED when ``relation_labels`` is
                empty.
            """
            assert(isinstance(relation_labels, list))

            mode = self.Settings.RelationLabelCalculationMode

            label = None
            if mode == LabelCalculationMode.FIRST_APPEARED:
                label = relation_labels[0]
            elif mode == LabelCalculationMode.AVERAGE:
                total = sum(l.to_int() for l in relation_labels)
                # np.sign returns a NumPy scalar; convert it to a plain int
                # before handing it to Label.from_int.
                label = Label.from_int(int(np.sign(total)))

            if DebugKeys.PredictLabel:
                print([l.to_int() for l in relation_labels])
                # `label` stays None when no mode matched; report that instead
                # of failing inside a debug-only statement.
                result = None if label is None else label.to_int()
                print("Result: {}".format(result))

            return label