Esempio n. 1
0
    def __parse_opinion(line, objects_list):
        """ Build a RefOpinion from a single serialized opinion line.

        The first len(ContextsReader.OPINION_KEY) characters of the line are
        dropped, after which three delimited fields are read:
            b:(<int>)         -- integer passed to Label.from_int
            oi:[<int>,<int>]  -- left and right object indices
            si:{<text>}       -- tag assigned to the resulting opinion

        line: unicode
            text of the opinion record.
        objects_list: list
            passed to RefOpinion as its owner.

        returns: RefOpinion with the tag already set.
        raises: ValueError when a marker/closing delimiter is missing or an
            integer field cannot be parsed.
        """
        assert (isinstance(objects_list, list))

        def extract(text, opening, closing):
            # The closing delimiter is searched starting from the position
            # of the opening marker.
            begin = text.index(opening)
            end = text.index(closing, begin)
            return text[begin + len(opening):end]

        payload = line[len(ContextsReader.OPINION_KEY):]

        sentiment = Label.from_int(int(extract(payload, u'b:(', u')')))

        left_raw, right_raw = extract(payload, u'oi:[', u']').split(u',')
        left_index = int(left_raw)
        right_index = int(right_raw)

        opinion = RefOpinion(left_index=left_index,
                             right_index=right_index,
                             sentiment=sentiment,
                             owner=objects_list)

        opinion.set_tag(extract(payload, u'si:{', u'}'))

        return opinion
Esempio n. 2
0
    def predict(self, dest_data_type=DataType.Test):
        """ Label every relation of the given data type with the network
        output, save per-news opinion collections and evaluate them.

        dest_data_type:
            key of the relation collection to process.

        returns: result of self._evaluate for this data type.
        """
        relations = self.relation_collections[dest_data_type]
        relations.reset_labels()

        groups_iter = relations.iter_by_linked_relations_groups(
            self.Settings.BatchSize)

        for relation_groups in groups_iter:
            batch = Batch(relation_groups, self.Settings.GroupSize)
            feed_dict = self.create_feed_dict(batch, dest_data_type)

            # Only a single tensor is requested, hence the first result item.
            uint_labels = self.sess.run([self.network.Labels],
                                        feed_dict=feed_dict)[0]

            # One predicted label per group, shared by all its relations.
            for group_index, group in enumerate(batch.iter_groups):
                for relation in group:
                    assert (isinstance(relation, ExtractedRelation))
                    relations.apply_label(
                        label=Label.from_uint(int(uint_labels[group_index])),
                        relation_id=relation.relation_id)

        def first_label(labels):
            return labels[0]

        for news_ID in self.io.get_data_indices(dest_data_type):
            collection = OpinionCollection(None, self.synonyms,
                                           self.Settings.Stemmer)
            relations.fill_opinion_collection(collection,
                                              news_ID,
                                              first_label,
                                              debug_check_collection=False)

            output_filepath = self.io.get_opinion_output_filepath(
                news_ID, self.io.get_model_root(dest_data_type))
            collection.save(output_filepath)

        return self._evaluate(dest_data_type, self.Settings.Stemmer)
Esempio n. 3
0
def read_opinions(filepath,
                  synonyms,
                  custom_opin_ends_iter=None,
                  read_sentiment=True,
                  skip_non_added=True):
    """ Read opinions into a new OpinionCollection.

    filepath:
        source passed to the default opinion iterator; unused when
        `custom_opin_ends_iter` is given.
    synonyms: SynonymsCollection
        synonyms used by the resulting collection.
    custom_opin_ends_iter: callable or None
        when provided, called with `read_sentiment` and expected to yield
        (left_value, right_value, sentiment) triples.
    read_sentiment: bool
        forwarded to the iterator.
    skip_non_added: bool
        True -- print a warning for every opinion the collection rejects;
        False -- raise an Exception on the first rejected opinion.

    returns: OpinionCollection
    """
    assert (isinstance(synonyms, SynonymsCollection))
    assert (callable(custom_opin_ends_iter) or custom_opin_ends_iter is None)
    assert (isinstance(read_sentiment, bool))
    assert (isinstance(skip_non_added, bool))

    opinions = OpinionCollection(opinions=[], synonyms=synonyms)

    if custom_opin_ends_iter is None:
        it = __iter_opinion_end_values(filepath, read_sentiment)
    else:
        it = custom_opin_ends_iter(read_sentiment)

    for left_value, right_value, sentiment in tqdm(it, "Reading opinions:"):

        o = Opinion(value_left=left_value,
                    value_right=right_value,
                    sentiment=Label.from_int(sentiment))

        # Only an explicit False result counts as a rejection.
        if opinions.try_add_opinion(o) is not False:
            continue

        # The message is built only for rejected opinions, so successfully
        # added ones pay no formatting cost.
        msg = "Warning: opinion '{}->{}' was skipped!".format(
            o.value_left, o.value_right)

        if not skip_non_added:
            raise Exception(msg)

        print(msg)

    return opinions
Esempio n. 4
0
def create_test_opinions(test_collections, labels, synonyms_filepath, stemmer):
    """ Compose one OpinionCollection per test collection from predicted
    labels.

    test_collections: list
        collections of opinion vectors; vectors are consumed in order.
    labels: np.ndarray
        predicted labels, one per opinion vector across all collections.
    synonyms_filepath:
        path passed to SynonymsCollection.from_file.
    stemmer: Stemmer

    Every vector receives its label. Non-neutral opinions that are not yet
    present in the collection (by synonyms) are added; non-neutral duplicates
    are reported on stdout; neutral ones are dropped.

    returns: list of OpinionCollection, aligned with `test_collections`.
    """
    assert (isinstance(test_collections, list))
    assert (isinstance(labels, np.ndarray))
    assert (isinstance(stemmer, Stemmer))

    synonyms = SynonymsCollection.from_file(synonyms_filepath, stemmer=stemmer)

    result = []
    position = 0  # running index into `labels` across all collections

    for vectors in test_collections:
        collection = OpinionCollection(None, synonyms, stemmer)

        for vector in vectors:
            predicted = Label.from_int(int(labels[position]))
            position += 1

            vector.set_label(predicted)
            candidate = collection.create_opinion(vector.value_left,
                                                  vector.value_right,
                                                  vector.label)

            already_present = collection.has_opinion_by_synonyms(candidate)

            if isinstance(predicted, NeutralLabel):
                continue

            if already_present:
                print("Failed for o={}".format(
                    candidate.to_unicode().encode('utf-8')))
            else:
                collection.add_opinion(candidate)

        result.append(collection)

    return result
Esempio n. 5
0
    def decide_label_of_pair_in_title_optional(self, i, j, title_objects,
                                               title_frames):
        """ Try to derive a label for the pair of title objects (i, j) from
        the frames located between them.

        i, j:
            indices of the left and right objects in `title_objects`.
        title_objects:
            collection providing `get_object(index)`.
        title_frames:
            frame variants of the title.

        returns: Label, or None when the pair is rejected (object type not
            authorized, no frames in between, polarities missing for some
            frames, or the resulting value lies strictly between -1 and 1).

        Debug counters of the instance are updated along the way.
        """
        self.__debug_title_opinions_checked += 1

        # Both ends must pass the NER type limitation; left is checked first.
        ends = []
        for object_index in (i, j):
            obj = title_objects.get_object(object_index)
            if not self.__ner_types_limitation.is_auth(obj):
                self.__debug_title_opinions_with_objs_non_valid_by_type += 1
                return None
            ends.append(obj)

        left_obj, right_obj = ends
        left_bound = left_obj.get_bound()
        right_bound = right_obj.get_bound()

        # Frames between the end of the left object and the start of the
        # right one.
        frames_between = self.__get_frames_within(
            left_in=left_bound.TermIndex + left_bound.Length,
            right_in=right_bound.TermIndex - 1,
            text_frame_variants=title_frames)

        polarities, inverted_flags = get_frames_polarities(
            text_frame_variants=frames_between, frames=self.Settings.Frames)

        self.__debug_title_opinions_processed_by_frames += 1

        if len(frames_between) == 0:
            self.__debug_title_opinions_with_empty_frames += 1
            return None

        if len(frames_between) != len(polarities):
            self.__debug_title_opinions_with_polarities_missed += 1
            return None

        int_labels = [
            optional_invert_label(polarity.Label,
                                  inverted_flags[position]).to_int()
            for position, polarity in enumerate(polarities)
        ]

        # A single negative example forces the negative result; otherwise
        # the averaged value is used.
        value = -1 if -1 in int_labels else mean(int_labels)

        if -1 < value < 1:
            self.__debug_title_opinions_with_unknown_label += 1
            return None

        self.__debug_valid += 1

        return Label.from_int(int(value))
Esempio n. 6
0
    def create_label_from_relations(relation_labels, label_creation_mode):
        """ Reduce labels of several relations to a single label.

        relation_labels: list
            labels of the relations; in AVERAGE mode every item is expected
            to expose `Forward` and `Backward` labels.
        label_creation_mode: unicode
            one of the LabelCalculationMode values:
            FIRST_APPEARED -- the first label of the list is returned as is;
            AVERAGE -- a LabelPair built from the signs of the summed
            forward and backward values.

        returns: the resulting label, or None when the mode matches neither
            of the supported values.
        """
        assert (isinstance(relation_labels, list))
        assert (isinstance(label_creation_mode, unicode))

        result = None

        if label_creation_mode == LabelCalculationMode.FIRST_APPEARED:
            result = relation_labels[0]

        if label_creation_mode == LabelCalculationMode.AVERAGE:
            forward_total = sum(
                item.Forward.to_int() for item in relation_labels)
            backward_total = sum(
                item.Backward.to_int() for item in relation_labels)
            result = LabelPair(
                forward=Label.from_int(np.sign(forward_total)),
                backward=Label.from_int(np.sign(backward_total)))

        if DebugKeys.PredictLabel:
            print([item.to_int() for item in relation_labels])
            print("Result: {}".format(result.to_int()))

        # TODO: Correct label

        return result
Esempio n. 7
0
        def calculate_label(relation_labels):
            """ Reduce labels of several relations to a single one, according
            to self.Settings.RelationLabelCalculationMode:
            FIRST_APPEARED -- the first label of the list;
            AVERAGE -- label built from the sign of the summed int values.

            returns: the resulting label, or None when the mode matches
                neither of the supported values.
            """
            assert(isinstance(relation_labels, list))

            mode = self.Settings.RelationLabelCalculationMode
            result = None

            if mode == LabelCalculationMode.FIRST_APPEARED:
                result = relation_labels[0]

            if mode == LabelCalculationMode.AVERAGE:
                total = sum(item.to_int() for item in relation_labels)
                result = Label.from_int(np.sign(total))

            if DebugKeys.PredictLabel:
                print([item.to_int() for item in relation_labels])
                print("Result: {}".format(result.to_int()))

            return result
Esempio n. 8
0
    def from_file(cls, filepath, synonyms_filepaths, stemmer, debug=False):
        """ Read opinions from one or several comma-separated files.

            filepath: unicode or list
                single filepath or list of filepaths.
            synonyms_filepaths: unicode or list
                single synonyms filepath or list of them.
            stemmer: Stemmer
            debug: bool
                forwarded to the synonyms reader and to the constructor.

            Every non-empty line is expected to look like
            `<left>, <right>, <sentiment>[, ...]`. Lines with fewer than
            three fields are reported on stdout and skipped.

            raises: Exception when `synonyms_filepaths` is neither unicode
                nor list.
        """
        assert (isinstance(filepath, unicode) or isinstance(filepath, list))
        assert (isinstance(stemmer, Stemmer))

        if isinstance(synonyms_filepaths, unicode):
            synonyms = SynonymsCollection.from_file(synonyms_filepaths,
                                                    stemmer=stemmer,
                                                    debug=debug)
        elif isinstance(synonyms_filepaths, list):
            synonyms = SynonymsCollection.from_files(synonyms_filepaths,
                                                     stemmer=stemmer,
                                                     debug=debug)
        else:
            raise Exception("Unsupported type for 'synonyms_filepaths'")

        # The second branch used to repeat the `unicode` check, so a list of
        # file paths was silently ignored and nothing was read.
        filepaths = []
        if isinstance(filepath, unicode):
            filepaths.append(filepath)
        elif isinstance(filepath, list):
            filepaths = filepath

        opinions = []
        for fp in filepaths:
            with io.open(fp, "r", encoding='utf-8') as f:
                for i, line in enumerate(f):

                    if line == '\n':
                        continue

                    args = line.strip().split(',')

                    if len(args) < 3:
                        print("should be at least 3 arguments: {}, '{}'".format(
                            i, line.encode('utf-8')))
                        continue

                    entity_left = args[0].strip()
                    entity_right = args[1].strip()
                    sentiment = Label.from_str(args[2].strip())

                    o = Opinion(entity_left, entity_right, sentiment, stemmer)
                    opinions.append(o)

        return cls(opinions, synonyms, stemmer, debug)
Esempio n. 9
0
    def apply_labels(self, uint_labels, minibatch):
        """
        uint_labels: list of int
            each label could be as follows: 0 -- neutral, and 1 -- positive, 2 -- negative
        minibatch: MiniBatch
            bags whose samples receive the labels.

        Assigns the labels to samples, walking bags and their samples in
        order: the k-th sample gets the k-th label, so both sequences are
        supposed to follow the same order.
        """
        assert (isinstance(uint_labels, list))
        assert (isinstance(minibatch, MiniBatch))

        samples_in_order = (sample
                            for bag in minibatch.bags
                            for sample in bag.samples)

        for position, sample in enumerate(samples_in_order):
            label = Label.from_uint(uint_labels[position])
            self._find_relation_and_set_label(sample.position, label)
Esempio n. 10
0
    def from_file(cls, filepath, synonyms):
        """ Read opinions from a comma-separated file.

        filepath:
            path of the utf-8 encoded file; every line other than a bare
            newline must look like `<left>, <right>, <sentiment>[, ...]`.
        synonyms: SynonymsCollection
            passed to the constructor together with the opinions.
        """
        assert (isinstance(synonyms, SynonymsCollection))

        opinions = []
        with io.open(filepath, "r", encoding='utf-8') as f:
            for raw_line in f:

                # Only bare newlines are treated as empty lines.
                if raw_line == '\n':
                    continue

                fields = raw_line.strip().split(',')
                assert (len(fields) >= 3)

                opinions.append(Opinion(fields[0].strip(),
                                        fields[1].strip(),
                                        Label.from_str(fields[2].strip())))

        return cls(opinions, synonyms)
Esempio n. 11
0
    def from_file(cls, filepath):
        """ Load opinion vectors from a *.vectors.txt file.

        Every line is a comma-separated record:
        `<left>, <right>, <float>, ..., <float>, <label>`
        i.e. the first two fields are opinion values, the last one is the
        label and everything in between forms the vector.
        A later record with the same key replaces an earlier one.
        """
        vectors = {}
        with io.open(filepath, 'r', encoding='utf-8') as f:
            for line in f:
                fields = line.split(',')

                value_left = fields[0].strip()
                value_right = fields[1].strip()
                label = Label.from_str(fields[-1].strip())
                vector = np.array([float(item) for item in fields[2:-1]])

                key = cls.___create_key(value_left, value_right)
                vectors[key] = OpinionVector(value_left, value_right,
                                             vector, label)

        return cls(vectors)
Esempio n. 12
0
    def predict(self, dest_data_type=DataType.Test):
        """ Label all relations of the given data type with the network
        output, save relations, raw predictions and per-news opinion
        collections, then evaluate the result.

        dest_data_type: unicode
            key of the relations/bags collections to process.

        returns: result of self._evaluate for this data type.
        """

        def calculate_label(relation_labels):
            """ Reduce labels of several relations to a single one, according
            to self.Settings.RelationLabelCalculationMode; None when the
            mode matches neither of the supported values.
            """
            assert(isinstance(relation_labels, list))

            label = None
            if self.Settings.RelationLabelCalculationMode == LabelCalculationMode.FIRST_APPEARED:
                label = relation_labels[0]
            if self.Settings.RelationLabelCalculationMode == LabelCalculationMode.AVERAGE:
                label = Label.from_int(np.sign(sum([l.to_int() for l in relation_labels])))

            if DebugKeys.PredictLabel:
                print([l.to_int() for l in relation_labels])
                print("Result: {}".format(label.to_int()))

            return label

        assert(isinstance(dest_data_type, unicode))

        relations = self._relations_collections[dest_data_type]
        relations.reset_labels()
        prediction_collection = RelationPredictionResultCollection(len(relations))

        for bags_group in self.bags_collection[dest_data_type].iter_by_groups(self.Settings.BagsPerMinibatch):

            minibatch = MiniBatch(bags_group)
            feed_dict = self.create_feed_dict(minibatch, data_type=dest_data_type)

            log_names, log_params = self.network.Log
            # Result layout: [labels, output, *log values].
            result = self.sess.run([self.network.Labels, self.network.Output] + log_params, feed_dict=feed_dict)
            uint_labels = result[0]
            output = result[1]

            if DebugKeys.PredictBatchDisplayLog:
                self._display_log(log_names, result[2:])

            # apply labels
            sample_indices_count = 0
            for sample_index, sample in enumerate(minibatch.iter_by_samples()):
                label = Label.from_uint(int(uint_labels[sample_index]))
                relations.apply_label(label, sample.RelationID)
                prediction_collection.add(sample.RelationID, RelationPredictionResult(output[sample_index]))
                sample_indices_count += 1

            # Every predicted label must correspond to exactly one sample.
            assert(sample_indices_count == len(uint_labels))

        assert(relations.debug_check_all_relations_has_labels())

        relations.debug_labels_statistic(dest_data_type)

        # Compose Result
        relations.save(
            self.io.get_relations_filepath(data_type=dest_data_type,
                                           epoch=self._last_fit_epoch_index))

        prediction_collection.save(
            self.io.get_relations_prediction_filepath(data_type=dest_data_type,
                                                      epoch=self._last_fit_epoch_index))

        for news_ID in self.io.get_data_indices(dest_data_type):
            # `self.Settings` is used here as everywhere else in this method
            # (this line used to read the lowercase `self.settings`).
            collection = OpinionCollection(None, self.synonyms, self.Settings.Stemmer)
            relations.fill_opinion_collection(collection, news_ID, calculate_label)

            collection.save(self.io.get_opinion_output_filepath(news_ID, self.io.get_model_root(dest_data_type)))

        return self._evaluate(dest_data_type, self.Settings.Stemmer)
Esempio n. 13
0
 def __frame_polarity_from_args(args):
     """ Build a FramePolarity from positional fields:
     args[0] -- source, args[1] -- destination, args[2] -- label string
     (parsed with Label.from_str), args[3] -- probability, passed as is.
     """
     source = args[0]
     destination = args[1]
     label = Label.from_str(args[2])
     probability = args[3]
     return FramePolarity(src=source,
                          dest=destination,
                          label=label,
                          prob=probability)