def __init__(self, labels_scaler, stemmer, frames_version, model_io,
                 terms_per_context, dist_in_terms_between_attitude_ends):
        """Store serialization settings and, when requested, load frames.

        ``labels_scaler`` and ``stemmer`` are passed on to the parent
        initializer. ``frames_version`` may be ``None``: then no frames
        collection is read and both frame-related attributes remain ``None``.
        ``dist_in_terms_between_attitude_ends`` is stored as given, without
        a type check.
        """
        assert (frames_version is None
                or isinstance(frames_version, RuSentiFramesVersions))
        assert isinstance(model_io, BertModelIO)
        assert isinstance(terms_per_context, int)

        # Stored before the parent initializer is invoked.
        # NOTE(review): presumably the parent may rely on it -- confirm.
        self.__dist_in_terms_between_attitude_ends = dist_in_terms_between_attitude_ends

        super(CustomSerializationData, self).__init__(labels_scaler, stemmer)

        self.__model_io = model_io
        self.__terms_per_context = terms_per_context
        self.__opinion_formatter = RuSentRelOpinionCollectionFormatter()

        # Frame resources are optional; default to "not loaded".
        self.__frames_collection = None
        self.__unique_frame_variants = None

        if frames_version is None:
            return

        frames = RuSentiFramesCollection.read_collection(version=frames_version)
        self.__frames_collection = frames
        self.__unique_frame_variants = FrameVariantsCollection.create_unique_variants_from_iterable(
            variants_with_id=frames.iter_frame_id_and_variants(),
            stemmer=stemmer)
Example #2
0
def about_version(version=RuSentiFramesVersions.V20):
    """Print the lexicon version and return the result of ``__about``.

    A Mystem-based stemmer and POS tagger are created, the frames collection
    of the given ``version`` is read, and both the collection and the tagger
    are handed to ``__about``.
    """
    mystem = MystemWrapper()
    tagger = POSMystemWrapper(mystem.MystemInstance)
    frames = RuSentiFramesCollection.read_collection(version=version)

    print("Lexicon version:", version)

    return __about(frames_collection=frames, pos_tagger=tagger)
    def __init__(self, labels_scaler, stemmer, pos_tagger, embedding,
                 dist_in_terms_between_att_ends, terms_per_context,
                 frames_version, str_entity_formatter, opinion_formatter,
                 rusentrel_version):
        """Validate the arguments, keep them, and load the frames resources.

        ``labels_scaler`` and ``stemmer`` are forwarded to the parent
        initializer. ``dist_in_terms_between_att_ends`` may be an ``int`` or
        ``None``. The frames collection of ``frames_version`` is read
        eagerly, and unique frame variants are built from it using
        ``self.Stemmer``.
        """
        assert isinstance(embedding, Embedding)
        assert isinstance(stemmer, Stemmer)
        assert isinstance(pos_tagger, POSTagger)
        assert isinstance(rusentrel_version, RuSentRelVersions)
        assert isinstance(frames_version, RuSentiFramesVersions)
        assert isinstance(str_entity_formatter, StringEntitiesFormatter)
        assert isinstance(opinion_formatter, OpinionCollectionsFormatter)
        assert (dist_in_terms_between_att_ends is None
                or isinstance(dist_in_terms_between_att_ends, int))
        assert isinstance(terms_per_context, int)

        # Stored before the parent initializer is invoked.
        self.__dist_in_terms_between_att_ends = dist_in_terms_between_att_ends

        super(RuSentRelExperimentSerializationData, self).__init__(
            labels_scaler=labels_scaler,
            stemmer=stemmer)

        self.__pos_tagger = pos_tagger
        self.__terms_per_context = terms_per_context
        self.__rusentrel_version = rusentrel_version
        self.__str_entity_formatter = str_entity_formatter
        self.__word_embedding = embedding
        self.__opinion_formatter = opinion_formatter

        # Frames and the unique variants derived from them.
        frames = RuSentiFramesCollection.read_collection(version=frames_version)
        self.__frames_collection = frames
        self.__unique_frame_variants = FrameVariantsCollection.create_unique_variants_from_iterable(
            variants_with_id=frames.iter_frame_id_and_variants(),
            stemmer=self.Stemmer)
    def __iter_frame_variants():
        """Yield the first item of every pair produced by ``iter_variants``.

        The V20 frames collection is read, a fresh variants collection is
        filled from it (existing variants are overwritten, no error is
        raised on duplicates), and the collection is then iterated.
        """
        frames = RuSentiFramesCollection.read_collection(RuSentiFramesVersions.V20)

        variants = FrameVariantsCollection()
        variants.fill_from_iterable(
            variants_with_id=frames.iter_frame_id_and_variants(),
            overwrite_existed_variant=True,
            raise_error_on_existed_variant=False)

        for variant, _ in variants.iter_variants():
            yield variant
Example #5
0
    def __create_frames_variants_collection(self):
        """Build and return a variants collection from the V20 frames.

        Existing variants are overwritten while filling, and no error is
        raised when a variant already exists.
        """
        v20_frames = RuSentiFramesCollection.read_collection(RuSentiFramesVersions.V20)

        variants = FrameVariantsCollection()
        variants.fill_from_iterable(variants_with_id=v20_frames.iter_frame_id_and_variants(),
                                    overwrite_existed_variant=True,
                                    raise_error_on_existed_variant=False)
        return variants
Example #6
0
    def test_reading(self):
        """Read the V20 frames collection and log the contents of each frame.

        Afterwards a variants collection is filled from the frames and the
        variant looked up by the value "хвалить" is checked to be a
        ``FrameVariant``.
        """
        # Logger setup.
        logger = logging.getLogger(__name__)
        logger.setLevel(logging.INFO)
        logging.basicConfig(level=logging.DEBUG)

        frames = RuSentiFramesCollection.read_collection(RuSentiFramesVersions.V20)

        for frame_id in frames.iter_frames_ids():
            logger.info("Frame: {}".format(frame_id))

            titles_text = ",".join(frames.get_frame_titles(frame_id))
            logger.info("Titles: {}".format(titles_text))

            variants_text = ",".join(frames.get_frame_variants(frame_id))
            logger.info("Variants: {}".format(variants_text))

            for role in frames.get_frame_roles(frame_id):
                role_text = " -- ".join([role.Source, role.Description])
                logger.info("Role: {}".format(role_text))

            for state in frames.get_frame_states(frame_id):
                state_fields = [state.Role,
                                state.Label.to_class_str(),
                                str(state.Prob)]
                logger.info("State: {}".format(",".join(state_fields)))

            for polarity in frames.get_frame_polarities(frame_id):
                polarity_fields = [polarity.Source,
                                   polarity.Destination,
                                   polarity.Label.to_class_str()]
                logger.info("Polarity: {}".format(",".join(polarity_fields)))

            # Only the presence of an a0 -> a1 polarity is reported.
            a0_a1_polarity = frames.try_get_frame_polarity(frame_id,
                                                           role_src="a0",
                                                           role_dest="a1")
            logger.info("Has a0->a1 polarity: {}".format(a0_a1_polarity is not None))

        # Variants collection built from the same frames.
        variants_collection = FrameVariantsCollection()
        variants_collection.fill_from_iterable(
            variants_with_id=frames.iter_frame_id_and_variants(),
            overwrite_existed_variant=True,
            raise_error_on_existed_variant=False)

        found_variant = variants_collection.get_variant_by_value("хвалить")

        assert isinstance(found_variant, FrameVariant)

        logger.info("FrameVariantValue: {}".format(found_variant.get_value()))
        logger.info("FrameID: {}".format(found_variant.FrameID))
    def setUpClass(cls):
        """Create the shared fixtures and attach them to ``cls``.

        Sets ``stemmer``, ``entities_formatter``, ``synonyms``,
        ``frames_collection`` (version V10) and ``unique_frame_variants``
        (filled from the frames collection, overwriting existing variants).
        """
        cls.stemmer = MystemWrapper()

        formatter_pos_tagger = POSMystemWrapper(Mystem(entire_input=False))
        cls.entities_formatter = RussianEntitiesCasedFormatter(pos_tagger=formatter_pos_tagger)

        cls.synonyms = RuSentRelSynonymsCollectionProvider.load_collection(stemmer=cls.stemmer)

        cls.frames_collection = RuSentiFramesCollection.read_collection(
            version=RuSentiFramesVersions.V10)

        cls.unique_frame_variants = FrameVariantsCollection()
        variants_with_id = cls.frames_collection.iter_frame_id_and_variants()
        cls.unique_frame_variants.fill_from_iterable(variants_with_id=variants_with_id,
                                                     overwrite_existed_variant=True,
                                                     raise_error_on_existed_variant=False)
Example #8
0
    def test_parsing(self):
        """Parse every RuSentRel V11 document and show its parsed terms.

        The text parser pipeline is built from an entities parser, a
        tokenizer, a lemma-based frame variants parser (fed with the V20
        frame variants) and a sentiment negation step, in that order.
        """
        # Logger setup.
        logger = logging.getLogger(__name__)
        logger.setLevel(logging.DEBUG)
        logging.basicConfig(level=logging.DEBUG)

        # Stemmer shared by the frame variants parser and the synonyms loader.
        stemmer = MystemWrapper()

        # Frames and their variants.
        frames = RuSentiFramesCollection.read_collection(version=RuSentiFramesVersions.V20)
        frame_variants = FrameVariantsCollection()
        frame_variants.fill_from_iterable(
            variants_with_id=frames.iter_frame_id_and_variants(),
            overwrite_existed_variant=True,
            raise_error_on_existed_variant=False)

        # Pipeline items are created in the order in which they are listed.
        pipeline = [
            RuSentRelTextEntitiesParser(),
            DefaultTextTokenizer(keep_tokens=True),
            LemmasBasedFrameVariantsParser(frame_variants=frame_variants,
                                           stemmer=stemmer,
                                           save_lemmas=False),
            FrameVariantsSentimentNegation(),
        ]
        text_parser = BaseTextParser(pipeline=pipeline)

        # Synonyms collection used when reading documents.
        synonyms = RuSentRelSynonymsCollectionProvider.load_collection(stemmer=stemmer)

        version = RuSentRelVersions.V11
        for doc_id in RuSentRelIOUtils.iter_collection_indices(version):
            # Read the document, then run the text parser over it.
            news = RuSentRelNews.read_document(doc_id=doc_id,
                                               synonyms=synonyms,
                                               version=version)

            parsed_news = NewsParser.parse(news=news, text_parser=text_parser)
            debug_show_news_terms(parsed_news=parsed_news)
Example #9
0
def create_frames_collection():
    """Read and return the V20 frames collection.

    A new ``ExperimentRuSentiFramesLabelsFormatter`` instance is passed as
    the ``labels_fmt`` argument.
    """
    read = RuSentiFramesCollection.read_collection
    return read(version=RuSentiFramesVersions.V20,
                labels_fmt=ExperimentRuSentiFramesLabelsFormatter())