Exemple #1
0
def create_and_fill_variant_collection(frames_collection):
    """Return a new FrameVariantsCollection filled from ``frames_collection``.

    The iterable returned by
    ``frames_collection.iter_frame_id_and_variants()`` is handed to
    ``fill_from_iterable`` with overwriting of existing variants switched on
    and raising on existing variants switched off.

    :param frames_collection: object exposing ``iter_frame_id_and_variants()``.
    :return: the freshly created and filled FrameVariantsCollection.
    """
    result = FrameVariantsCollection()
    fill_options = dict(overwrite_existed_variant=True,
                        raise_error_on_existed_variant=False)
    result.fill_from_iterable(
        variants_with_id=frames_collection.iter_frame_id_and_variants(),
        **fill_options)
    return result
    def __iter_frame_variants():
        """Yield the first element of every entry of a variants collection.

        The variants collection is built from the RuSentiFrames collection
        read for ``RuSentiFramesVersions.V20``; existing variants are
        overwritten and no error is raised for them.
        """
        source_frames = RuSentiFramesCollection.read_collection(
            RuSentiFramesVersions.V20)

        variants_registry = FrameVariantsCollection()
        variants_registry.fill_from_iterable(
            variants_with_id=source_frames.iter_frame_id_and_variants(),
            overwrite_existed_variant=True,
            raise_error_on_existed_variant=False)

        # Every entry is unpacked as a pair; only its first element is
        # exposed to the caller.
        for entry_head, _ignored in variants_registry.iter_variants():
            yield entry_head
Exemple #3
0
    def __create_frames_variants_collection(self):
        """Return a FrameVariantsCollection filled from RuSentiFrames V20.

        The frames collection is read for ``RuSentiFramesVersions.V20`` and
        its ``iter_frame_id_and_variants()`` output is used to fill a new
        FrameVariantsCollection (existing variants are overwritten, no error
        is raised for them).
        """
        source_frames = RuSentiFramesCollection.read_collection(
            RuSentiFramesVersions.V20)

        result = FrameVariantsCollection()
        fill_options = dict(overwrite_existed_variant=True,
                            raise_error_on_existed_variant=False)
        result.fill_from_iterable(
            variants_with_id=source_frames.iter_frame_id_and_variants(),
            **fill_options)
        return result
Exemple #4
0
    def test_reading(self):
        """Read RuSentiFrames V20 and log the content of every frame.

        For each frame id the titles, variants, roles, states and polarities
        are logged, followed by whether an a0->a1 polarity could be obtained.
        Afterwards a FrameVariantsCollection is filled from the frames and the
        variant looked up by the value "хвалить" is checked to be a
        FrameVariant and logged.
        """
        # Logger setup.
        log = logging.getLogger(__name__)
        log.setLevel(logging.INFO)
        logging.basicConfig(level=logging.DEBUG)

        collection = RuSentiFramesCollection.read_collection(
            RuSentiFramesVersions.V20)

        for fid in collection.iter_frames_ids():
            # Identifier.
            log.info("Frame: {}".format(fid))

            # Titles.
            titles_text = ",".join(collection.get_frame_titles(fid))
            log.info("Titles: {}".format(titles_text))

            # Variants.
            variants_text = ",".join(collection.get_frame_variants(fid))
            log.info("Variants: {}".format(variants_text))

            # Roles.
            for role in collection.get_frame_roles(fid):
                role_text = " -- ".join([role.Source, role.Description])
                log.info("Role: {}".format(role_text))

            # States.
            for state in collection.get_frame_states(fid):
                state_fields = [state.Role,
                                state.Label.to_class_str(),
                                str(state.Prob)]
                log.info("State: {}".format(",".join(state_fields)))

            # Polarities.
            for pol in collection.get_frame_polarities(fid):
                pol_fields = [pol.Source,
                              pol.Destination,
                              pol.Label.to_class_str()]
                log.info("Polarity: {}".format(",".join(pol_fields)))

            # Lookup of the a0 -> a1 polarity; only its presence is logged.
            a0_to_a1 = collection.try_get_frame_polarity(fid,
                                                         role_src="a0",
                                                         role_dest="a1")
            log.info("Has a0->a1 polarity: {}".format(a0_to_a1 is not None))

        # Frame variants.
        variants_registry = FrameVariantsCollection()
        variants_registry.fill_from_iterable(
            variants_with_id=collection.iter_frame_id_and_variants(),
            overwrite_existed_variant=True,
            raise_error_on_existed_variant=False)

        found_variant = variants_registry.get_variant_by_value("хвалить")

        assert (isinstance(found_variant, FrameVariant))

        log.info("FrameVariantValue: {}".format(found_variant.get_value()))
        log.info("FrameID: {}".format(found_variant.FrameID))
Exemple #5
0
    def test_parsing(self):
        """Parse every RuSentRel V11 document with a frame-aware text parser.

        A text parser is assembled from an entities parser, a tokenizer, a
        lemma-based frame variants parser (fed with variants of RuSentiFrames
        V20) and a sentiment negation step. Each document of the collection is
        read, parsed, and its terms are shown via ``debug_show_news_terms``.
        """
        # Logger setup.
        log = logging.getLogger(__name__)
        log.setLevel(logging.DEBUG)
        logging.basicConfig(level=logging.DEBUG)

        # One stemmer instance is shared by the frame variants parser and the
        # synonyms loader.
        mystem = MystemWrapper()

        # Frames and the variants collection derived from them.
        frames_collection = RuSentiFramesCollection.read_collection(
            version=RuSentiFramesVersions.V20)
        variants_registry = FrameVariantsCollection()
        variants_registry.fill_from_iterable(
            variants_with_id=frames_collection.iter_frame_id_and_variants(),
            overwrite_existed_variant=True,
            raise_error_on_existed_variant=False)

        # Pipeline items, listed in the order they are passed to the parser.
        pipeline_items = [
            RuSentRelTextEntitiesParser(),
            DefaultTextTokenizer(keep_tokens=True),
            LemmasBasedFrameVariantsParser(frame_variants=variants_registry,
                                           stemmer=mystem,
                                           save_lemmas=False),
            FrameVariantsSentimentNegation(),
        ]
        parser = BaseTextParser(pipeline=pipeline_items)

        # Synonyms collection.
        synonyms_collection = RuSentRelSynonymsCollectionProvider.load_collection(
            stemmer=mystem)

        rusentrel_version = RuSentRelVersions.V11
        for doc_id in RuSentRelIOUtils.iter_collection_indices(rusentrel_version):

            # Read the document.
            document = RuSentRelNews.read_document(doc_id=doc_id,
                                                   synonyms=synonyms_collection,
                                                   version=rusentrel_version)

            # Parse it and show the resulting terms.
            parsed_document = NewsParser.parse(news=document,
                                               text_parser=parser)
            debug_show_news_terms(parsed_news=parsed_document)
    @classmethod
    def setUpClass(cls):
        """Create the resources shared by the tests and store them on ``cls``.

        Sets the following class attributes:

        * ``stemmer`` -- a MystemWrapper instance;
        * ``entities_formatter`` -- a RussianEntitiesCasedFormatter built on a
          POSMystemWrapper around ``Mystem(entire_input=False)``;
        * ``synonyms`` -- the collection loaded by
          RuSentRelSynonymsCollectionProvider using ``cls.stemmer``;
        * ``frames_collection`` -- RuSentiFrames read for version V10;
        * ``unique_frame_variants`` -- a FrameVariantsCollection filled from
          ``frames_collection`` (existing variants are overwritten, no error
          is raised for them).

        NOTE(review): assuming the enclosing class is a ``unittest.TestCase``,
        the framework calls this hook as ``cls.setUpClass()`` with no explicit
        argument, which only works when it is a classmethod -- hence the
        decorator.
        """
        cls.stemmer = MystemWrapper()
        cls.entities_formatter = RussianEntitiesCasedFormatter(
            pos_tagger=POSMystemWrapper(Mystem(entire_input=False)))
        cls.synonyms = RuSentRelSynonymsCollectionProvider.load_collection(
            stemmer=cls.stemmer)
        cls.frames_collection = RuSentiFramesCollection.read_collection(
            version=RuSentiFramesVersions.V10)

        cls.unique_frame_variants = FrameVariantsCollection()
        cls.unique_frame_variants.fill_from_iterable(
            variants_with_id=cls.frames_collection.iter_frame_id_and_variants(),
            overwrite_existed_variant=True,
            raise_error_on_existed_variant=False)
Exemple #7
0
def __about(frames_collection, pos_tagger):
    """Print summary statistics for a RuSentiFrames collection.

    The report contains the number of frames, the split of the unique frame
    variants into verbs, nouns, multi-term phrases and other entries, and the
    sizes of the results returned by ``__get_variants_with_polarities`` and
    ``__get_frame_effects`` for several role/label combinations.

    :param frames_collection: collection to describe; asserted to be a
        RuSentiFramesCollection.
    :param pos_tagger: object providing ``get_term_pos``, ``is_noun`` and
        ``is_verb``; used to classify single-term variants.
    """
    entries_total = list(frames_collection.iter_frame_id_and_variants())

    variants_registry = FrameVariantsCollection()
    variants_registry.fill_from_iterable(
        variants_with_id=entries_total,
        overwrite_existed_variant=True,
        raise_error_on_existed_variant=False)

    assert (isinstance(frames_collection, RuSentiFramesCollection))
    entries_unique = list(variants_registry.iter_variants())

    # Buckets for the unique variants.
    verbs, nouns, phrases, other = [], [], [], []
    for _, variant in entries_unique:
        assert (isinstance(variant, FrameVariant))

        variant_terms = list(variant.iter_terms())
        if len(variant_terms) > 1:
            # More than one term: counted as a phrase, no POS lookup needed.
            phrases.append(variant.get_value())
            continue

        single_term = variant_terms[0]
        pos_type = pos_tagger.get_term_pos(single_term)
        if pos_tagger.is_noun(pos_type):
            nouns.append(single_term)
        elif pos_tagger.is_verb(pos_type):
            verbs.append(single_term)
        else:
            other.append(single_term)

    # NOTE(review): the titles are gathered here but not read anywhere in the
    # visible part of this function.
    titles = [title
              for frame_id in frames_collection.iter_frames_ids()
              for title in frames_collection.get_frame_titles(frame_id)]

    print("Frames count:", len(list(frames_collection.iter_frames_ids())))
    print("---------------")
    print()

    print("Quantitative characteristics of the RuSentiFrames entries:")
    size_rows = (
        ("Verbs:", verbs),
        ("Nouns:", nouns),
        ("Phrases:", phrases),
        ("Other:", other),
        ("Unique entries:", entries_unique),
        ("Total entries: ", entries_total),
    )
    for caption, items in size_rows:
        print(caption, len(items))
    print()

    print(
        "The distribution of RuSentiFrames text entries according to attitudes:"
    )
    # (caption, source role, destination role, label type), in report order.
    attitude_rows = (
        ("A0 to A1 Pos", 'a0', 'a1', PositiveLabel),
        ("A0 to A1 Neg", 'a0', 'a1', NegativeLabel),
        ("Author to A0 Pos", 'author', 'a0', PositiveLabel),
        ("Author to A0 Neg", 'author', 'a0', NegativeLabel),
        ("Author to A1 Pos", 'author', 'a1', PositiveLabel),
        ("Author to A1 Neg", 'author', 'a1', NegativeLabel),
    )
    for caption, src, dest, label_type in attitude_rows:
        matched = __get_variants_with_polarities(
            frames_collection=frames_collection,
            role_src=src,
            role_dest=dest,
            label=label_type())
        print(caption, len(matched))
    print()

    print(
        "The distribution of RuSentiFrames text entries according to effects on main participants:"
    )
    # (caption, role, label type), in report order.
    effect_rows = (
        ("A0 Pos", 'a0', PositiveLabel),
        ("A0 Neg", 'a0', NegativeLabel),
        ("A1 Pos", 'a1', PositiveLabel),
        ("A1 Neg", 'a1', NegativeLabel),
    )
    for caption, role, label_type in effect_rows:
        matched = __get_frame_effects(frames_collection=frames_collection,
                                      role=role,
                                      label=label_type())
        print(caption, len(matched))