Example #1
0
def _collect_input_files(paths):
    """Flatten *paths* into a list of regular files.

    An entry that is a file is kept as is. An entry that is a directory
    contributes its direct children that are files (no recursion). Entries
    that are neither a file nor a directory are skipped.
    """
    collected = []
    for path in paths:
        if path.is_file():
            collected.append(path)
        elif path.is_dir():
            collected.extend(child for child in path.iterdir() if child.is_file())
    return collected


def manual_import(args):
    """Read transcript files, extract their communication model and store it.

    Every file in ``args.files`` (directories are expanded one level deep) is
    parsed, its interaction candidates are run through
    ``extract_communication_model`` and the resulting transcripts are either
    written to the database or, in dry-run mode, dumped next to the input.

    Args:
        args: Namespace-like object providing:
            files: iterable of ``pathlib.Path``-like objects (files or
                directories).
            dry_run: when truthy, MDB is switched to its "runtime" storage
                mode, nothing is written to the database, and for each input
                file a ``<stem>.converted.json`` plus an ``mdb.json`` are
                written into the input file's directory.
            add_debug_objects: forwarded to ``extract_communication_model``.
            notify: when truthy, the sentiment analysis group is notified
                once per extracted transcript.
    """
    if args.dry_run:
        MDB.set_storage_mode("runtime")

    for file in _collect_input_files(args.files):
        logger.info(f'reading "{file.as_posix()}" now...')
        transcripts = []

        # Anything that is not ".json" (case-insensitive) is treated as XML.
        if file.suffix.lower() == ".json":
            logger.info("reading json based transcript file now...")
            file_content = read_transcripts_json_file(file)
        else:
            logger.info("reading xml based transcript file now...")
            # The XML reader yields a single (metadata, candidates) pair;
            # wrap it so both formats share the loop below.
            file_content = [read_transcript_xml_file(file)]

        logger.info("extracting communication model now...")
        for metadata, inter_candidates in file_content:
            transcript = Transcript.from_interactions(
                metadata=metadata,
                interactions=extract_communication_model(
                    candidates=inter_candidates,
                    add_debug_objects=args.add_debug_objects))

            # insert into DB
            if not args.dry_run:
                transcript_dict = transcript.dict(exclude_none=True, exclude_unset=True)
                logger.info(f"writing transcript with '{len(transcript_dict['interactions'])}' interactions into db.")
                # NOTE(review): keyed by session number; whether this upserts
                # depends on database.update_one -- confirm.
                database.update_one("session", {"session_id": transcript.session_no}, transcript_dict)

            transcripts.append(transcript)

            # notify sentiment group
            if args.notify and transcript:
                utils.notify_sentiment_analysis_group([str(transcript.session_no)])

        cm = CommunicationModel(transcripts=transcripts)

        if args.dry_run:
            # "a.xml" -> "a.converted.json" in the same directory.
            out_file = file.with_suffix(".converted.json")
            logger.info(f"writing transcripts into {out_file.absolute().as_posix()}.")
            with open(out_file, "w", encoding="utf-8") as o:
                o.write(cm.json(exclude_none=True, indent=4, ensure_ascii=False))
            # Dump the in-memory MDB state alongside; rewritten for every
            # input file, so the last write in a directory wins.
            with open(out_file.parent / "mdb.json", "w", encoding="utf-8") as o:
                safe_json_dump(MDB._mdb_runtime_storage, o)
Example #2
0
import unittest
from datetime import datetime

from cme.domain import InteractionCandidate, MDB, Faction
from cme.extraction import extract_communication_model

# Put MDB into its "runtime" storage mode at import time, before any test
# calls MDB.find_and_add_in_storage via _build_candidate.
# NOTE(review): presumably this keeps the tests on in-memory storage rather
# than a persistent store -- confirm against MDB.set_storage_mode.
MDB.set_storage_mode("runtime")


def _build_candidate(comment: str) -> InteractionCandidate:
    """Wrap *comment* in an InteractionCandidate with a fixed test speaker.

    The speaker is looked up (and added) through MDB.find_and_add_in_storage
    with a single membership tuple of (datetime.min, None, Faction.NONE);
    the paragraph text is always "Unittest".
    """
    test_speaker = MDB.find_and_add_in_storage(
        forename="Likey",
        surname="McUnittest",
        memberships=[(datetime.min, None, Faction.NONE)],
    )
    return InteractionCandidate(
        speaker=test_speaker,
        paragraph="Unittest",
        comment=comment,
    )


class TestExtraction(unittest.TestCase):
    def test_extract_sample1(self):
        comment = "(Beifall bei der FDP sowie bei Abgeordneten der CDU/CSU, der SPD und des BÜNDNISSES 90/DIE GRÜNEN – Dr. Eberhardt Alexander Gauland [AfD]: Ha, ha, ha!)"

        cm = extract_communication_model([_build_candidate(comment)])
        interaction_0 = cm[0]
        interaction_1 = cm[1]
        interaction_2 = cm[2]
        interaction_3 = cm[3]
        interaction_4 = cm[4]
        self.assertEqual(interaction_0.sender, Faction.FDP)
        self.assertEqual(
            interaction_0.message,