def _collect_input_files(paths):
    """Expand *paths* into a flat list of regular files.

    A path that is a regular file is kept as-is. A path that is a directory
    contributes its direct children that are regular files (no recursion).
    Anything else (e.g. a non-existent path) is skipped silently.

    Args:
        paths: iterable of ``pathlib.Path``-like objects offering
            ``is_file()``, ``is_dir()`` and ``iterdir()``.

    Returns:
        list of the collected file paths, in input order; directory children
        appear in the order ``iterdir()`` yields them.
    """
    collected = []
    for path in paths:
        if path.is_file():
            collected.append(path)
        elif path.is_dir():
            # Iterate the directory lazily; no intermediate list is needed.
            collected.extend(
                child for child in path.iterdir() if child.is_file())
    return collected


def manual_import(args):
    """Import transcript files and extract their communication model.

    Every input file is read (``.json`` via ``read_transcripts_json_file``,
    anything else via ``read_transcript_xml_file``), converted into
    ``Transcript`` objects and then either written to the database or, in
    dry-run mode, dumped as JSON next to the input file.

    Args:
        args: parsed command line namespace. The attributes read here are:

            * ``dry_run`` -- when true, MDB storage is switched to
              ``"runtime"``, nothing is written to the database, and the
              result is written to ``<input>.converted.json`` plus an
              ``mdb.json`` in the same directory.
            * ``files`` -- iterable of paths; directories are expanded to
              the regular files they directly contain.
            * ``add_debug_objects`` -- forwarded to
              ``extract_communication_model``.
            * ``notify`` -- when true, the sentiment analysis group is
              notified for every extracted transcript.

    Returns:
        None
    """
    # NOTE(review): the statement nesting below was reconstructed from a
    # whitespace-collapsed source; confirm against the canonical file.
    if args.dry_run:
        MDB.set_storage_mode("runtime")

    for file in _collect_input_files(args.files):
        logger.info(f'reading "{file.as_posix()}" now...')
        transcripts = []

        if file.suffix.lower() == ".json":
            logger.info("reading json based transcript file now...")
            file_content = read_transcripts_json_file(file)
        else:
            logger.info("reading xml based transcript file now...")
            # The xml reader result is wrapped in a list so both branches can
            # be iterated the same way below.
            file_content = [read_transcript_xml_file(file)]

        # The message has no placeholder, so the former ``.format(...)`` call
        # was a no-op and has been dropped.
        logger.info("extracting communication model now...")
        for metadata, inter_candidates in file_content:
            transcript = Transcript.from_interactions(
                metadata=metadata,
                interactions=extract_communication_model(
                    candidates=inter_candidates,
                    add_debug_objects=args.add_debug_objects))

            # insert into DB
            if not args.dry_run:
                transcript_dict = transcript.dict(
                    exclude_none=True, exclude_unset=True)
                logger.info(f"writing transcript with '{len(transcript_dict['interactions'])}' interactions into db.")
                database.update_one(
                    "session",
                    {"session_id": transcript.session_no},
                    transcript_dict)

            transcripts.append(transcript)

            # notify sentiment group
            if args.notify and transcript:
                utils.notify_sentiment_analysis_group(
                    [str(transcript.session_no)])

        # Built for every file (as before), even though only the dry-run
        # branch below consumes it.
        cm = CommunicationModel(transcripts=transcripts)

        if args.dry_run:
            out_file: Path = file.with_suffix(".converted.json")
            logger.info(f"writing transcripts into {out_file.absolute().as_posix()}.")

            with open(out_file, "w", encoding="utf-8") as o:
                o.write(cm.json(exclude_none=True, indent=4, ensure_ascii=False))

            # Dump the runtime MDB storage alongside the converted transcript.
            with open(out_file.parent / "mdb.json", "w", encoding="utf-8") as o:
                safe_json_dump(MDB._mdb_runtime_storage, o)
import unittest from datetime import datetime from cme.domain import InteractionCandidate, MDB, Faction from cme.extraction import extract_communication_model MDB.set_storage_mode("runtime") def _build_candidate(comment: str) -> InteractionCandidate: return InteractionCandidate(speaker=MDB.find_and_add_in_storage( forename="Likey", surname="McUnittest", memberships=[(datetime.min, None, Faction.NONE)]), paragraph="Unittest", comment=comment) class TestExtraction(unittest.TestCase): def test_extract_sample1(self): comment = "(Beifall bei der FDP sowie bei Abgeordneten der CDU/CSU, der SPD und des BÜNDNISSES 90/DIE GRÜNEN – Dr. Eberhardt Alexander Gauland [AfD]: Ha, ha, ha!)" cm = extract_communication_model([_build_candidate(comment)]) interaction_0 = cm[0] interaction_1 = cm[1] interaction_2 = cm[2] interaction_3 = cm[3] interaction_4 = cm[4] self.assertEqual(interaction_0.sender, Faction.FDP) self.assertEqual( interaction_0.message,