Esempio n. 1
0
def run():
    """Train one model per TDC configuration on a shared gold data set.

    The gold data is loaded and transformed once using ``ConfigBase``. A
    trainer is then initialised and trained for ``ConfigTdc100`` and, after
    that, for ``ConfigTdc80``; both receive the same gold data container and
    the same category list.
    """
    gold_data = main.load_gold_data(ConfigBase)
    gold_data = main.transform_gold_data(ConfigBase, gold_data)

    # Same init/train sequence for each configuration, in the original order.
    for training_config in (ConfigTdc100, ConfigTdc80):
        config_trainer = main.init_trainer(
            training_config,
            cats_list=gold_data.cats_list,
        )
        main.run_training(
            config=training_config,
            trainer=config_trainer,
            gold_data_container=gold_data,
        )
Esempio n. 2
0
def run():
    """Merge six gold data sources into one container and train on the result.

    Each source is loaded with its own config, transformed, and merged into
    ``gdc`` through ``gold_data_manager.merge_assuming_identical_categories``.
    After training with ``ConfigTrain`` the function calls ``embed()``, so all
    locals (``gdc_1`` ... ``gdc_6``, ``gdc``, ``trainer``) are deliberately kept
    as separate names.
    """

    # Source 1 is transformed twice: first with Config1_1, then with Config1_2.
    gdc_1 = main.load_gold_data(Config1_1)
    gdc_1 = main.transform_gold_data(Config1_1, gdc_1)
    gdc_1 = main.transform_gold_data(Config1_2, gdc_1)
    # The merged container is seeded with the category list of source 1;
    # every later merge assumes the other sources share these categories.
    gdc = GoldDataContainer(cats_list=gdc_1.cats_list)
    gdc = gold_data_manager.merge_assuming_identical_categories(gdc, gdc_1)

    # Sources 2-6: load, transform with the same config, merge into gdc.
    gdc_2 = main.load_gold_data(Config2)
    gdc_2 = main.transform_gold_data(Config2, gdc_2)
    gdc = gold_data_manager.merge_assuming_identical_categories(gdc, gdc_2)

    gdc_3 = main.load_gold_data(Config3)
    gdc_3 = main.transform_gold_data(Config3, gdc_3)
    gdc = gold_data_manager.merge_assuming_identical_categories(gdc, gdc_3)

    gdc_4 = main.load_gold_data(Config4)
    gdc_4 = main.transform_gold_data(Config4, gdc_4)
    gdc = gold_data_manager.merge_assuming_identical_categories(gdc, gdc_4)

    gdc_5 = main.load_gold_data(Config5)
    gdc_5 = main.transform_gold_data(Config5, gdc_5)
    gdc = gold_data_manager.merge_assuming_identical_categories(gdc, gdc_5)

    gdc_6 = main.load_gold_data(Config6)
    gdc_6 = main.transform_gold_data(Config6, gdc_6)
    gdc = gold_data_manager.merge_assuming_identical_categories(gdc, gdc_6)

    # Train a single model on the merged data.
    trainer = main.init_trainer(ConfigTrain, cats_list=gdc.cats_list)
    main.run_training(config=ConfigTrain,
                      trainer=trainer,
                      gold_data_container=gdc)

    # NOTE(review): presumably IPython's embed(), which opens an interactive
    # shell with the locals above in scope — confirm against the file's imports.
    embed()
Esempio n. 3
0
def train(trainer1, trainer2):
    """Run one training pass for both comparison configurations.

    Gold data is loaded and transformed with ``ConfigTrainCompareBase``. When
    ``trainer1`` is ``None`` the base config is switched to "create a new
    model" and BOTH trainers are (re)initialised; otherwise the trainers that
    were passed in are reused as they are.

    Args:
        trainer1: Trainer for ``ConfigTrainCompare1`` or ``None``.
        trainer2: Trainer for ``ConfigTrainCompare2``; replaced whenever
            ``trainer1`` is ``None``.

    Returns:
        The tuple ``(trainer1, trainer2)`` so that it can be fed back into the
        next call.
    """
    gold_data = main.load_gold_data(ConfigTrainCompareBase)
    gold_data = main.transform_gold_data(ConfigTrainCompareBase, gold_data)

    # NOTE(review): only trainer1 is inspected here. A call with a real
    # trainer1 and trainer2=None passes None on to run_training — confirm that
    # callers always supply both trainers or neither.
    if trainer1 is None:
        ConfigTrainCompareBase.should_load_model = False
        ConfigTrainCompareBase.should_create_model = True

        category_names = gold_data.cats_list
        trainer1 = main.init_trainer(ConfigTrainCompare1, cats_list=category_names)
        trainer2 = main.init_trainer(ConfigTrainCompare2, cats_list=category_names)

    training_jobs = (
        (ConfigTrainCompare1, trainer1),
        (ConfigTrainCompare2, trainer2),
    )
    for job_config, job_trainer in training_jobs:
        main.run_training(job_config, job_trainer, gold_data)

    return trainer1, trainer2
def run():
    """Run the model indexer with a loaded ``Trainer4`` model.

    Overrides three settings on ``ConfigRoot`` (trainer class, model loading,
    name of the reference-articles table) and then initialises a trainer from
    ``ConfigIndex`` and hands it to ``main.run_model_indexer``.
    """
    # Imported locally, as in the original script.
    from trainer.trainer4 import Trainer4

    # Root-level overrides; applied before the trainer is initialised.
    ConfigRoot.trainer_class = Trainer4
    ConfigRoot.should_load_model = True
    ConfigRoot.table_name_ref_articles = "main_ref_articles"

    indexing_trainer = main.init_trainer(ConfigIndex)
    main.run_model_indexer(ConfigIndex, indexing_trainer)
def run():
    """Train the ``ConfigSub`` model for 30 consecutive rounds.

    Gold data is loaded and transformed once. In the first round the config
    is set to create a new model; in every later round it is set to load the
    existing model instead. A fresh trainer is initialised in each round.
    """
    gold_data = main.load_gold_data(ConfigSub)
    gold_data = main.transform_gold_data(ConfigSub, gold_data)

    for round_index in range(30):
        # Exactly one of the two flags is True: create in round 0, load after.
        is_first_round = round_index == 0
        ConfigSub.should_load_model = not is_first_round
        ConfigSub.should_create_model = is_first_round

        round_trainer = main.init_trainer(config=ConfigSub,
                                          cats_list=gold_data.cats_list)
        main.run_training(ConfigSub, round_trainer, gold_data)
Esempio n. 6
0
def run():
    """Initialise a trainer from ``ConfigSub`` and run the evaluation with it."""

    trainer = main.init_trainer(ConfigSub)
    main.run_evaluation(ConfigSub, trainer)

    # NOTE(review): presumably IPython's embed(), which opens an interactive
    # shell with `trainer` in scope — confirm against the file's imports.
    embed()
Esempio n. 7
0
def run():
    """Run a single training pass for ``ConfigSub``.

    Loads the gold data, applies the configured transformation, initialises a
    trainer with the resulting category list and trains it on that data.
    """
    raw_gold_data = main.load_gold_data(ConfigSub)
    gold_data = main.transform_gold_data(ConfigSub, raw_gold_data)

    sub_trainer = main.init_trainer(
        config=ConfigSub,
        cats_list=gold_data.cats_list,
    )
    main.run_training(ConfigSub, sub_trainer, gold_data)
Esempio n. 8
0
def run():
    """Evaluate the VR model on three views of the same gold data set.

    1. The complete data set loaded and transformed with ``ConfigLoadG8``.
    2. Only the items for which the AF model predicts 'AF: Social Companions'
       or 'AF: Soziale Medien' with a score above 0.5.
    3. Only the items that are annotated (value 1) with one of these two
       categories in the untransformed gold data.

    The score variables are not used further inside the function; they stay
    in scope for the closing ``embed()`` call.
    """

    eval_data_container = main.load_gold_data(ConfigLoadG8)
    eval_data_container = main.transform_gold_data(ConfigLoadG8, eval_data_container)

    modelVR = main.init_trainer(ConfigLoadVRModel)

    # --- View 1: the whole transformed data set ---
    main.log_manager.info_global(
        "--------------------------------\n"
        "Evaluating mo11 over the entire dataset g8: \n"
    )
    scores_spacy, scores_manual = modelVR.evaluate(eval_data_container)

    # --- View 2: only the examples that the AF model (mo9 in the log
    # message) predicts as either AF=SM or AF=SC ---
    modelAF = main.init_trainer(ConfigLoadAFModel)

    gdis_to_keep = []

    for gdi in eval_data_container.gold_data_item_list: 
    
        doc = modelAF.nlp(gdi.text)

        for cat in ['AF: Social Companions', 'AF: Soziale Medien']: 
            if doc.cats[cat] > 0.5: 
                gdis_to_keep.append(gdi)
                # stop after the first hit so an item is added only once
                break 

    # Build a container holding just the kept items, with the same categories.
    eval_data_container2 = GoldDataContainer()
    eval_data_container2.cats_list = eval_data_container.cats_list
    eval_data_container2.gold_data_item_list = gdis_to_keep

    main.log_manager.info_global(
        "--------------------------------\n"
        "Evaluating mo11 over those texts in g8 that mo9 predicts to be AF=SM or AF=SC: \n"
    )
    scores_spacy2, scores_manual2 = modelVR.evaluate(eval_data_container2)

    # --- View 3: only the examples that were annotated as AF=SM or AF=SC ---
    
    # we need to reload the data to undo the transformation that removes AF
    eval_data_container = main.load_gold_data(ConfigLoadG8)

    gdis_to_keep = [] 

    for gdi in eval_data_container.gold_data_item_list: 

        for cat in ['AF: Social Companions', 'AF: Soziale Medien']:
            # gold annotations are compared against exactly 1
            if gdi.cats[cat] == 1:
                gdis_to_keep.append(gdi)
                # stop after the first hit so an item is added only once
                break 

    eval_data_container3 = GoldDataContainer()
    eval_data_container3.cats_list = eval_data_container.cats_list
    eval_data_container3.gold_data_item_list = gdis_to_keep

    # now apply the transformation that removes all categories except VR
    eval_data_container3 = main.transform_gold_data(ConfigLoadG8, eval_data_container3) 

    main.log_manager.info_global(
        "--------------------------------\n"
        "Evaluating mo11 over those texts in g8 that were annotated as AF=SM or AF=SC: \n"
    )
    scores_spacy3, scores_manual3 = modelVR.evaluate(eval_data_container3)

    # NOTE(review): presumably IPython's embed(), which opens an interactive
    # shell with the score variables above in scope — confirm against the
    # file's imports.
    embed()
Esempio n. 9
0
def run():
    """Initialise a trainer from ``ConfigIndex`` and run the model indexer."""
    indexing_trainer = main.init_trainer(ConfigIndex)
    main.run_model_indexer(ConfigIndex, indexing_trainer)
def run():
    """Predict text categories for three fixed document groups and export CSVs.

    The model configured by ``ConfigLoadRTModel`` is applied to every document
    of each group. Document texts are fetched by docid through
    ``ske_manager`` inside a session that is opened at the start and is
    closed again even when fetching, predicting or writing fails.

    Two files are written per group, using the file names stored in the group:

    * detailed CSV: a header row (``docid`` followed by the labels of the
      model's ``textcat`` pipe) and one row per document with all scores;
    * coarse CSV: no header; one row per document holding the docid followed
      by the names of all categories whose score is greater than 0.5.
    """

    modelRT = main.init_trainer(ConfigLoadRTModel)

    ske_config = {
        "ske_rest_url": credentials.ske_rest_url,
        "ske_corpus_id": credentials.ske_corpus_id,
        "ske_user": credentials.ske_user,
        "ske_password": credentials.ske_password
    }

    ske_manager.create_session(ske_config)

    groups = [{
        "output_detailed_filename":
        "Risikotyp_detailed_SC.csv",
        "output_coarse_filename":
        "Risikotyp_coarse_SC.csv",
        "docids": [
            "PRESSE_201701291915120115",
            "STANDARD_200007081907270238",
            "PRESSE_2018111635E6C549D7",
            "TTKOMP_20160601033409214140080",
            "VN_20160813234127320770111",
            "WZ_20181003_5098459",
            "WZ_20181003031108870400088",
            "DIEWIR_20170904A65AE63CC9",
            "WZ_20160722031108098120017",
            "PROFIL_201604041421430008",
            "PRESSE_201603251915080063",
            "PRESSE_201603250400150004",
            "STANDARD_20130522194002017620108",
            "EMEDIA_201711244E597405B3",
            "KURIER_201805011920500018",
            "PRESSE_201802101915160118",
            "SN_20150912110031136500056",
            "SN_20160729013124288040079",
            "KURIER_201806081920310032",
            "STANDARD_20150909194002257560093",
            "WZ_20170610031246099190029",
            "FURCHE_2017112314B891ACDA",
            "PRESSE_201306301915390060",
            "OOEN_20161018040003118910025",
            "PRESSE_201310131915430079",
            "STANDARD_20170217194004061000060",
            "SN_20160201013102166390056",
            "NVT_20170827543CFC3659",
            "KLEINE_20170827041003215460321",
            "PRESSE_200906071915540116",
            "WZ_20121227031131015130021",
            "KLEINE_20170825041003194510320",
            "STANDARD_20170125194005197690112",
            "FURCHE_20180509F6639BE236",
            "MEDIANET_2016030413C1A5E1EC",
            "NVT_20170825FD0C32D6C8",
            "STANDARD_20160105194002051940104",
            "SN_20150710013128181990083",
            "PRESSE_201611271915120046",
            "VN_20121215232708007760147",
            "STANDARD_20171115194502812820041",
            "STANDARD_201008111925100054",
            "STANDARD_20150828194002169290150",
            "WZ_20180609031309193220091",
            "PRESSE_201704291915140121",
            "STANDARD_20171108194501866020115",
            "PROFIL_20170410B94762E951",
            "KRONE_20171014219271554",
            "WZ_20180217031310334780077",
            "PRESSE_201801141915130063",
            "EMEDIA_20180126F0E083B9A7",
            "WZ_20151114031310271970073",
            "KRONE_20170507107200378",
            "SN_20130619232208096190108",
            "EMEDIA_201302221753240056",
            "TREND_201503230956040007",
            "KURIER_201609080401410126",
            "FALTER_201703224B7E7AC4F1",
            "STANDARD_20180321194502095040050",
            "WIENERIN_2018072680706B2C87",
            "MWVOLL_20170914_8820361",
            "WZ_20160416031109313060093",
            "STANDARD_20161013194004252090046",
            "WZ_20180525C9E368BFD8",
            "WZ_200107201913270001",
            "KURIER_201609091920420072",
            "SN_20170914013122005340117",
            "FURCHE_201205311643170039",
            "KURIER_201707061920410065",
            "PRESSE_201809291915120178",
            "KURIER_201612211920400070",
            "STANDARD_20160326194003226570109",
            "KURIER_201511051920580101",
            "FORMATDB_20161111BC7EA0D6BB",
            "OOEN_20120522040006076900046",
            "WZ_20140124031310182590078",
            "KURIER_201708261920360109",
            "TT_201605016250DBF067",
            "WIBLATT_201608225EEF91EB48",
            "KURIER_200307111621030038",
            "KRONE_20170929508340752",
            "WZ_20150121031316298470064",
            "WZ_20171104031109152100017",
            "KLEINE_201710284F3466E472",
            "SN_20170629013050252890117",
            "KLEINE_20171125041005034520361",
            "MEDIANET_201404259B6F8F2B7E",
            "EMEDIA_20181130F6382BD619",
            "OESTERREICH_20180926975ED551E3",
            "MWVOLL_20030917190000603100",
            "KURIER_201808281920480044",
            "STANDARD_200107071907170162",
            "PRESSE_200212280218170053",
            "KURIER_1996012218530147",
            "PRESSE_201603131915150054",
            "KURIER_201801280401260080",
            "PRESSE_201704121915090007",
            "FORMATDB_201611110F8589863A",
            "PRESSE_201306111915210063",
            "KURIER_201712080401330081",
            "OOEN_20160305040002039760040",
            "STANDARD_20181027194502239080114",
            "DIEWIR_201109051311500026",
            "WZ_20180214031308504950066",
            "DATUM_201202011605500020",
            "OOEN_20160305B035521226",
            "STANDARD_20161109194007308140095",
            "STANDARD_20160504194003235260116",
            "HOR_20181116CB6F841543",
            "KRONE_20181023142940120",
            "KURIER_201209151717240207",
            "FORMATDB_20150925A7BD99FFF2",
            "PRESSE_200704061849390083",
            "KLEINE_20150501041003227740229",
            "TTKOMP_20160719020245298030064",
            "KURIER_201405311831490009",
            "KURIER_201801030401370268",
            "SN_20170210013038072830002",
            "KURIER_201803081920380154",
            "KURIER_201709210401550411",
            "SN_201003132325000157",
            "MWVOLL_20180628_9981783",
            "KLEINE_20170614041003015480051",
            "PRESSE_200812191915420113",
            "PROFIL_199712200615043",
            "PRESSE_201210281916120151",
            "EMEDIA_201812144CB9AE62CE",
            "SN_201103192323490015",
            "WZ_20160820031110268600067",
            "KURIER_201701290401280053",
            "WIENERIN_20161124C7FD632CA1",
            "PRESSE_201605041915110076",
            "OOEN_20170117040003077910020",
            "STANDARD_20170722194501271440089",
            "SN_199707192023320035",
            "MWVOLL_20170112_7821144",
            "KURIER_201705121920390061",
            "TT_20180123C9871AE503",
            "PROFIL_201812178E7D0688AB",
            "WZ_20181229_5148737",
            "KRONE_199904091757078",
            "TT_20170825F693694D0D",
            "FURCHE_20160728E0B6BB12ED",
            "KLEINE_20161028041003014480122",
            "NVT_200108010500550020",
            "MWVOLL_20160616_6998555",
            "KLEINE_20171027041003028330126",
            "PRESSE_201506071915360071",
            "KRONE_20181007810780644",
            "TT_201602032192BBDFF8",
            "SN_200005202042540315",
            "PRESSE_201407161915270070",
            "SN_201001072322140083",
            "KURIER_201709210401550151",
            "NVT_201704192901910B90",
            "KURIER_201801030401370141",
            "KURIER_201212131716140053",
            "KURIER_201010231720010174",
            "OOEN_20170216040003180050041",
            "WZ_20151104031308233360064",
            "KLEINE_20181023041005295000439",
            "KURIER_200812051719380050",
            "FORMATDB_200408061527100123",
            "VN_201802240AED5AC3D8",
            "SN_20181020013238294520065",
            "TT_20130518020702184280",
            "SN_20150928013141286540065",
            "KURIER_201212130257460028",
            "NOEN_20180523015522145170020",
            "KURIER_201511041920580028",
            "TT_201704072E97932212",
            "VN_20181009231332702480107",
            "PRESSE_201708261915150104",
            "KURIER_201712080401330011",
            "STANDARD_20160105194002051940118",
            "STANDARD_200503261922440151",
            "KURIER_201811210401140268",
            "PROFIL_201204161209330087",
            "PROFIL_201405121146010070",
            "STANDARD_20180411194503066820116",
            "VN_20160326230734283850135",
            "OOEN_20180414014503176230089",
            "STANDARD_20160105194002051940116",
            "STANDARD_20180517194502325560049",
            "IM_201606291AC7F29F5E",
            "TT_20160912860AAB4E91",
            "PRESSE_201605291915150097",
            "STANDARD_20181027194502239080115",
            "SN_200604082314150216",
            "KURIER_201709210401550037",
            "OOEN_20181107014504186900074",
            "PRESSE_201208051917230086",
            "PRESSE_201410281915260106",
            "KURIER_201706210401190224",
            "KURIER_201302090309270097",
            "KURIER_201302091832000119",
            "KURIER_201606211920420083",
            "EMEDIA_20170224FEC67206A4",
            "KURIER_201805280400460030",
            "TT_20170825BACDD2A41E",
            "STANDARD_200611181922050155",
            "PRESSE_201607301915130110",
            "WZ_20171201031310660980028",
            "STANDARD_20150218194001313380101",
            "STANDARD_19911221050632",
            "WIENERIN_20170126222697D75E",
            "BVZ_20140701143421024730097",
            "WIBLATT_200804170205030184",
            "IM_20010701_1601310082",
            "IM_201009291642160043",
            "TTKOMP_20160701020515166300102",
            "PRESSE_201507121915360104",
            "STANDARD_19920205054800",
            "WZ_200408052235210142",
            "WZ_20180223031310791180041",
            "TT_201801186199D4726A",
            "OOEN_20171030040004016540027",
            "WZ_20181201_5133365",
            "PROFIL_201307221531470023",
            "KRONE_20160603011960099",
            "PRESSE_201711111915180109",
            "KURIER_201001031728370081",
            "WZ_20171108031310271600038",
            "NEWS_200003092057410012",
            "PRESSE_1999020603430016",
            "NEWS_2016040298A7674F75",
            "KURIER_201702171920390031",
            "VN_20110514D1BD2E4A00",
            "KURIER_200804101725380136",
            "VN_20150801232923258770102",
            "PRESSE_201306301915390059",
            "KRONE_20160911104750263",
            "STANDARD_20150418194005147330050",
            "STANDARD_20171115194502812820138",
            "OOEN_200703030408250119",
            "MWVOLL_20181113_10517373",
            "SN_20160504013130298600094",
            "KLEINE_20170406041003109020296",
            "WZ_20170908031307303630059",
            "WZ_20101023011627071210051",
            "VN_20150509231014309580135",
            "FORMATDB_201401101616150072",
            "KRONE_201412200435490360",
            "KRONE_201412191716410139",
            "TTKOMP_20160226020411097190106",
            "KLEINE_20150805041003069370055",
            "KRONE_20160403211040257",
            "KURIER_201212131716140054",
            "OOEN_200703030408250066",
            "PROFIL_201404071031410011",
            "WIBLATT_200305281924120064",
            "WZ_200810170115320154",
            "VN_20160718233308157760116",
            "STANDARD_19960119054848000020",
            "WZ_20160331031308108280086",
            "KURIER_201601121920550012",
            "OOEN_200809250400290135",
            "WIENERIN_201712149207825157",
            "NOEN_20180117074232329640218",
            "NOEN_20180116080553666750081",
            "PRESSE_20160803B1018081CD",
            "STANDARD_200408021921180127",
            "PROFIL_201807092787C83B57",
            "SN_20170905013041082640031",
            "SN_20140402005000266560093",
            "FORMATDB_201509252E0F8AB8B4",
            "KURIER_201707061920410025",
            "VN_20130518230427033460115",
            "STANDARD_20161221194004221610086",
            "ECHO_200709011522210012",
        ],
    }, {
        "output_detailed_filename":
        "Risikotyp_detailed_SM.csv",
        "output_coarse_filename":
        "Risikotyp_coarse_SM.csv",
        "docids": [
            "HOR_2018082490C6B90BA6",
            "KURIER_201802201920530123",
            "TT_201804180CD23811B4",
            "KURIER_201611201920430069",
            "TREND_201612226E11013D1F",
            "WZ_20161029031111143530003",
            "STANDARD_20170523194005160300053",
            "WZ_20151218031309229620086",
            "TT_201708119645A5DB57",
            "PRESSE_201610011915210114",
            "OOEN_20180707014504095450024",
            "NEWS_201803302D93C6840A",
            "KURIER_201611161920420006",
            "HOR_201802026B29B8014D",
            "PROFIL_20160418C3E30FAB52",
            "STANDARD_20180221194502454650053",
            "VN_20161119234212178480026",
            "NEWS_2016121770EAEEF26F",
            "MEDIANET_201801191CD52EA209",
            "EMEDIA_20170630F066D347A6",
            "WZ_20161124031311072500017",
            "WZ_20160701031110004350080",
            "KURIER_201706101920400064",
            "PRESSE_201206011916560077",
            "WZ_20171230031309606310024",
            "FALTER_2017032239A20C3CF6",
            "STANDARD_20161021194006012290100",
            "WZ_20150617031308046800089",
            "STANDARD_20171009194501248060058",
            "STANDARD_20140930194001232200080",
            "WZ_20161029031111143530065",
            "KURIER_201709161920370055",
            "HOR_20180420D5FA94E99C",
            "WZ_20180116031310640080065",
            "PROFIL_201504271515360021",
            "FALTER_201804043CD17074DB",
            "WZ_20140214031311088700075",
            "WZ_20160713031109310430069",
            "STANDARD_20161126194005245720106",
            "WZ_20161228031110098750062",
            "KONSUM_2018122075F5776A7B",
            "STANDARD_20121217194003077710071",
            "FALTER_20161027BE722103D4",
            "NVT_20161227316B6D24F7",
            "STANDARD_20180612194505516790071",
            "FURCHE_201709219910618CEC",
            "NEWS_20161029A3577B7D95",
            "TT_201704244B424FE8D5",
            "STANDARD_20180523040502553940131",
            "TVMEDIA_20161130A193B0E0D0",
            "KLEINE_20161227041002255230055",
            "KRONE_20180322204680105",
            "HOR_20180518B60B469237",
            "STANDARD_20170323194003312040055",
            "STANDARD_20160507194005312160081",
            "WZ_20160102031110045780077",
            "WZ_20111110031312018380097",
            "PRESSE_201801291915100054",
            "HOR_20170929ED5BF5B1F9",
            "WZ_20170816031306156110056",
            "STANDARD_20130827194001212180078",
            "MWVOLL_20161203_7668395",
            "EMEDIA_20170825E2EFCA16F6",
            "MWVOLL_20131007_3368714",
            "SN_20180505013249159810031",
            "WZ_20141219031315186660080",
            "KURIER_201805031920330140",
            "PRESSE_201805281915090038",
            "FORMATDB_20161014B62E777BAC",
            "TT_201803290E1143FE33",
            "WZ_20130731031312148090058",
            "HOR_201709298002CF28C0",
            "KURIER_201609231920410017",
            "KURIER_201610132035450044",
            "FALTER_201607068366BB6A49",
            "WZ_20160520031309097640078",
            "FALTER_20160706C6654FC375",
            "MWVOLL_20180621_9948695",
            "KRONE_20180113665350016",
            "PRESSE_201801131915170068",
            "WZ_20140927031310082540015",
            "TT_20161231E97D18764E",
            "MEDIANET_20160902B0C588AC2A",
            "AUGUSTIN_20180523BD35D9AADB",
            "KURIER_201611161920420050",
            "HOR_201806083E1BC69BF7",
            "FALTER_201210171627490040",
            "WZ_20160430031309229040097",
            "KURIER_201611211920390062",
            "KURIER_201612291920400008",
            "KURIER_201611160401360151",
            "KURIER_201705131920390057",
            "TT_2018050580FC7DEEE6",
            "KURIER_201708011920340147",
            "OOEN_20161124040003143720095",
            "KURIER_201708021920320126",
            "FALTER_20170301591854D1A7",
            "FORMATDB_201404251509320066",
            "TT_200605110130510059",
            "KONSUM_201811226DB535AF40",
            "WZ_20120811031310280150021",
            "OOEN_20180125040004262250071",
            "STANDARD_20180407194503450510186",
            "HOR_201604291219400028",
            "SBGW_20180329030104250810004",
            "KURIER_201612211920400031",
            "PRESSE_201209131916470087",
            "TREND_201410271119240033",
            "KURIER_201701051920400069",
            "PRESSE_201812191915090090",
            "KURIER_201701250401240048",
            "STANDARD_20160810194003101160073",
            "FALTER_201607066AE4DE74BC",
            "STANDARD_20180420194502107340150",
            "OOEN_20180623014504177420057",
            "SN_20180113013228261630040",
            "MWVOLL_20151010_5965051",
            "WZ_20180621031308464340089",
            "HOR_201805116BB5AD4C0E",
            "WZ_20160406031108092660081",
            "EMEDIA_2018121434088C1B16",
            "PRESSE_201708250400130056",
            "WZ_20170729031307212620093",
            "KURIER_201704041920380060",
            "EMEDIA_20181025A5F12F21B0",
            "SN_20180726013047117830037",
            "PRESSE_201811091915090081",
            "MEDIANET_20160520329B003D38",
            "WZ_20171230031309606310089",
            "SN_20150625013134095230069",
            "FALTER_20161221BEEFEA679E",
            "STANDARD_20160416194005259970135",
            "WZ_20140327031316211250086",
            "KURIER_201605090401000029",
            "PRESSE_201811031915130184",
            "TREND_201309301540590016",
            "SN_20170619013047264520053",
            "WZ_20180303031309561710033",
            "KURIER_201612171920420009",
            "OOEN_20180502014504039450045",
            "OOEN_20171130040004177310091",
            "MEDIANET_20150626D24AEE7D06",
            "WZ_20180421226A5B8EF8",
            "STANDARD_20140205194001180700014",
            "KURIER_201210090311310024",
            "OOEN_20150311035959197400062",
            "EMEDIA_2017063098D7BFB26C",
            "PROFIL_201703131303200008",
            "KURIER_201709260401220003",
            "WZ_20160617031109050310088",
            "WZ_20170314031110164530078",
            "KURIER_201807101920500128",
            "PRESSE_201706191915100015",
            "STANDARD_20181020194503555290113",
            "SN_20121114232153184730065",
            "STANDARD_20180213194502596740055",
            "MEDIANET_201401144B991FAE58",
            "WZ_20160827031109196290092",
            "KLEINE_20170619041003151010191",
            "WZ_20171018031307015270083",
            "KLEINE_20180428041005096000409",
            "WZ_20161116031310289520007",
            "WZ_20180608031308098960016",
            "WZ_20121113031323233630015",
            "KURIER_201706231920420148",
            "STANDARD_20180217194503792870144",
            "HOR_2017092225EA50DB01",
            "STANDARD_20180306194502455760039",
            "WIENERIN_2017112311A04DB8BA",
            "KURIER_201706141920400055",
            "PROFIL_20160613509111033C",
            "VN_20170619231851153070030",
            "KURIER_201806261921130145",
            "KURIER_201712061920340136",
            "WZ_20140816031315025180060",
            "PRESSE_201601091915170066",
            "STANDARD_20180228194502100220091",
            "TREND_201707288F5D6CABCD",
            "SN_20180609013300254480032",
            "WZ_20160930031310160560038",
            "TT_20180429EA1BDEB143",
            "MWVOLL_20181112_10516521",
            "ARBEITW_2015091667B9268003",
            "PRESSE_201703121915120154",
            "STANDARD_20180505194503285970092",
            "WZ_20131029031314195500091",
            "WZ_20170223031110261430020",
            "HOR_201505221355140028",
            "MWVOLL_20180527_9850944",
            "MWVOLL_20161216_7720033",
            "WZ_20111115031309007650064",
            "OOEN_20171019040003108430459",
            "STANDARD_20181121194502895250091",
            "PROFIL_201507279F8A2214BC",
            "WZ_20120204031314062310127",
            "KURIER_201706220401530297",
            "KURIER_201606220402160236",
            "SN_20160921013100182820086",
            "HOR_20171027BD8378B6BF",
            "STANDARD_20171122194502350460038",
            "FALTER_2015120262C41042D2",
            "KLEINE_20180603041006090110503",
            "KLEINE_20171004041005036270360",
            "WZ_20150224031309054110092",
            "PRESSE_201509191915460085",
            "HOR_201804063E31662A39",
            "VN_20170703232342174230113",
            "FORMATDB_201705123B2C075201",
            "KRONE_20180112749000008",
            "WZ_20150617031308046800070",
            "HOR_2018051818434D63C5",
            "SN_20180516013053016430080",
            "WZ_20150522031314256230013",
            "STANDARD_20130614194003266710096",
            "MWVOLL_20151221_6271834",
            "VN_20180324000014095820021",
            "OOEN_20171129040004132910109",
            "STANDARD_20160921194004165110114",
            "STANDARD_201112151925160084",
            "HOR_201505291614310008",
            "SN_20180705013104300970047",
            "PROFIL_20180806F18A6B1C82",
            "WIBLATT_201205218B11B07398",
            "NVT_20130825348874E2F3",
            "HOR_20180112C7D5EFDB5D",
            "STANDARD_20141112040502235530058",
            "TT_201604293A9C709A55",
            "STANDARD_201109291925100060",
            "HOR_201210121525480015",
            "PRESSE_201405041915370159",
            "MWVOLL_20130610_2958740",
            "PRESSE_201608061915130103",
            "STANDARD_20130713194001270370094",
            "HOR_20161125EDE694F8A7",
            "MEDIANET_2018051854EE2019AB",
            "NVT_201801314C009E2719",
            "WZ_20180901_5080836",
            "EMEDIA_20141128E23D140AB9",
            "KLEINE_20180131041006324230241",
            "KURIER_201807191920330152",
            "PRESSE_201806090050160116",
            "MEDIANET_20180427EE75BF82BD",
            "NVT_201712060F00F9454E",
            "KLEINE_20171206041004267390382",
            "KURIER_201806021920310102",
            "OESTERREICH_201705281039445318",
            "WZ_20111223031310295860064",
            "TT_20181208BE359ADF61",
            "STANDARD_200708291921100100",
            "STANDARD_20171102194502538810063",
            "KURIER_201803130401050042",
            "KURIER_201604131921040170",
            "KURIER_201803131920520035",
            "KLEINE_20151231041001048910051",
            "WZ_20141127031310104160016",
            "KRONE_20161030176580104",
            "KURIER_201306301834350076",
            "KURIER_201701250401240118",
            "KURIER_201611011921000073",
            "STANDARD_20160924194006198620098",
            "SN_20140908003124006640046",
            "OEREICHE_20150917CDE3FEEF88",
            "SN_20140811003124166830012",
            "KURIER_201709150402190191",
            "STANDARD_20171012040504673550028",
            "HOR_201709222740CBF99B",
            "HOR_20160415965951E8CD",
            "HOR_201111111511230025",
            "FALTER_201008111611360076",
            "FORMATDB_2018071356A3AACC31",
        ],
    }, {
        "output_detailed_filename":
        "Risikotyp_detailed_SM_SC.csv",
        "output_coarse_filename":
        "Risikotyp_coarse_SM_SC.csv",
        "docids": [
            "NVT_20171014318738E118",
            "KLEINE_20170810041002041130292",
            "SN_20160608013118289810045",
            "STANDARD_20160419194003243230079",
            "KURIER_201701121920380076",
            "TTKOMP_20161221020458156710232",
            "KRONE_20161030176580107",
            "OOEN_20171216040003158190062",
            "TT_201805120C8741EEB7",
        ],
    }]

    # Everything below needs the open SKE session; the finally clause makes
    # sure the session is closed even if a fetch, a prediction or a file
    # write raises.
    try:
        labels = list(modelRT.nlp.get_pipe("textcat").labels)
        main.log_manager.info_global("--------------------------------\n"
                                     f"Labels: {labels}\n")

        # Column order of the detailed CSV: docid first, then one per label.
        header = ["docid"] + labels

        for group in groups:

            main.log_manager.info_global("--------------------------------\n"
                                         "Starting new group\n")

            progressbar = progress.bar.Bar(
                'Calculating predictions for each text ...',
                max=len(group['docids']),
                suffix='%(index)d/%(max)d done, ETA: %(eta_td)s h')

            group['predictions'] = []

            for docid in group['docids']:

                text = ske_manager.get_doc_from_docid(ske_config, docid)['text']

                doc = modelRT.nlp(text)

                # One row per document: the docid plus every category score.
                group['predictions'].append({"docid": docid, **doc.cats})

                progressbar.next()

            progressbar.finish()

            main.log_manager.info_global("--------------------------------\n"
                                         "Output CSVs\n")

            # newline='' is required by the csv module; without it every row
            # ends in '\r\r\n' on platforms that translate '\n' to '\r\n'.
            with open(group["output_detailed_filename"], 'w', newline='') as f:

                writer = csv.DictWriter(f, fieldnames=header)
                writer.writeheader()
                writer.writerows(group['predictions'])

            with open(group['output_coarse_filename'], 'w', newline='') as f:

                # Coarse view: docid followed by the names of all categories
                # that scored above the 0.5 threshold.
                writer = csv.writer(f, dialect="excel")
                writer.writerows(
                    [[pred['docid']] +
                     [cat for cat, score in pred.items()
                      if cat != "docid" and score > 0.5]
                     for pred in group['predictions']])
    finally:
        ske_manager.close_session()