def train_lm(n_cycles: int = 3, cycle_len: int = 1, cycle_mult: int = 2,
             momentum: float = 0.8, bptt: int = 40, lr: float = 1e-3,
             wd: float = 1e-7):
    """Fit a language model on the unsupervised corpus with a cyclical schedule.

    Builds a ``TextLMDataBunch`` from the train/test unsupervised splits,
    constructs one cosine-annealed ``TrainingPhase`` per cycle (each cycle
    ``cycle_mult`` times longer than the previous one), and trains with
    early stopping and best-model checkpointing. The final weights and the
    encoder are saved even if training aborts with a ``RuntimeError``.

    Args:
        n_cycles: Number of training cycles (one scheduler phase each).
        cycle_len: Length of the first cycle, in epochs.
        cycle_mult: Growth factor applied to each successive cycle length.
        momentum: Optimizer momentum passed to every phase.
        bptt: Back-propagation-through-time sequence length.
        lr: Peak learning rate for each phase.
        wd: Weight decay used during fitting.
    """
    corpus = create_or_restore(DATA_ROOT)
    unsup_sets = [fastai_patch(ds)
                  for ds in (corpus['train_unsup'], corpus['test_unsup'])]
    data_bunch = TextLMDataBunch.create(unsup_sets, path=LM_PATH, bptt=bptt)

    # One scheduler phase per cycle; phase length is measured in items and
    # grows geometrically with cycle_mult.
    n_items = sum(len(ds) for ds in unsup_sets)
    phases = []
    for cycle in range(n_cycles):
        phase_len = n_items * (cycle_len * cycle_mult ** cycle)
        phases.append(TrainingPhase(phase_len, lrs=lr, moms=momentum,
                                    lr_anneal=annealing_cos))

    learner = RNNLearner.language_model(data_bunch, bptt)
    callbacks = [EarlyStopping(learner, patience=2),
                 GeneralScheduler(learner, phases),
                 SaveModel(learner)]

    # Total epochs is the sum of the geometric series of cycle lengths;
    # the cycle_mult == 1 branch avoids the zero denominator in the
    # closed-form expression.
    if cycle_mult == 1:
        total_epochs = n_cycles * cycle_len
    else:
        total_epochs = int(cycle_len * (1 - cycle_mult ** n_cycles)
                           / (1 - cycle_mult))
    print(f'Total number of epochs: {total_epochs:d}')

    try:
        learner.fit(total_epochs, wd=wd, callbacks=callbacks)
    except RuntimeError as e:
        # Best-effort: report the failure but still persist current weights.
        print(f'Model training error: {e}')
    finally:
        folder = learner.path / learner.model_dir
        print(f'Saving latest model state into {folder}')
        learner.save('lm_final')
        learner.save_encoder('lm_final_enc')
'PersonalStories', 'PossiblyFeedback', 'SentimentNegative', 'SentimentNeutral', 'SentimentPositive' ] fold = str(args.fold) cat = all_cats[args.cl] model_id = '2019_ 4_04_20_02_39_766228' model_id = '2019_ 4_05_02_05_04_069084' model_id = '2019_ 4_05_11_54_23_327097' model_id = '2019_ 4_06_18_53_42_028549' model_id = '2019_ 4_07_20_36_23_662822' model_id = '2019_ 4_07_20_35_19_533996' model_id = '2019_ 4_07_23_55_08_571313' exp_path = '/mnt/data/group07/johannes/ompc/ppexp_short4/' + cat + '_' + fold data_lm_ft = TextLMDataBunch.load( Path('/mnt/data/group07/johannes/ompc/pplmexp_short4')) if True or not Path('/mnt/data/group07/johannes/ompc/ppexp_short4/' + cat + '_' + fold + '/models/enc5.pth').is_file(): if True or not Path( '/mnt/data/group07/johannes/ompc/pplmexp_short/models/enc5.pth' ).is_file(): print('need to save enc') learn_lm = language_model_learner(data_lm_ft) learn_lm.load(model_id) learn_lm.save_encoder('enc5') del learn_lm print('need to copy enc') os.makedirs( os.path.dirname('/mnt/data/group07/johannes/ompc/ppexp_short4/' + cat + '_' + fold + '/models/enc5.pth'),
import news_utils.fastai import news_utils.clean.german # In[ ]: parser = argparse.ArgumentParser() parser.add_argument("--cl", type=int) parser.add_argument("--fold", type=int) args = parser.parse_args() bpemb_de = BPEmb(lang="de", vs=25000, dim=300) data_lm_ft = TextLMDataBunch.load(path='/mnt/data/group07/johannes/ompc/lmexp', cache_name='whatever') all_cats = ['ArgumentsUsed', 'Discriminating', 'Inappropriate', 'OffTopic', 'PersonalStories', 'PossiblyFeedback', 'SentimentNegative', 'SentimentNeutral', 'SentimentPositive'] fold = str(args.fold) cat = all_cats[args.cl] model_id = '2019_ 4_01_00_11_32_066215' exp_path = '/mnt/data/group07/johannes/ompc/exp/' + cat + '_' + fold os.makedirs(os.path.dirname('/mnt/data/group07/johannes/ompc/exp/' + cat + '_' + fold +'/models/enc5.pth'), exist_ok=True) shutil.copy('/mnt/data/group07/johannes/ompc/lmexp/models/enc5.pth', '/mnt/data/group07/johannes/ompc/exp/' + cat + '_' + fold +'/models/enc5.pth') def run_for_class(it=1): train_df = pd.read_pickle(Path('/mnt/data/group07/johannes/ompc/data_ann')/cat/fold/'train.pkl') test_df = pd.read_pickle(Path('/mnt/data/group07/johannes/ompc/data_ann')/cat/fold/'test.pkl')