def fit(self, df_path, model_name="lm_5_ep_lr2-3_5_stlr", dict_name="itos",
        num_epochs=5, is_fit_topics=False):
    df = pd.read_csv(df_path, sep="\t")
    bs = 16
    texts = pd.DataFrame(list(df.text))
    # Build a language-model databunch: tokenize, numericalize with the saved
    # vocab, and hold out 10% of the rows for validation
    self.data = TextList.from_df(
        texts,
        processor=[TokenizeProcessor(tokenizer=Tokenizer(lang="xx")),
                   NumericalizeProcessor(
                       vocab=Vocab.load("models/{}.pkl".format(dict_name)))]).\
        random_split_by_pct(.1).\
        label_for_lm().\
        databunch(bs=bs)
    self.learn = language_model_learner(
        self.data, AWD_LSTM, pretrained=False, drop_mult=0.7,
        pretrained_fnames=[model_name, dict_name])
    self.learn.unfreeze()
    # Run the LR finder and take the learning rate at the steepest gradient
    self.learn.lr_find(start_lr=slice(10e-7, 10e-5), end_lr=slice(0.4, 10))
    _ = self.learn.recorder.plot(skip_end=10, suggestion=True)
    best_lm_lr = self.learn.recorder.min_grad_lr
    self.learn.fit_one_cycle(
        num_epochs, best_lm_lr,
        callbacks=[ReduceLROnPlateauCallback(self.learn, factor=0.8)])
    # TODO: fit lda
    if is_fit_topics:
        pass
    return self
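# Usage sketch (assumptions): `LMTrainer` and the TSV path below are hypothetical
# placeholders, not names taken from this code base; they only illustrate how the
# fit() above is meant to be called.
#
#   trainer = LMTrainer()
#   trainer = trainer.fit("data/articles.tsv", num_epochs=5)  # TSV with a `text` column
#   trainer.learn.save("lm_finetuned")                         # persist fine-tuned weights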
def fit_model(learn, epoch, learner_saved, encoder_saved):
    # Resume from previously saved learner/encoder weights if both names are given
    if learner_saved and encoder_saved:
        learn.load(learner_saved)
        learn.load_encoder(encoder_saved)
    learn.fit_one_cycle(epoch, 2e-3, moms=(0.8, 0.7),
                        callbacks=[
                            SaveModelCallback(learn),
                            ReduceLROnPlateauCallback(learn, factor=0.8)
                        ])
    learner_to_save = "lm_" + str(epoch) + "_ep_lr2-3Px"
    encoder_to_save = "lm_" + str(epoch) + "_ep_lr2-3_encx"
    learn.save(learner_to_save)
    learn.save_encoder(encoder_to_save)
    return learn, learner_to_save, encoder_to_save
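# Usage sketch (assumptions): the databunch file and checkpoint names below are
# placeholders, not files known to exist here; this only shows how fit_model()
# chains loading, fitting with ReduceLROnPlateau, and saving. It relies on the
# same fastai.text names (load_data, language_model_learner, AWD_LSTM) used above.
def continue_training_example():
    data_lm = load_data("data", "data_lm.pkl", bs=64)    # placeholder databunch
    learn = language_model_learner(data_lm, AWD_LSTM, drop_mult=0.3)
    learn, saved_learner, saved_encoder = fit_model(
        learn, epoch=5,
        learner_saved="lm_5_ep_lr2-3Px",                 # placeholder checkpoints
        encoder_saved="lm_5_ep_lr2-3_encx")
    return saved_learner, saved_encoder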
def __init__(self,
             data_path: str = 'lang_model',
             emb_sz: int = 800,
             qrnn: bool = False,
             bidir: bool = False,
             n_layers: int = 4,
             n_hid: int = 2500,
             bs: int = 104,
             bptt: int = 67,
             lr: float = 0.0013,
             wd: float = .012,
             one_cycle: bool = True,
             cycle_len: int = 1) -> None:
    """
    Instantiate an AWD_LSTM language model with the given hyper-parameters.

    data_path: str
        path the databunch is loaded from
    emb_sz: int
        size of the word embeddings
    qrnn: bool
        whether or not to use QRNN layers (requires CuDNN)
    bidir: bool
        if the RNN should be bi-directional
    n_layers: int
        number of layers in the language model
    n_hid: int
        number of hidden units in the model
    lr: float
        learning rate
    bptt: int
        back-propagation through time; max sequence length through which
        gradients will be accumulated
    bs: int
        batch size

    The hyper-parameters are stored in a fastai dict called
    `fastai.text.models.awd_lstm_lm_config`:

        {'emb_sz': 400, 'n_hid': 1150, 'n_layers': 3, 'pad_token': 1,
         'qrnn': False, 'bidir': False, 'output_p': 0.1, 'hidden_p': 0.15,
         'input_p': 0.25, 'embed_p': 0.02, 'weight_p': 0.2,
         'tie_weights': True, 'out_bias': True}
    """
    self.lr, self.wd, self.one_cycle, self.cycle_len = lr, wd, one_cycle, cycle_len
    awd_lstm_lm_config.update(
        dict(emb_sz=emb_sz, qrnn=qrnn, bidir=bidir, n_layers=n_layers, n_hid=n_hid))

    # log params to Weights & Biases
    wb_handle = wandb.init(config=awd_lstm_lm_config)
    wandb.config.update({
        'data_path': str(data_path),
        'bs': bs,
        'bptt': bptt,
        'lr': lr
    })
    self.csv_name = 'history_' + wb_handle.name
    wandb.config.update({'csvlog_save_path': self.csv_name})

    # instantiate databunch
    self.data_lm = load_data(data_path, bs=bs, bptt=bptt)

    # instantiate language model
    self.learn = language_model_learner(data=self.data_lm,
                                        arch=AWD_LSTM,
                                        pretrained=False,
                                        model_dir=Path('models_' + wb_handle.name),
                                        config=awd_lstm_lm_config)
    self.full_model_path = str(self.learn.path / self.learn.model_dir)
    wandb.config.update({'model_save_path': self.full_model_path})

    # prepare callbacks
    escb = EarlyStoppingCallback(learn=self.learn, patience=2)
    smcb = SaveModelCallback(learn=self.learn, name='best_' + wb_handle.name)
    rpcb = ReduceLROnPlateauCallback(learn=self.learn, patience=1)
    csvcb = CSVLogger(learn=self.learn, filename=self.csv_name)
    wb = WandbCallback(self.learn)
    self.callbacks = [escb, smcb, rpcb, csvcb, wb]

    self.fit()
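# Sketch (assumption): the fit() called at the end of __init__ above is not shown
# in this snippet; a minimal version consistent with the stored hyper-parameters
# and callbacks could look like this.
def fit(self) -> None:
    if self.one_cycle:
        # one-cycle policy over cycle_len epochs with the configured lr/wd
        self.learn.fit_one_cycle(self.cycle_len, max_lr=self.lr, wd=self.wd,
                                 callbacks=self.callbacks)
    else:
        self.learn.fit(self.cycle_len, lr=self.lr, wd=self.wd,
                       callbacks=self.callbacks)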
    1e-4,
    # max_lr=1e-4,
    # div_factor=1e2,
    # final_div=1e3,
    # pct_start=0.1,
    callbacks=[
        SaveModelCallback(learn, name=f"model_best_{fold}", monitor="kappa_score"),
        TerminateOnNaNCallback(),
        LoggerCallback(
            len(data.dl(DatasetType.Train)) + len(data.dl(DatasetType.Valid)),
            learn),
        logger,
        ReduceLROnPlateauCallback(learn, patience=2, factor=0.5, min_lr=1e-7),
    ],
)
torch.save(learn.model.state_dict(), f"{fname}_{fold}.pth")

# collect predictions and targets on the validation set for this fold
learn.model.eval()
with torch.no_grad():
    for step, (x, y) in progress_bar(enumerate(data.dl(DatasetType.Valid)),
                                     total=len(data.dl(DatasetType.Valid))):
        p = learn.model(*x)
        pred.append(p.float().cpu())
        target.append(y.cpu())
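# Sketch (assumptions): one way the accumulated validation predictions could be
# scored after the loop above. Treating the model output as per-class logits and
# using scikit-learn's quadratic-weighted kappa are assumptions, mirroring the
# "kappa_score" metric monitored by SaveModelCallback above.
from sklearn.metrics import cohen_kappa_score

all_pred = torch.cat(pred)        # (n_samples, n_classes) logits collected so far
all_target = torch.cat(target)    # (n_samples,) integer labels
kappa = cohen_kappa_score(all_target.numpy(),
                          all_pred.argmax(dim=1).numpy(),
                          weights="quadratic")
print(f"quadratic kappa after fold {fold}: {kappa:.4f}")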