def run():
    models = {
        'resnet34': mod.resnet34,
        'resnet50': mod.resnet50,
        'resnet101': mod.resnet101,
        'resnet152': mod.resnet152
    }
    db = load_data_classif(cfg.LABELS, bs=8 * cfg.BATCH_SIZE,
                           train_size=cfg.TRAIN_SIZE)
    learner = cnn_learner(db, models[cfg.MODEL], pretrained=cfg.PRETRAINED,
                          wd=cfg.WD, model_dir=cfg.MODELS_PATH,
                          metrics=[accuracy])

    save_name = f'clf_{cfg.MODEL}'
    save_name = f'{save_name}_{getNextFilePath(cfg.MODELS_PATH, save_name)}'

    learner = learner.clip_grad(1.)
    set_BN_momentum(learner.model)

    learner.fit_one_cycle(
        cfg.EPOCHS, slice(cfg.LR),
        callbacks=[
            SaveModelCallback(learner, monitor='valid_loss', name=save_name),
            AccumulateStep(learner, 64 // cfg.BATCH_SIZE),
            LearnerTensorboardWriter(learner, cfg.LOG, save_name,
                                     loss_iters=10, hist_iters=100,
                                     stats_iters=10)
        ])

    learner.unfreeze()
    uf_save_name = 'uf_' + save_name
    learner.fit_one_cycle(
        cfg.EPOCHS, slice(cfg.LR / 10),
        callbacks=[
            SaveModelCallback(learner, monitor='valid_loss',
                              name=uf_save_name),
            AccumulateStep(learner, 64 // cfg.BATCH_SIZE),
            LearnerTensorboardWriter(learner, cfg.LOG, uf_save_name,
                                     loss_iters=10, hist_iters=100,
                                     stats_iters=10)
        ])
def train_model(learn, lr=0.001, lr_decay=0.8, batch_size=512, n_epochs=20,
                model_name='fastai_'):
    # Build one scheduling phase per epoch, decaying the LR geometrically.
    n = len(learn.data.train_dl)
    phases = [TrainingPhase(n).schedule_hp('lr', lr * lr_decay ** i)
              for i in range(n_epochs)]
    sched = GeneralScheduler(learn, phases)
    learn.callbacks.append(sched)
    learn.fit(n_epochs,
              callbacks=[SaveModelCallback(learn, name=model_name),
                         EarlyStoppingCallback(learn, min_delta=0.001,
                                               patience=5)])
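# Usage sketch for train_model (illustrative assumption: fastai v1 is
# imported and `data` is an existing ImageDataBunch; these names are not
# from the original snippet).
learn = cnn_learner(data, models.resnet34, metrics=[accuracy])
train_model(learn, lr=1e-3, lr_decay=0.8, n_epochs=20,
            model_name='fastai_rn34')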
def main(test, s3_data, batch, debug):
    """Train a semantic segmentation FPN model on the CamVid-Tiramisu dataset."""
    if batch:
        run_on_batch(test, debug)

    # Setup options
    batch_sz = 8
    num_workers = 4
    num_epochs = 20
    lr = 1e-4
    backbone_arch = 'resnet18'
    sample_pct = 1.0
    if test:
        batch_sz = 1
        num_workers = 0
        num_epochs = 2
        sample_pct = 0.01

    # Setup data
    tmp_dir_obj = tempfile.TemporaryDirectory()
    tmp_dir = tmp_dir_obj.name
    output_dir = local_output_uri
    make_dir(output_dir)
    data_dir = download_data(s3_data, tmp_dir)
    data = get_databunch(data_dir, sample_pct=sample_pct, batch_sz=batch_sz,
                         num_workers=num_workers)
    print(data)
    plot_data(data, output_dir)

    # Setup and train model
    num_classes = data.c
    model = SegmentationFPN(backbone_arch, num_classes)
    metrics = [acc_camvid]
    learn = Learner(data, model, metrics=metrics,
                    loss_func=SegmentationFPN.loss, path=output_dir)
    learn.unfreeze()
    callbacks = [
        SaveModelCallback(learn, monitor='valid_loss'),
        CSVLogger(learn, filename='log'),
    ]
    learn.fit_one_cycle(num_epochs, lr, callbacks=callbacks)

    # Plot predictions and sync
    plot_preds(data, learn, output_dir)
    if s3_data:
        sync_to_dir(output_dir, remote_output_uri)
def train(arch):
    data = get_data('train')
    learn = cnn_learner(data, arch, metrics=error_rate).mixup()
    learn.fit_one_cycle(15, callbacks=[
        SaveModelCallback(learn, monitor='error_rate', mode='min',
                          name='bestmodel')
    ])
    learn.load('bestmodel')
    learn.unfreeze()
    learn.fit_one_cycle(5, max_lr=slice(1e-6, 1e-4), callbacks=[
        SaveModelCallback(learn, monitor='error_rate', mode='min',
                          name='bestmodel-unfreeze')
    ])
def _do_train(key, cycles, ps=None, mixup=False, unfreeze=False, cut=None,
              use_label_smoothing=False, get_learner=None, stats_repo=None,
              monitor='accuracy', **kwargs):
    assert stats_repo is not None
    assert get_learner is not None
    global all_results

    # Build learner kwargs and extend the run key with each option used.
    learn_args = {}
    if cut is not None:
        learn_args['cut'] = cut
        key = f'{key}_cut{cut}'
    if ps is not None:
        learn_args['ps'] = ps
        key = f'{key}_ps_{ps}'
    if use_label_smoothing:
        # A float means a custom smoothing epsilon; otherwise use the default.
        if isinstance(use_label_smoothing, float):
            learn_args['loss_func'] = LabelSmoothingCrossEntropy(use_label_smoothing)
            key = f'{key}_ls{use_label_smoothing}'
        else:
            learn_args['loss_func'] = LabelSmoothingCrossEntropy()
            key = f'{key}_ls'
    learn = get_learner(**learn_args)

    if unfreeze == 'all':
        key = key + '_ufa'
        learn.unfreeze()
    elif unfreeze:
        key = f'{key}_uf{unfreeze}'
        learn.freeze_to(unfreeze)

    if mixup:
        # A float means a custom mixup alpha; otherwise use the default.
        if isinstance(mixup, float):
            learn = learn.mixup(mixup)
            key = f'{key}_m{mixup}'
        else:
            learn = learn.mixup()
            key = key + '_m'

    print(key)
    learn.fit_one_cycle(
        cycles,
        callbacks=[SaveModelCallback(learn, every='improvement',
                                     monitor=monitor, name='best')],
        **kwargs)
    learn.recorder.plot_losses()
    plt.show()
    stats = get_best_stats(learn)
    learn.save(key)
    stats_repo.add([key, stats])
    return learn
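# Hypothetical call to _do_train (the `make_learner` factory and `stats_repo`
# below are assumptions, not part of the original snippet): a fully unfrozen
# run with mixup alpha 0.4 and label smoothing epsilon 0.1.
learn = _do_train('rn50', cycles=10, mixup=0.4, use_label_smoothing=0.1,
                  unfreeze='all', get_learner=make_learner,
                  stats_repo=stats_repo, monitor='accuracy')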
def train(df: pd.DataFrame) -> Learner:
    print("Start training")
    learn = initialize_learner(df)
    callbacks = [
        SaveModelCallback(learn, monitor="accuracy"),
        DataBunchCallback(learn.data)
    ]
    learn.fit_one_cycle(8, callbacks=callbacks)
    learn.unfreeze()
    learn.fit_one_cycle(2, callbacks=callbacks)
    print("Finished training")
    return learn
def get_callbacks(learner, mod_name, early_stop=True, patience=5,
                  monitor='accuracy', min_delta=0.01):
    callbacks = [
        SaveModelCallback(learner, every='improvement',
                          name=f'{mod_name}-opt_accuracy',
                          monitor='accuracy'),
        SaveModelCallback(learner, every='improvement',
                          name=f'{mod_name}-opt_val_loss'),
        WandbCallback(learner, monitor=monitor, input_type='images',
                      log='all')
    ]
    if early_stop:
        callbacks.append(
            EarlyStoppingCallback(learner, patience=patience,
                                  min_delta=min_delta, monitor=monitor))
    return callbacks
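# Usage sketch (assumption: `learn` is an existing image Learner; the model
# name is illustrative). Keeps two checkpoints per run: best accuracy and
# best validation loss.
learn.fit_one_cycle(10, 1e-3,
                    callbacks=get_callbacks(learn, 'resnet34', patience=3))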
def fit_model(learn, epoch, learner_saved, encoder_saved):
    if learner_saved and encoder_saved:
        learn.load(learner_saved)
        learn.load_encoder(encoder_saved)
    learn.fit_one_cycle(epoch, 2e-3, moms=(0.8, 0.7), callbacks=[
        SaveModelCallback(learn),
        ReduceLROnPlateauCallback(learn, factor=0.8)
    ])
    learner_to_save = "lm_" + str(epoch) + "_ep_lr2-3Px"
    encoder_to_save = "lm_" + str(epoch) + "_ep_lr2-3_encx"
    learn.save(learner_to_save)
    learn.save_encoder(encoder_to_save)
    return learn, learner_to_save, encoder_to_save
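# Usage sketch for fit_model (the checkpoint names are illustrative,
# following the naming scheme inside the function): resume from a previous
# 2-epoch run, then train for 4 more epochs.
learn, lm_name, enc_name = fit_model(learn, 4, 'lm_2_ep_lr2-3Px',
                                     'lm_2_ep_lr2-3_encx')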
def fit_clas(model_path: str, sp_model: str, wd: float = 0.,
             qrnn: bool = True, n_hid: int = 2304, load_enc: str = None,
             split_seed: int = None):
    PATH = Path(model_path)
    # torch.backends.cudnn.enabled = False
    defaults.text_spec_tok.append(NL)  # add a newline special token
    sp_vocab = Vocab(get_itos(sp_model))
    mycust_tok = CustomTokenizer(SPTokenizer, sp_model,
                                 pre_rules=default_rules)

    all_texts_df = pd.read_csv('../data/haha_2019_train.csv')
    all_texts_df.funniness_average.fillna(0, inplace=True)
    raw_text = all_texts_df.loc[:, 'text']

    print("Default Rules:\n", [x.__name__ for x in default_rules], "\n\n")
    for rule in default_rules:
        raw_text = raw_text.apply(lambda x: rule(str(x)))
    # the databunch adds `xxbos`, so don't add it here
    all_texts_df['new_text'] = raw_text

    kfolder = KFold(n_splits=5, random_state=split_seed, shuffle=True)
    for n_fold, (train_idx, valid_idx) in enumerate(
            kfolder.split(all_texts_df)):
        df_train, df_valid = split_data_by_idx(all_texts_df, train_idx,
                                               valid_idx)
        data = TextClasDataBunch.from_df(PATH, df_train, df_valid,
                                         tokenizer=mycust_tok,
                                         vocab=sp_vocab,
                                         text_cols='new_text',
                                         label_cols='funniness_average')
        config = awd_lstm_clas_config.copy()
        config['qrnn'] = qrnn
        config['n_hid'] = n_hid
        print(config)
        learn = text_classifier_learner(data, AWD_LSTM, drop_mult=0.7,
                                        pretrained=False, config=config)
        if load_enc:
            learn.load_encoder(load_enc)
        learn.fit_one_cycle(2, 1e-2, wd=wd)
        learn.unfreeze()
        learn.fit_one_cycle(15, slice(1e-3 / (2.6 ** 4), 5e-3),
                            moms=(0.7, 0.4), wd=wd, pct_start=0.25,
                            div_factor=8.,
                            callbacks=[SaveModelCallback(
                                learn, every='improvement', mode='min',
                                name='best_vloss_model_Q')])
        learn.save(f'haha_regr_0609_fld{n_fold}_{split_seed}')
        print(f"Reg Fold: {n_fold} RndSeed: {split_seed},"
              f"{min(learn.recorder.val_losses)}")
learn = text_classifier_learner(data_cls, AWD_LSTM, config=config,
                                pretrained=False, **trn_args)

# load the pretrained, fine-tuned language model encoder
learn.load_encoder('prachathai_enc')

# gradual unfreezing
learn.freeze_to(-1)
learn.fit_one_cycle(1, 2e-2, moms=(0.8, 0.7))
learn.freeze_to(-2)
learn.fit_one_cycle(1, slice(1e-2 / (2.6 ** 4), 1e-2), moms=(0.8, 0.7))
learn.freeze_to(-3)
learn.fit_one_cycle(10, slice(5e-3 / (2.6 ** 4), 5e-3), moms=(0.8, 0.7),
                    callbacks=[
                        SaveModelCallback(learn, every='improvement',
                                          monitor='valid_loss',
                                          name='prachathai_cls')
                    ])
# learn.unfreeze()
# learn.fit_one_cycle(10, slice(1e-3 / (2.6 ** 4), 1e-3), moms=(0.8, 0.7))

# save test set predictions
probs, y_true = learn.get_preds(ds_type=DatasetType.Test, ordered=True)
probs = probs.numpy()
pickle.dump(probs, open(f'{model_path}probs.pkl', 'wb'))
data = (ImageList.from_csv(path, 'train_fastai_format.csv',
                           folder='preprocessed/224/train')
        .split_by_rand_pct(seed=42)
        .label_from_df(label_delim=' ')
        .transform(tfms, size=(sz, sz))
        .add_test(str(path) + '/preprocessed/224/test/' + test_fns)
        .databunch(bs=bs, num_workers=8)
        .normalize(imagenet_stats))

model = EfficientNet.from_pretrained('efficientnet-b4', num_classes=6)
learn = Learner(data, model, metrics=[accuracy_thresh],
                model_dir=path / 'models/eff_net').to_fp16()
learn.unfreeze()
learn.load(pretrained_model)
learn.fit_one_cycle(10, lr, callbacks=[
    EarlyStoppingCallback(learn, min_delta=0.001, patience=3),
    SaveModelCallback(learn, every='epoch', name='effb0-224')
])
# view data
data.show_batch(rows=10, ds_type=DatasetType.Train)
plt.savefig(base_path / 'classification' / 'batch_example_train.svg')
data.show_batch(rows=10, ds_type=DatasetType.Valid)
plt.savefig(base_path / 'classification' / 'batch_example_valid.svg')

learn = cnn_learner(data, arch, pretrained=True, metrics=[accuracy])
# learn.unfreeze()
# learn.load(arch.__name__)
# learn.lr_find()
# learn.recorder.plot()
# plt.show()

cbs = [SaveModelCallback(learn, monitor='accuracy', name='best')]
learn.fit_one_cycle(5, max_lr=1e-3, callbacks=cbs)
learn.save(arch.__name__)

learn.recorder.plot_losses()
plt.savefig(base_path / 'classification' / 'loss.svg')
learn.show_results(rows=20)
plt.savefig(base_path / 'classification' / 'show_results.svg')

preds, y, losses = learn.get_preds(with_loss=True)
interpreter = ClassificationInterpretation(learn, preds, y, losses)
interpreter.plot_confusion_matrix(normalize=True, figsize=(8, 8))
plt.savefig(base_path / 'classification' / 'cm.svg')
interpreter.most_confused()
def __init__(self, data_path: str = 'lang_model', emb_sz: int = 800,
             qrnn: bool = False, bidir: bool = False, n_layers: int = 4,
             n_hid: int = 2500, bs: int = 104, bptt: int = 67,
             lr: float = 0.0013, wd: float = .012, one_cycle: bool = True,
             cycle_len: int = 1) -> None:
    """
    Instantiate an AWD_LSTM language model with hyper-parameters.

    data_path: str   path the databunch is loaded from
    emb_sz: int      size of word embeddings
    qrnn: bool       whether to use a QRNN (requires cuDNN)
    bidir: bool      whether the RNN should be bidirectional
    n_layers: int    number of layers in the language model
    n_hid: int       number of hidden units in the model
    lr: float        learning rate
    bptt: int        back-propagation through time; max sequence length
                     through which gradients will be accumulated
    bs: int          batch size

    The hyper-parameters are stored in a fastai dict called
    `fastai.text.models.awd_lstm_lm_config`:

        {'emb_sz': 400, 'n_hid': 1150, 'n_layers': 3, 'pad_token': 1,
         'qrnn': False, 'bidir': False, 'output_p': 0.1, 'hidden_p': 0.15,
         'input_p': 0.25, 'embed_p': 0.02, 'weight_p': 0.2,
         'tie_weights': True, 'out_bias': True}
    """
    self.lr, self.wd, self.one_cycle, self.cycle_len = lr, wd, one_cycle, cycle_len
    awd_lstm_lm_config.update(
        dict(emb_sz=emb_sz, qrnn=qrnn, bidir=bidir, n_layers=n_layers,
             n_hid=n_hid))

    # log params
    wb_handle = wandb.init(config=awd_lstm_lm_config)
    wandb.config.update({'data_path': str(data_path), 'bs': bs,
                         'bptt': bptt, 'lr': lr})
    self.csv_name = 'history_' + wb_handle.name
    wandb.config.update({'csvlog_save_path': self.csv_name})

    # instantiate databunch
    self.data_lm = load_data(data_path, bs=bs, bptt=bptt)

    # instantiate language model
    self.learn = language_model_learner(
        data=self.data_lm, arch=AWD_LSTM, pretrained=False,
        model_dir=Path('models_' + wb_handle.name),
        config=awd_lstm_lm_config)
    self.full_model_path = str(self.learn.path / self.learn.model_dir)
    wandb.config.update({'model_save_path': self.full_model_path})

    # prepare callbacks
    escb = EarlyStoppingCallback(learn=self.learn, patience=2)
    smcb = SaveModelCallback(learn=self.learn,
                             name='best_' + wb_handle.name)
    rpcb = ReduceLROnPlateauCallback(learn=self.learn, patience=1)
    csvcb = CSVLogger(learn=self.learn, filename=self.csv_name)
    wb = WandbCallback(self.learn)
    self.callbacks = [escb, smcb, rpcb, csvcb, wb]

    self.fit()
procs = []
df = (TabularList.from_df(data, path=path, cat_names=cat_vars,
                          cont_names=cont_vars, procs=procs)
      .split_by_idx(test)
      .label_from_df(cols=target)
      .add_test(TabularList.from_df(etd, path=path, cat_names=cat_vars,
                                    cont_names=cont_vars, procs=procs))
      .databunch(num_workers=0, bs=1024))
# Swap the train dataloader for one that draws each epoch with a longer
# random sampler (mult=2) instead of the default shuffle.
df.dls[0].dl = df.dls[0].new(
    sampler=LongerRandomSampler(data_source=df.train_ds, mult=2),
    shuffle=False).dl

for num_seed, seed in enumerate(seeds):
    logger.info(f'Model {num_fold} seed {num_seed}')
    set_seed(seed)
    model = my_TabularModel(emb_szs, len(df.cont_names), out_sz=df.c,
                            layers=layers, ps=ps, emb_drop=emb_drop,
                            y_range=None, use_bn=use_bn, cont_emb=cont_emb,
                            cont_emb_notu=cont_emb_notu)
    learn = Learner(df, model, metrics=None, callback_fns=AUROC, wd=0.1)
    learn.fit_one_cycle(
        15, max_lr=1e-2,
        callbacks=[SaveModelCallback(
                       learn, every='improvement', monitor='AUROC',
                       name=f'fold{fold_seed}_{num_fold}_seed_{seed}'),
                   AugShuffCallback(learn)])

    pred, _ = learn.get_preds()
    pred = pred[:, 1]
    pred_test, _ = learn.get_preds(DatasetType.Test)
    pred_test = pred_test[:, 1]
    sub_preds.loc[:, num_fold] = pred_test
    results.append(np.max(learn.recorder.metrics))
    logger.info('result ' + str(results[-1]))
    np.save(f'oof_fold{fold_seed}_{num_fold}_seed_{seed}.npy', pred)
    np.save(f'test_fold{fold_seed}_{num_fold}_seed_{seed}.npy', pred_test)
    del learn, pred, model, pred_test
    gc.collect()

del df
gc.collect()
def IoU(input, target):
    # Binary IoU on logits: threshold at 0.5, +1 smoothing in the denominator.
    input = torch.sigmoid(input)
    input = (input > 0.5).float()
    target = target.float()
    intersection = (input * target).sum()
    return intersection / ((input + target).sum() - intersection + 1.0)


from models.LinkNet import LinkNet152

model = LinkNet152(num_classes=1, pretrained=True)
# learn = unet_learner(data, models.resnet18, bottle=True,
#                      metrics=[dice, IoU],
#                      callback_fns=[BnFreeze, LossMetrics]).to_fp16()
learn = Learner(data, model, loss_func=SemsegLossWeighted(),
                metrics=[dice, IoU],
                callback_fns=[BnFreeze, LossMetrics, CSVLogger]).to_fp16()
learn = learn.to_distributed(arg.local_rank)
learn.load('model_4')
learn.unfreeze()

lr = 1e-6
learn.fit_one_cycle(10, slice(lr), callbacks=[
    SaveModelCallback(learn, every='epoch', monitor='accuracy',
                      name='models2')
])
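# Quick sanity check of the IoU metric above on toy tensors (illustrative
# only, not part of the original snippet).
logits = torch.tensor([3.0, -3.0, 3.0, -3.0])  # thresholded sigmoid -> [1, 0, 1, 0]
mask = torch.tensor([1.0, 0.0, 0.0, 0.0])
print(IoU(logits, mask))  # intersection=1, smoothed union=3 -> ~0.33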
# In[25]:

# learn.lr_find()  # <-- uncomment to determine the learning rate (commented to reduce time)
# learn.recorder.plot(suggestion=True)

# In[26]:

from fastai.callbacks import SaveModelCallback

# In[ ]:

learn.fit_one_cycle(
    EPOCHS, LR,
    callbacks=[SaveModelCallback(learn, every='epoch', monitor='accuracy')])

# In[ ]:

# learn.recorder.plot_losses()
# learn.recorder.plot_metrics()

# In[ ]:

learn.save(SAVE_NAME)
# learn.export()

# ## Inference and Submission Generation
# Let's now load our test CSV and process the DataFrame like we did for the
# training data.
src = (ImageList.from_csv(Path('../data'), 'train.csv', folder='combined',
                          suffix='.jpg')   # load data from csv
       .random_split_by_pct(0.2)           # split into training and validation sets (20% validation)
       .label_from_df(label_delim=';'))    # label using the tags column (second column by default)

data = (src.transform(tfms, size=400)      # apply transforms and scale images to 400x400
        .databunch(bs=48)                  # create databunch with batch size 48
        .normalize(imagenet_stats))        # normalize with ImageNet stats

acc_02 = partial(accuracy_thresh, thresh=0.2)
f_score = partial(fbeta, thresh=0.2)
learn = create_cnn(data, models.resnet50, metrics=[acc_02, f_score])

lr = 0.01
learn.unfreeze()
learn.fit_one_cycle(5, lr, callbacks=[SaveModelCallback(learn)])
learn.save('finished')
learn.load('bestmodel')
learn.export()
learn = language_model_learner(data, AWD_LSTM, config=config,
                               pretrained=False, **trn_args)
learn.opt_fn = partial(optim.Adam, betas=(0.8, 0.99))
learn.callback_fns += [partial(CSVLogger, filename='logs')]
print('learner done')

# train frozen
print('training frozen')
learn.freeze_to(-1)
learn.fit_one_cycle(1, 1e-2, moms=(0.8, 0.7))

# train unfrozen
print('training unfrozen')
learn.unfreeze()
learn.fit_one_cycle(20, 1e-3, moms=(0.8, 0.7), callbacks=[
    SaveModelCallback(learn, every='improvement', monitor='accuracy',
                      name='thwiki_lm')
])
learn.load('thwiki_lm')
learn.save_encoder('thwiki_enc')
print('saved')
                metrics=[METRIC], device=DEVICE, model_dir=LOGGING_FOLDER)
set_BN_momentum(learn.model, batch_size=BATCH_SIZE)
learn.clip_grad(1.)

# callbacks
csv_logger = CSVLogger(learn=learn, filename=f'{LOGGING_FOLDER}/fit_trace',
                       append=True)
early_stopping = EarlyStoppingCallback(learn=learn, monitor='accuracy',
                                       patience=PATIENCE)
save_model = SaveModelCallback(learn=learn, monitor='accuracy',
                               name='best_model')
acc_grad = AccumulateStep(learn, 64 // BATCH_SIZE)

opt_lr = 0.001

# fit with frozen
learn.fit_one_cycle(
    cyc_len=3,
    max_lr=opt_lr,
    callbacks=[acc_grad, csv_logger, early_stopping, save_model])

# fit entire model with saving on the best epoch
learn.unfreeze()
learn.fit_one_cycle(
    cyc_len=12,
learn.unfreeze()
# learn.lr_find()  # <-- uncomment to determine the learning rate (commented to reduce time)
# learn.recorder.plot(suggestion=True)

# In[31]:

from fastai.callbacks import SaveModelCallback

# In[ ]:

learn.fit_one_cycle(18, 1e-5,
                    callbacks=[SaveModelCallback(learn, every='epoch',
                                                 monitor='accuracy')])

# In[ ]:

# learn.recorder.plot_losses()
# learn.recorder.plot_metrics()

# In[22]:

learn.save(SAVE_NAME)
learn.export()
def run():
    models = {
        'resnet34': mod.resnet34,
        'resnet50': mod.resnet50,
        'resnet101': mod.resnet101,
        'resnet152': mod.resnet152
    }
    save_name = f'mtl_{cfg.MODEL}_{cfg.TRAIN_SIZE}'
    save_name = f'{save_name}_{getNextFilePath(cfg.MODELS_PATH, save_name)}'

    test_list = MultiTaskList.from_folder(cfg.TEST_PATH, extensions=['.dcm'])
    best = 0
    pred_path = cfg.PRED_PATH / save_name
    if not pred_path.is_dir():
        pred_path.mkdir()
    project = neptune.init('schwobr/SIIM-Pneumothorax')

    for k, db in enumerate(
            load_data_kfold_mtl(cfg.LABELS, bs=cfg.BATCH_SIZE,
                                train_size=cfg.TRAIN_SIZE,
                                xtra_tfms=[gaussian_noise()])):
        print(f'fold {k}')
        learner = multi_task_unet_learner(
            db, models[cfg.MODEL], log_vars=torch.tensor(cfg.LOG_VARS),
            pretrained=cfg.PRETRAINED,
            loss_func=MTLLoss(CrossEntropyFlat(), CrossEntropyFlat(axis=1)),
            wd=cfg.WD, model_dir=cfg.MODELS_PATH, opt_func=RangerW,
            metrics=[
                mtl_metric(dice, dim=1),
                mtl_metric(accuracy, dim=0),
                average_mtl_metric([dice, accuracy], [1, 0])
            ])

        fold_name = f'fold{k}_' + save_name
        set_BN_momentum(learner.model)

        learner.fit_one_cycle(cfg.EPOCHS, slice(cfg.LR), callbacks=[
            SaveModelCallback(learner, monitor='dice_accuracy',
                              name=fold_name),
            MTLLossCallback(learner),
            AccumulateStep(learner, 64 // cfg.BATCH_SIZE),
            NeptuneCallback(learner, project, name=fold_name,
                            params={'lr': cfg.LR, 'wd': cfg.WD,
                                    'size': cfg.TRAIN_SIZE}),
            LearnerTensorboardWriter(learner, cfg.LOG, fold_name,
                                     loss_iters=10, hist_iters=50,
                                     stats_iters=10)
        ])
        met = max([met[0] for met in learner.recorder.metrics])
        if met > best:
            learner.save(save_name)
            best = met
            print(f'New best fold {k} with dice {best}')
        # learner.neptune_callback.send_artifact(
        #     cfg.MODELS_PATH / (fold_name + '.pth'))
        learner.neptune_callback.stop()

        learner.unfreeze()
        fold_name = 'uf_' + fold_name
        learner.fit_one_cycle(cfg.UNFROZE_EPOCHS,
                              slice(cfg.LR / 500, cfg.LR / 10),
                              callbacks=[
            SaveModelCallback(learner, monitor='dice_accuracy',
                              name=fold_name),
            MTLLossCallback(learner),
            AccumulateStep(learner, 64 // cfg.BATCH_SIZE),
            NeptuneCallback(learner, project, name=fold_name,
                            params={'lr': cfg.LR, 'wd': cfg.WD,
                                    'size': cfg.TRAIN_SIZE}),
            LearnerTensorboardWriter(learner, cfg.LOG, fold_name,
                                     loss_iters=10, hist_iters=50,
                                     stats_iters=10)
        ])
        met = max([met[0] for met in learner.recorder.metrics])
        if met > best:
            learner.save(save_name)
            best = met
            print(f'New best fold {k} with dice {best}')
        # learner.neptune_callback.send_artifact(
        #     cfg.MODELS_PATH / (fold_name + '.pth'))
        learner.neptune_callback.stop()

        learner.data.add_test(test_list,
                              label=[test_list.items[0], '-1'],
                              tfms=(), tfm_y=True)
        save_preds_mtl(learner, pred_path / str(k))

    exp = project.create_experiment(name=save_name,
                                    description='k-fold mtl training',
                                    params={'lr': cfg.LR, 'wd': cfg.WD,
                                            'size': cfg.TRAIN_SIZE})
    # exp.send_artifact(cfg.MODELS_PATH / (save_name + '.pth'))
    learner.load(save_name)
    learner.data.add_test(test_list, label=[test_list.items[0], '-1'],
                          tfms=(), tfm_y=True)
    thr, thr_clf = get_best_thrs_mtl(learner, plot=False, a=0.,
                                     test_size=cfg.TEST_SIZE, exp=None,
                                     fig_path=cfg.FIG_PATH / (save_name + '.png'))
    create_submission_kfold_mtl(learner, cfg.SUB_PATH / (save_name + '.csv'),
                                pred_path, test_size=cfg.TEST_SIZE, thr=thr,
                                clf_thr=0.)
    exp.send_artifact(cfg.SUB_PATH / (save_name + '.csv'))
    exp.stop()
# +
# learn.fit_one_cycle(32, max_lr=slice(0.2e-2, 1e-2), wd=[1e-3, 0.1e-1],
#                     pct_start=0.0, div_factor=100,
#                     callbacks=[logger,
#                                SaveModelCallback(learn, monitor='metric_tot',
#                                                  mode='max',
#                                                  name=f'model_{fold}'),
#                                MixUpCallback(learn)])

# changed config
learn.fit_one_cycle(100, max_lr=slice(0.2e-2, 1.5e-2), wd=[1e-4, 1e-3],
                    pct_start=0.0, div_factor=100,
                    callbacks=[
                        logger,
                        SaveModelCallback(learn, monitor='metric_tot',
                                          mode='max', name=f'model_{fold}'),
                        MixUpCallback(learn)
                    ])
# metrics: Metric_grapheme, Metric_vowel, Metric_consonant,
# Metric_tot (competition metric)
# -

log_df = pd.read_csv("./log1.csv")
plt.plot(log_df["metric_idx"])
plt.plot(log_df["metric_idx.1"])
plt.plot(log_df["metric_idx.2"])
plt.show()
plt.plot(log_df["train_loss"])
plt.plot(log_df["valid_loss"])
data.c = tree.n_obj + tree.n_parts
loss = parts.Loss(tree, preds_func=split_pred)
metrics = partial(parts.BrodenMetrics, obj_tree=tree, preds_func=split_pred)
learn = unet_learner(data, models.resnet50, loss_func=loss,
                     callback_fns=[metrics, utils.DataTime])

lr = 2e-4
learn.fit_one_cycle(5, lr, callbacks=[
    SaveModelCallback(learn, monitor='object-P.A.', name='unet-stage1'),
    CSVLogger(learn, filename='unet-stage1')
])
learn.unfreeze()
learn.fit_one_cycle(10, slice(1e-6, lr / 5), callbacks=[
    SaveModelCallback(learn, monitor='object-P.A.', name='unet-stage2'),
    CSVLogger(learn, filename='unet-stage2')
])
def train(valid_fold, conf_name):
    with open(f'./configs/{conf_name}.yaml') as f:
        conf = edict(yaml.load(f))
    class_cnt = conf.class_cnt
    backbone_name = conf.backbone
    unfreeze = True  # conf.unfreeze if 'unfreeze' in conf else False
    epoch = 50
    assert int(valid_fold) <= 4

    # batch_id = str(round(time.time()))
    backbone = get_backbone(backbone_name)

    df = pd.read_csv('./input/train.csv', names=['file_name', 'label'])
    df['fold'] = df.file_name % 5
    df['file_name'] = df.file_name.astype('str') + '.jpg'
    # print(df.head(), df.shape)
    # if class_cnt <= 2:
    #     df.label = np.where(df.label >= 1, 1, 0)

    data = (ImageList.from_df(df, './input/train/')
            .split_by_idx(df.loc[df.fold == valid_fold].index)
            # split_by_valid_func(lambda o: int(os.path.basename(o).split('.')[0]) % 5 == i)
            .label_from_df(cols='label', label_cls=FloatList)
            # .add_test_folder('./input/test')
            .transform(get_transforms(), size=200)
            .databunch(bs=16)).normalize(imagenet_stats)
    test_data = ImageList.from_folder(path="./input/test")
    data.add_test(test_data)
    # data.show_batch(rows=3, figsize=(15, 15))

    # head = create_head(nf, nc, lin_ftrs, ps=ps, concat_pool=concat_pool, bn_final=bn_final)
    learn = cnn_learner(data, backbone, metrics=[root_mean_squared_error],
                        loss_func=nn.MSELoss(), custom_head=None)
    print(learn.model)

    checkpoint_name = f'{backbone()._get_name()}_rf{valid_fold}'
    callbacks = [
        EarlyStoppingCallback(learn, monitor='root_mean_squared_error',
                              min_delta=1e-5, patience=5),
        SaveModelCallback(learn, monitor='root_mean_squared_error',
                          name=checkpoint_name, every='improvement')
    ]

    print(f'=====Fold:{valid_fold}, Total epoch:{epoch}, {conf_name}, '
          f'backbone:{backbone_name}=========')
    if unfreeze:
        learn.freeze_to(-2)
    learn.fit_one_cycle(epoch, callbacks=callbacks)

    oof_val = get_oof_df(learn, DatasetType.Valid)
    oof_test = get_oof_df(learn, DatasetType.Test)

    os.makedirs('./output/stacking/', exist_ok=True)
    import socket
    host_name = socket.gethostname()
    # score_list = np.array(learn.recorder.metrics)
    # best_epoch = np.argmax(score_list)
    # best_score = np.max(score_list)
    val_len = len(learn.data.valid_ds.items)
    train_len = len(learn.data.train_ds.items)

    from sklearn.metrics import accuracy_score
    best_score = accuracy_score(oof_val.iloc[:, 0].astype(int),
                                oof_val.iloc[:, -1].astype(int))
    oof_file = (f'./output/stacking/{version}_{host_name[:5]}'
                f'_s{best_score:6.5f}_{conf_name}_f{valid_fold}'
                f'_val{val_len}_trn{train_len}.h5')
    print(f'Stacking file save to: {oof_file}')
    save_stack_feature(oof_val, oof_test, oof_file)
        learn.split([model.head])
        learn.freeze()
        if is_warmup:
            learn.fit_one_cycle(1, lr_warmup)
    except Exception as e:
        print(e)
        pass

    learn.unfreeze()
    train_schedule(learn, epochs, max_lr,
                   cbs=[
                       SaveModelCallback(learn, name=f'model',
                                         monitor=monitor_metric),
                       slack_cb
                   ],
                   is_oversampling=is_oversampling, lr_sched=lr_sched)

    # save only the state dict (weights) - requires the model when loading
    torch.save(learn.model.state_dict(), f'{fold}.pth')

move_models_2_savedir(SAVE_DIR,
                      models=[f'./{fold}.pth' for fold in range(n_folds)])
config.toDir(SAVE_DIR)
sampler.is_train = False
evaluate_model_dir(SAVE_DIR, sampler=sampler)
                input_p=0.6, embed_p=0.1, weight_p=0.5)
trn_args = dict(bptt=70, drop_mult=0.7, alpha=2, beta=1, max_len=500)
learn = text_classifier_learner(data_cls, AWD_LSTM, config=config,
                                pretrained=False, **trn_args)

# load pretrained fine-tuned model
learn.load_encoder('wisesight_enc')

# gradual unfreezing
learn.freeze_to(-1)
learn.fit_one_cycle(1, 2e-2, moms=(0.8, 0.7))
learn.freeze_to(-2)
learn.fit_one_cycle(1, slice(1e-2 / (2.6 ** 4), 1e-2), moms=(0.8, 0.7))
learn.freeze_to(-3)
learn.fit_one_cycle(1, slice(5e-3 / (2.6 ** 4), 5e-3), moms=(0.8, 0.7))
learn.unfreeze()
learn.fit_one_cycle(10, slice(1e-3 / (2.6 ** 4), 1e-3), moms=(0.8, 0.7),
                    callbacks=[
                        SaveModelCallback(learn, every='improvement',
                                          monitor='accuracy',
                                          name='bestmodel')
                    ])
            .view(-1, shape[1], shape[2] * n, shape[3])  # x: bs x C x N*4 x 4
        x = self.head(x)
        return x


fname = 'RNXT50'
for fold in range(nfolds):
    data = get_data(fold)
    model = Model()
    learn = Learner(data, model, loss_func=nn.CrossEntropyLoss(),
                    opt_func=Over9000,
                    metrics=[KappaScore(weights='quadratic')]).to_fp16()
    logger = CSVLogger(learn, f'log_{fname}_{fold}')
    learn.clip_grad = 1.0
    learn.split([model.head])
    learn.unfreeze()
    learn.fit_one_cycle(16, max_lr=1e-3, div_factor=100, pct_start=0.0,
                        callbacks=[
                            SaveModelCallback(learn, name=f'model',
                                              monitor='kappa_score')
                        ])
    torch.save(learn.model.state_dict(), f'{fname}_{fold}.pth')
def train(self, tmp_dir):
    """Train a model."""
    self.print_options()

    # Sync output of previous training run from cloud.
    train_uri = self.backend_opts.train_uri
    train_dir = get_local_path(train_uri, tmp_dir)
    make_dir(train_dir)
    sync_from_dir(train_uri, train_dir)

    # Get zip file for each group, and unzip them into chip_dir.
    chip_dir = join(tmp_dir, 'chips')
    make_dir(chip_dir)
    for zip_uri in list_paths(self.backend_opts.chip_uri, 'zip'):
        zip_path = download_if_needed(zip_uri, tmp_dir)
        with zipfile.ZipFile(zip_path, 'r') as zipf:
            zipf.extractall(chip_dir)

    # Setup data loader.
    train_images = []
    train_lbl_bbox = []
    for annotation_path in glob.glob(join(chip_dir, 'train/*.json')):
        images, lbl_bbox = get_annotations(annotation_path)
        train_images += images
        train_lbl_bbox += lbl_bbox

    val_images = []
    val_lbl_bbox = []
    for annotation_path in glob.glob(join(chip_dir, 'valid/*.json')):
        images, lbl_bbox = get_annotations(annotation_path)
        val_images += images
        val_lbl_bbox += lbl_bbox

    images = train_images + val_images
    lbl_bbox = train_lbl_bbox + val_lbl_bbox
    img2bbox = dict(zip(images, lbl_bbox))
    get_y_func = lambda o: img2bbox[o.name]

    num_workers = 0 if self.train_opts.debug else 4
    data = ObjectItemList.from_folder(chip_dir)
    data = data.split_by_folder()
    data = data.label_from_func(get_y_func)
    data = data.transform(
        get_transforms(), size=self.task_config.chip_size, tfm_y=True)
    data = data.databunch(
        bs=self.train_opts.batch_sz, collate_fn=bb_pad_collate,
        num_workers=num_workers)
    print(data)

    if self.train_opts.debug:
        make_debug_chips(
            data, self.task_config.class_map, tmp_dir, train_uri)

    # Setup callbacks and train model.
    ratios = [1/2, 1, 2]
    scales = [1, 2**(-1/3), 2**(-2/3)]
    model_arch = getattr(models, self.train_opts.model_arch)
    encoder = create_body(model_arch, cut=-2)
    model = RetinaNet(encoder, data.c, final_bias=-4)
    crit = RetinaNetFocalLoss(scales=scales, ratios=ratios)
    learn = Learner(data, model, loss_func=crit, path=train_dir)
    learn = learn.split(retina_net_split)

    model_path = get_local_path(self.backend_opts.model_uri, tmp_dir)

    pretrained_uri = self.backend_opts.pretrained_uri
    if pretrained_uri:
        print('Loading weights from pretrained_uri: {}'.format(
            pretrained_uri))
        pretrained_path = download_if_needed(pretrained_uri, tmp_dir)
        learn.load(pretrained_path[:-4])

    callbacks = [
        TrackEpochCallback(learn),
        SaveModelCallback(learn, every='epoch'),
        MyCSVLogger(learn, filename='log'),
        ExportCallback(learn, model_path),
        SyncCallback(train_dir, self.backend_opts.train_uri,
                     self.train_opts.sync_interval)
    ]
    learn.unfreeze()
    learn.fit(self.train_opts.num_epochs, self.train_opts.lr,
              callbacks=callbacks)

    # Since the model is exported every epoch, we need some other way to
    # show that training is finished.
    str_to_file('done!', self.backend_opts.train_done_uri)

    # Sync output to cloud.
    sync_to_dir(train_dir, self.backend_opts.train_uri)
if HS_MODEL is not None:
    learn.model.load_state_dict(torch.load(HS_MODEL)['model'])

set_BN_momentum(learn.model, batch_size=BATCH_SIZE)
learn.clip_grad(1.)

# callbacks
csv_logger = CSVLogger(learn=learn, filename=f'{LOGGING_FOLDER}/fit_trace',
                       append=True)
early_stopping = EarlyStoppingCallback(learn=learn, monitor='dice',
                                       patience=PATIENCE)
save_model = SaveModelCallback(learn=learn, monitor='dice',
                               name='best_model')
acc_grad = AccumulateStep(learn, 64 // BATCH_SIZE)

# # find optimal LR
# learn.lr_find(stop_div=True, num_it=100)
# learn.recorder.plot(suggestion=True)
# opt_lr = learn.recorder.min_grad_lr
# print(f'Initial optimal lr: {opt_lr}')

if TRAIN_MODE:
    if HS_MODEL is None:
        opt_lr = 0.001
# Load Learner
learn = multitask_model_learner(combined_data, config.copy(),
                                opt_func=opt_func)
if not args.half:
    learn.clip_grad(1.0)

if args.load:
    state = torch.load(path / args.load, map_location='cpu')
    get_model(learn.model).load_state_dict(state['model'], strict=False)
learn.model.cuda()

if args.save:
    save_path = path / learn.model_dir / args.save
    save_path.parent.mkdir(parents=True, exist_ok=True)

if args.half:
    learn = learn.to_fp16(clip=1.0, dynamic=True, max_scale=2**18)
if is_distributed:
    learn = learn.to_distributed(args.local_rank,
                                 cache_dir=path / 'dist_logs')
if args.data_parallel:
    learn = learn.to_parallel()

# Only checkpoint from the rank-0 process in distributed training.
if args.local_rank == 0:
    learn.callbacks.append(SaveModelCallback(learn,
                                             name=f'{args.save}_best'))

learn.fit_one_cycle(args.epochs, args.lr, div_factor=args.div_factor,
                    pct_start=.3, final_div=50, wd=args.wd)
if args.local_rank == 0:
    learn.save(f'{args.save}')