def fit_one_cycle(learn: Learner, cyc_len: int,
                  max_lr: Union[Floats, slice] = defaults.lr,
                  moms: Tuple[float, float] = (0.95, 0.85),
                  div_factor: float = 25., pct_start: float = 0.3,
                  final_div: float = None, wd: float = None,
                  callbacks: Optional[CallbackList] = None,
                  tot_epochs: int = None, start_epoch: int = None,
                  teachers: Optional[list] = None) -> None:
    "Fit a model following the 1cycle policy."
    max_lr = learn.lr_range(max_lr)
    callbacks = listify(callbacks)
    callbacks.append(OneCycleScheduler(learn, max_lr, moms=moms,
                                       div_factor=div_factor, pct_start=pct_start,
                                       final_div=final_div, tot_epochs=tot_epochs,
                                       start_epoch=start_epoch))
    learn.fit(cyc_len, max_lr, wd=wd, callbacks=callbacks, teachers=teachers)
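# A minimal usage sketch for fit_one_cycle above; `learn` is assumed to be a
# prepared fastai v1 Learner, and the cycle length, slice lr, and wd are
# illustrative values, not taken from the original code.
fit_one_cycle(learn, 10, max_lr=slice(1e-5, 1e-3), wd=1e-2)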
def fit(self, learner: Learner, weight_decay: float):
    reduce_lr_callback = ReduceLRCallback(learner,
                                          mult_coeff=self.mult_coeff,
                                          max_times_lr_decrease=self.max_lr_reduction_times,
                                          patience=self.patience)
    learner.callbacks.append(reduce_lr_callback)
    learner.fit(epochs=self.max_epochs, lr=self.init_lr, wd=weight_decay)
def objective(trial):
    # type: (optuna.trial.Trial) -> float
    model = nn.Sequential(nn.Linear(20, 1), nn.Sigmoid())
    learn = Learner(data_bunch, model, metrics=[accuracy],
                    callback_fns=[partial(FastAIPruningCallback, trial=trial,
                                          monitor='valid_loss')])
    learn.fit(1)
    return 1.0
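# A driver sketch for the objective above, using optuna's standard study API;
# the pruner choice and trial count are illustrative assumptions.
import optuna

study = optuna.create_study(pruner=optuna.pruners.MedianPruner())
study.optimize(objective, n_trials=20)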
def main():
    model = PSMNet(args.maxdisp, args.mindisp).cuda()
    if args.load_model is not None:
        if args.load is not None:
            warn('args.load is not None; load_model will be overridden by load.')
        ckpt = torch.load(args.load_model, 'cpu')
        if 'model' in ckpt.keys():
            pretrained = ckpt['model']
        elif 'state_dict' in ckpt.keys():
            pretrained = ckpt['state_dict']
        else:
            raise RuntimeError('checkpoint has neither a "model" nor a "state_dict" key.')
        # Strip the DataParallel 'module.' prefix so keys match the bare model.
        pretrained = {k.replace('module.', ''): v for k, v in pretrained.items()}
        model.load_state_dict(pretrained)
    train_dl = DataLoader(KITTIRoiDataset(args.data_dir, 'train', args.resolution,
                                          args.maxdisp, args.mindisp),
                          batch_size=args.batch_size, shuffle=True,
                          num_workers=args.workers)
    val_dl = DataLoader(KITTIRoiDataset(args.data_dir, 'val', args.resolution,
                                        args.maxdisp, args.mindisp),
                        batch_size=args.batch_size, num_workers=args.workers)
    loss_fn = PSMLoss()
    databunch = DataBunch(train_dl, val_dl, device='cuda')
    learner = Learner(databunch, model, loss_func=loss_fn, model_dir=args.model_dir)
    learner.callbacks = [DistributedSaveModelCallback(learner),
                         TensorBoardCallback(learner)]
    if num_gpus > 1:
        learner.to_distributed(get_rank())
    if args.load is not None:
        learner.load(args.load)
    if args.mode == 'train':
        learner.fit(args.epochs, args.maxlr)
    elif args.mode == 'train_oc':
        fit_one_cycle(learner, args.epochs, args.maxlr)
    else:
        raise ValueError('args.mode not supported.')
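# Standard entry-point guard; `args` and `num_gpus` are assumed to be set up by
# argparse and the distributed launcher elsewhere in this script.
if __name__ == '__main__':
    main()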
def bs_find(learn: Learner, lr, bs=None, num_it: int = None, n_batch=20,
            beta=0.99, wd=None):
    "Run the batch-size finder for `num_it` iterations and return the BSFinder callback."
    if not bs:
        bs = learn.data.batch_size
    if not num_it:
        num_it = len(learn.data.train_ds) // bs
    cb = BSFinder(learn, bs=bs, num_it=num_it, n_batch=n_batch, beta=beta)
    epochs = int(np.ceil(num_it / len(learn.data.train_dl)))
    learn.fit(epochs, lr, callbacks=[cb], wd=wd)
    return cb
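# Usage sketch: `learn` is assumed to be a prepared Learner; lr=1e-3 is an
# illustrative value, and the returned BSFinder callback is assumed to hold
# whatever statistics it recorded during the run.
finder = bs_find(learn, lr=1e-3)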
class Sequential:
    def __init__(self, model=None):
        self.layers = []
        self.last_dim = None
        self.model = model
        self.device = torch.device('cpu')
        if torch.cuda.is_available():
            self.device = torch.device('cuda')

    def add(self, layer):
        layer = layer.get_layer(self.last_dim)
        self.last_dim = layer['output_dim']
        self.layers.extend(layer['layers'])

    def compile(self, loss, optimizer=None):
        if len(self.layers) > 0:
            self.model = nn.Sequential(*self.layers)
        self.loss = loss

    def fit(self, x, y, bs, epochs, lr=1e-3, one_cycle=True, get_lr=True):
        db = create_db(x, y, bs=bs)
        self.learn = Learner(db, self.model, loss_func=self.loss)
        if one_cycle:
            self.learn.fit_one_cycle(epochs, lr)
        else:
            self.learn.fit(epochs, lr)

    def lr_find(self, x, y, bs):
        db = create_db(x, y, bs=bs)
        learn = Learner(db, self.model, loss_func=self.loss)
        learn.lr_find()
        clear_output()
        learn.recorder.plot(suggestion=True)

    def predict(self, x):
        self.learn.model.eval()
        with torch.no_grad():
            y_preds = self.learn.model(torch.Tensor(x).to(self.device))
        return y_preds.cpu().numpy()
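# A minimal usage sketch. Since compile() only rebuilds self.model when layers
# were added via add(), passing a ready nn.Sequential to the constructor also
# works; x_train/y_train/x_valid are hypothetical arrays and BCELoss is an
# illustrative choice, assuming create_db accepts them.
net = Sequential(model=nn.Sequential(nn.Linear(20, 1), nn.Sigmoid()))
net.compile(loss=nn.BCELoss())
net.fit(x_train, y_train, bs=64, epochs=5)
preds = net.predict(x_valid)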
# Callback class inferred from the usage below; the snippet begins inside __init__.
class MakeRandomizerConsistentOnEpochEnd(LearnerCallback):
    def __init__(self, learn):
        super().__init__(learn)

    def on_epoch_end(self, **kwargs):
        # Advance the randomizer by exhausting the train-eval loader.
        for _ in train_eval_loader:
            pass

# ---
databunch = DataBunch(train_loader, val_loader)
opt_func = partial(SGD, lr=0.1, momentum=0.9, weight_decay=5e-4)
learner = Learner(data=databunch, model=model, opt_func=opt_func,
                  loss_func=criterion, metrics=[accuracy], true_wd=False)
learner.unfreeze()

# ---
callback_on_train_begin = MakeRandomizerConsistentOnTrainBegin(learner)
callback_on_epoch_end = MakeRandomizerConsistentOnEpochEnd(learner)
learner.fit(epochs=150, lr=0.1, wd=5e-4,
            callbacks=[callback_on_train_begin, callback_on_epoch_end])
learner.fit(epochs=100, lr=0.01, wd=5e-4, callbacks=[callback_on_epoch_end])
learner.fit(epochs=100, lr=0.001, wd=5e-4, callbacks=[callback_on_epoch_end])
def train(self, tmp_dir):
    """Train a model."""
    self.print_options()

    # Sync output of previous training run from cloud.
    train_uri = self.backend_opts.train_uri
    train_dir = get_local_path(train_uri, tmp_dir)
    make_dir(train_dir)
    sync_from_dir(train_uri, train_dir)

    # Get zip file for each group, and unzip them into chip_dir.
    chip_dir = join(tmp_dir, 'chips')
    make_dir(chip_dir)
    for zip_uri in list_paths(self.backend_opts.chip_uri, 'zip'):
        zip_path = download_if_needed(zip_uri, tmp_dir)
        with zipfile.ZipFile(zip_path, 'r') as zipf:
            zipf.extractall(chip_dir)

    # Setup data loader.
    train_images = []
    train_lbl_bbox = []
    for annotation_path in glob.glob(join(chip_dir, 'train/*.json')):
        images, lbl_bbox = get_annotations(annotation_path)
        train_images += images
        train_lbl_bbox += lbl_bbox

    val_images = []
    val_lbl_bbox = []
    for annotation_path in glob.glob(join(chip_dir, 'valid/*.json')):
        images, lbl_bbox = get_annotations(annotation_path)
        val_images += images
        val_lbl_bbox += lbl_bbox

    images = train_images + val_images
    lbl_bbox = train_lbl_bbox + val_lbl_bbox

    img2bbox = dict(zip(images, lbl_bbox))
    get_y_func = lambda o: img2bbox[o.name]
    num_workers = 0 if self.train_opts.debug else 4
    data = ObjectItemList.from_folder(chip_dir)
    data = data.split_by_folder()
    data = data.label_from_func(get_y_func)
    data = data.transform(get_transforms(), size=self.task_config.chip_size,
                          tfm_y=True)
    data = data.databunch(bs=self.train_opts.batch_sz, collate_fn=bb_pad_collate,
                          num_workers=num_workers)
    print(data)

    if self.train_opts.debug:
        make_debug_chips(data, self.task_config.class_map, tmp_dir, train_uri)

    # Setup callbacks and train model.
    ratios = [1/2, 1, 2]
    scales = [1, 2**(-1/3), 2**(-2/3)]
    model_arch = getattr(models, self.train_opts.model_arch)
    encoder = create_body(model_arch, cut=-2)
    model = RetinaNet(encoder, data.c, final_bias=-4)
    crit = RetinaNetFocalLoss(scales=scales, ratios=ratios)
    learn = Learner(data, model, loss_func=crit, path=train_dir)
    learn = learn.split(retina_net_split)

    model_path = get_local_path(self.backend_opts.model_uri, tmp_dir)

    pretrained_uri = self.backend_opts.pretrained_uri
    if pretrained_uri:
        print('Loading weights from pretrained_uri: {}'.format(pretrained_uri))
        pretrained_path = download_if_needed(pretrained_uri, tmp_dir)
        learn.load(pretrained_path[:-4])

    callbacks = [
        TrackEpochCallback(learn),
        SaveModelCallback(learn, every='epoch'),
        MyCSVLogger(learn, filename='log'),
        ExportCallback(learn, model_path),
        SyncCallback(train_dir, self.backend_opts.train_uri,
                     self.train_opts.sync_interval)
    ]
    learn.unfreeze()
    learn.fit(self.train_opts.num_epochs, self.train_opts.lr, callbacks=callbacks)

    # Since the model is exported every epoch, we need some other way to
    # show that training is finished.
    str_to_file('done!', self.backend_opts.train_done_uri)

    # Sync output to cloud.
    sync_to_dir(train_dir, self.backend_opts.train_uri)
def run_ner(
        lang: str = 'eng',
        log_dir: str = 'logs',
        task: str = NER,
        batch_size: int = 1,
        lr: float = 5e-5,
        epochs: int = 1,
        dataset: str = 'data/conll-2003/',
        loss: str = 'cross',
        max_seq_len: int = 128,
        do_lower_case: bool = False,
        warmup_proportion: float = 0.1,
        grad_acc_steps: int = 1,
        rand_seed: int = None,
        fp16: bool = False,
        loss_scale: float = None,
        ds_size: int = None,
        data_bunch_path: str = 'data/conll-2003/db',
        bertAdam: bool = False,
        freez: bool = False,
        one_cycle: bool = False,
        discr: bool = False,
        lrm: float = 2.6,
        div: int = None,
        tuned_learner: str = None,
        do_train: bool = False,
        do_eval: bool = False,
        save: bool = False,
        name: str = 'ner',
        mask: tuple = ('s', 's'),
):
    name = "_".join(
        map(str, [
            name, task, lang, mask[0], mask[1], loss, batch_size, lr,
            max_seq_len, do_train, do_eval
        ]))
    log_dir = Path(log_dir)
    log_dir.mkdir(parents=True, exist_ok=True)
    init_logger(log_dir, name)

    if rand_seed:
        random.seed(rand_seed)
        np.random.seed(rand_seed)
        torch.manual_seed(rand_seed)
        if torch.cuda.is_available():
            torch.cuda.manual_seed_all(rand_seed)

    trainset = dataset + lang + '/train.txt'
    devset = dataset + lang + '/dev.txt'
    testset = dataset + lang + '/test.txt'

    bert_model = 'bert-base-cased' if lang == 'eng' else 'bert-base-multilingual-cased'
    print(f'Lang: {lang}\nModel: {bert_model}\nRun: {name}')
    model = BertForTokenClassification.from_pretrained(bert_model,
                                                       num_labels=len(VOCAB),
                                                       cache_dir='bertm')
    model = torch.nn.DataParallel(model)
    model_lr_group = bert_layer_list(model)
    layers = len(model_lr_group)
    kwargs = {'max_seq_len': max_seq_len, 'ds_size': ds_size, 'mask': mask}

    train_dl = DataLoader(dataset=NerDataset(trainset, bert_model, train=True, **kwargs),
                          batch_size=batch_size,
                          shuffle=True,
                          collate_fn=partial(pad, train=True))
    dev_dl = DataLoader(dataset=NerDataset(devset, bert_model, **kwargs),
                        batch_size=batch_size,
                        shuffle=False,
                        collate_fn=pad)
    test_dl = DataLoader(dataset=NerDataset(testset, bert_model, **kwargs),
                         batch_size=batch_size,
                         shuffle=False,
                         collate_fn=pad)

    data = DataBunch(train_dl=train_dl,
                     valid_dl=dev_dl,
                     test_dl=test_dl,
                     collate_fn=pad,
                     path=Path(data_bunch_path))

    loss_fun = ner_loss_func if loss == 'cross' else partial(ner_loss_func, zero=True)
    metrics = [Conll_F1()]

    learn = Learner(
        data,
        model,
        BertAdam,
        loss_func=loss_fun,
        metrics=metrics,
        true_wd=False,
        layer_groups=None if not freez else model_lr_group,
        path='learn',
    )

    # Initialise the BertAdam optimiser.
    train_opt_steps = int(len(train_dl.dataset) / batch_size) * epochs
    optim = BertAdam(model.parameters(),
                     lr=lr,
                     warmup=warmup_proportion,
                     t_total=train_opt_steps)
    if bertAdam:
        learn.opt = OptimWrapper(optim)
    else:
        print("No Bert Adam")

    # Load a fine-tuned learner if one was given.
    if tuned_learner:
        print('Loading pretrained learner: ', tuned_learner)
        learn.load(tuned_learner)

    # Uncomment to graph the learning-rate plot:
    # learn.lr_find()
    # learn.recorder.plot(skip_end=15)

    # Set the lr (discriminative learning rates).
    if div:
        layers = div
    lrs = lr if not discr else learn.lr_range(slice(lr / lrm**layers, lr))

    results = [['epoch', 'lr', 'f1', 'val_loss', 'train_loss', 'train_losses']]

    if do_train:
        for epoch in range(epochs):
            if freez:
                lay = (layers // (epochs - 1)) * epoch * -1
                if lay == 0:
                    print('Freeze')
                    learn.freeze()
                elif lay == layers:
                    print('unfreeze')
                    learn.unfreeze()
                else:
                    print('freeze2')
                    learn.freeze_to(lay)
                print('Freezing layers ', lay, ' of ', layers)

            # Fit the learner, i.e. train the model for one epoch.
            if one_cycle:
                learn.fit_one_cycle(1, lrs, moms=(0.8, 0.7))
            else:
                learn.fit(1, lrs)

            results.append([
                epoch,
                lrs,
                learn.recorder.metrics[0][0],
                learn.recorder.val_losses[0],
                np.array(learn.recorder.losses).mean(),
                learn.recorder.losses,
            ])
            if save:
                m_path = learn.save(f"{lang}_{epoch}_model", return_path=True)
                print(f'Saved model to {m_path}')

    if save:
        learn.export(f'{lang}.pkl')

    if do_eval:
        res = learn.validate(test_dl, metrics=metrics)
        met_res = [f'{m.__name__}: {r}' for m, r in zip(metrics, res[1:])]
        print(f'Validation on TEST SET:\nloss {res[0]}, {met_res}')
        results.append(['val', '-', res[1], res[0], '-', '-'])

    with open(log_dir / (name + '.csv'), 'a') as resultFile:
        wr = csv.writer(resultFile)
        wr.writerows(results)
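# A minimal invocation sketch for run_ner; every argument value shown here is
# illustrative, and the defaults above cover the rest.
run_ner(lang='eng', epochs=3, batch_size=32, do_train=True, do_eval=True,
        one_cycle=True, save=True)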
        cycle_len=args.cycle_len,
        cycle_mult=args.cycle_mult,
        start_epoch=args.start_epoch)
]

learn = Learner(db, model, metrics=[rmse, mae], callback_fns=callback_fns,
                wd=args.wd, loss_func=contribs_rmse_loss)
if args.start_epoch > 0:
    learn.load(model_se_str + f'_{args.start_epoch-1}')
else:
    learn.load(model_str)
torch.cuda.empty_cache()

if distributed_train:
    learn = learn.to_distributed(args.local_rank)

learn.fit(args.epochs)

# Make predictions with each per-epoch checkpoint.
n_val = len(train_df[train_df['molecule_id'].isin(val_mol_ids)])
val_preds = np.zeros((n_val, args.epochs))
test_preds = np.zeros((len(test_df), args.epochs))
for m in range(args.epochs):
    print(f'Predicting for model {m}')
    learn.load(model_se_str + f'_{m}')
    val_contrib_preds = learn.get_preds(DatasetType.Valid)
    test_contrib_preds = learn.get_preds(DatasetType.Test)
    val_preds[:, m] = val_contrib_preds[0][:, -1].detach().numpy()
    test_preds[:, m] = test_contrib_preds[0][:, -1].detach().numpy()
val_preds = val_preds * C.SC_STD + C.SC_MEAN
test_preds = test_preds * C.SC_STD + C.SC_MEAN
torch.cuda.manual_seed(12345)

from torch.utils.data import DataLoader, RandomSampler

from dataset import dataset

PATH = '/home/fernand/raven/neutral_pth/'
train = dataset(PATH, 'train')
valid = dataset(PATH, 'val')
test = dataset(PATH, 'test')
trainloader = DataLoader(train, batch_size=32, shuffle=True, num_workers=6)
# trainloader = DataLoader(train, batch_size=32,
#                          sampler=RandomSampler(train, replacement=True, num_samples=3200),
#                          shuffle=False, num_workers=6)
validloader = DataLoader(valid, batch_size=32, shuffle=False, num_workers=6)
testloader = DataLoader(test, batch_size=32, shuffle=False, num_workers=6)

from functools import partial

from fastai.basic_data import DataBunch
from fastai.basic_train import Learner
from torch.optim import Adam

from loss import loss_fn, Accuracy
from wren import WReN

db = DataBunch(train_dl=trainloader, valid_dl=validloader, test_dl=testloader)
wren = WReN()
opt = partial(Adam, betas=(0.9, 0.999), eps=1e-8)
learn = Learner(data=db, model=wren, opt_func=opt, loss_func=loss_fn,
                metrics=[Accuracy()])
# from fastai.train import to_fp16
# learn = to_fp16(learn)
learn.fit(20, lr=1e-4, wd=0.0)
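# A follow-up sketch: scoring the held-out test split after training. DatasetType
# comes from fastai.basic_data in fastai v1; using get_preds this way is an
# illustrative assumption, not part of the original script.
from fastai.basic_data import DatasetType

test_out, test_targets = learn.get_preds(ds_type=DatasetType.Test)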