def run_alexnet(input_path, output_path, batch_size, epochs, learning_rate):
    # Load image databunch
    print("[INFO] Loading Data")
    data = load_catsvsdog(input_path, batch_size)

    # Defining the learner
    alexnet_learner = Learner(
        data=data,
        model=ALEXNet(n_class=data.c),
        loss_func=nn.CrossEntropyLoss(),
        metrics=accuracy,
    )

    # Training the model
    print("[INFO] Training started.")
    alexnet_learner.fit_one_cycle(epochs, learning_rate)

    # Validation accuracy
    val_acc = int(
        np.round(alexnet_learner.recorder.metrics[-1][0].numpy().tolist(), 3) * 1000)

    # Saving the model
    print("[INFO] Saving model weights.")
    alexnet_learner.save("alexnet_catsvsdog_stg_1_" + str(val_acc))

    # Evaluation
    print("[INFO] Evaluating Network.")
    evaluate_model(alexnet_learner, output_path, plot=True)
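# A minimal usage sketch (hypothetical paths and hyperparameters; assumes
# load_catsvsdog, ALEXNet, and evaluate_model are importable alongside this
# function):
#
#   run_alexnet(input_path="data/catsvsdog", output_path="output",
#               batch_size=64, epochs=5, learning_rate=1e-3)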
def main(test, s3_data, batch, debug):
    """Train a semantic segmentation FPN model on the CamVid-Tiramisu dataset."""
    if batch:
        run_on_batch(test, debug)

    # Setup options
    batch_sz = 8
    num_workers = 4
    num_epochs = 20
    lr = 1e-4
    backbone_arch = 'resnet18'
    sample_pct = 1.0
    if test:
        batch_sz = 1
        num_workers = 0
        num_epochs = 2
        sample_pct = 0.01

    # Setup data
    tmp_dir_obj = tempfile.TemporaryDirectory()
    tmp_dir = tmp_dir_obj.name
    output_dir = local_output_uri
    make_dir(output_dir)
    data_dir = download_data(s3_data, tmp_dir)
    data = get_databunch(data_dir, sample_pct=sample_pct,
                         batch_sz=batch_sz, num_workers=num_workers)
    print(data)
    plot_data(data, output_dir)

    # Setup and train model
    num_classes = data.c
    model = SegmentationFPN(backbone_arch, num_classes)
    metrics = [acc_camvid]
    learn = Learner(data, model, metrics=metrics,
                    loss_func=SegmentationFPN.loss, path=output_dir)
    learn.unfreeze()

    callbacks = [
        SaveModelCallback(learn, monitor='valid_loss'),
        CSVLogger(learn, filename='log'),
    ]
    learn.fit_one_cycle(num_epochs, lr, callbacks=callbacks)

    # Plot predictions and sync
    plot_preds(data, learn, output_dir)
    if s3_data:
        sync_to_dir(output_dir, remote_output_uri)
def main(args):
    train_image_dir = args.train_image_dir
    train_label_dir = args.train_label_dir
    val_image_dir = args.val_image_dir
    val_label_dir = args.val_label_dir

    batch_size = 4
    num_workers = 4
    optimizer = optim.SGD
    criterion = MixLoss(nn.BCEWithLogitsLoss(), 0.5, DiceLoss(), 1)

    thresh = 0.1
    recall_partial = partial(recall, thresh=thresh)
    precision_partial = partial(precision, thresh=thresh)
    fbeta_score_partial = partial(fbeta_score, thresh=thresh)

    model = UNet(1, 1, first_out_channels=16)
    model = nn.DataParallel(model.cuda())

    transforms = [
        tsfm.Window(-200, 1000),
        tsfm.MinMaxNorm(-200, 1000)
    ]
    ds_train = FracNetTrainDataset(train_image_dir, train_label_dir,
                                   transforms=transforms)
    dl_train = FracNetTrainDataset.get_dataloader(ds_train, batch_size,
                                                  False, num_workers)
    ds_val = FracNetTrainDataset(val_image_dir, val_label_dir,
                                 transforms=transforms)
    dl_val = FracNetTrainDataset.get_dataloader(ds_val, batch_size,
                                                False, num_workers)

    databunch = DataBunch(dl_train, dl_val,
                          collate_fn=FracNetTrainDataset.collate_fn)

    learn = Learner(
        databunch,
        model,
        opt_func=optimizer,
        loss_func=criterion,
        metrics=[dice, recall_partial, precision_partial, fbeta_score_partial]
    )

    learn.fit_one_cycle(
        200,
        1e-1,
        pct_start=0,
        div_factor=1000,
        callbacks=[
            ShowGraph(learn),
        ]
    )

    if args.save_model:
        save(model.module.state_dict(), "./model_weights.pth")
def run_mnist(input_path, output_path, batch_size, epochs, learning_rate,
              model=Mnist_NN()):
    path = Path(input_path)

    ## Defining transformation
    ds_tfms = get_transforms(
        do_flip=False,
        flip_vert=False,
        max_rotate=15,
        max_zoom=1.1,
        max_lighting=0.2,
        max_warp=0.2,
    )

    ## Creating Databunch
    data = (ImageItemList.from_folder(path, convert_mode="L")
            .split_by_folder(train="training", valid="testing")
            .label_from_folder()
            .transform(tfms=ds_tfms, size=28)
            .databunch(bs=batch_size))

    ## Defining the learner
    mlp_learner = Learner(data=data,
                          model=model,
                          loss_func=nn.CrossEntropyLoss(),
                          metrics=accuracy)

    # Training the model
    mlp_learner.fit_one_cycle(epochs, learning_rate)
    val_acc = int(
        np.round(mlp_learner.recorder.metrics[-1][0].numpy().tolist(), 3) * 1000)

    ## Saving the model
    mlp_learner.save("mlp_mnist_stg_1_" + str(val_acc))

    ## Evaluation
    print("Evaluating Network..")
    interp = ClassificationInterpretation.from_learner(mlp_learner)
    print(classification_report(interp.y_true, interp.pred_class))

    ## Plotting train and validation loss
    mlp_learner.recorder.plot_losses()
    plt.savefig(output_path + "/loss.png")
    mlp_learner.recorder.plot_metrics()
    plt.savefig(output_path + "/metric.png")
def run_shallownet(input_path, output_path, batch_size, epochs, learning_rate):
    path = Path(input_path)

    # Creating Databunch
    data = (
        ImageItemList.from_folder(path)
        .split_by_folder(train="train", valid="test")
        .label_from_folder()
        .transform(tfms=None, size=32)
        .databunch(bs=batch_size)
    )

    # Defining the learner
    sn_learner = Learner(
        data=data,
        model=ShallowNet(n_class=data.c, size=32, in_channels=3),
        loss_func=nn.CrossEntropyLoss(),
        metrics=accuracy,
    )

    # Training the model
    sn_learner.fit_one_cycle(epochs, learning_rate)
    val_acc = int(
        np.round(sn_learner.recorder.metrics[-1][0].numpy().tolist(), 3) * 1000
    )

    # Saving the model
    sn_learner.save("sn_cifar10_stg_1_" + str(val_acc))

    # Evaluation
    print("Evaluating Network..")
    interp = ClassificationInterpretation.from_learner(sn_learner)
    print(classification_report(interp.y_true, interp.pred_class))

    # Plotting train and validation loss
    sn_learner.recorder.plot_losses()
    plt.savefig(output_path + "/loss.png")
    sn_learner.recorder.plot_metrics()
    plt.savefig(output_path + "/metric.png")
class Sequential:
    def __init__(self, model=None):
        self.layers = []
        self.last_dim = None
        self.model = model
        self.device = torch.device('cpu')
        if torch.cuda.is_available():
            self.device = torch.device('cuda')

    def add(self, layer):
        layer = layer.get_layer(self.last_dim)
        self.last_dim = layer['output_dim']
        self.layers.extend(layer['layers'])

    def compile(self, loss, optimizer=None):
        if len(self.layers) > 0:
            self.model = nn.Sequential(*self.layers)
        self.loss = loss

    def fit(self, x, y, bs, epochs, lr=1e-3, one_cycle=True, get_lr=True):
        db = create_db(x, y, bs=bs)
        self.learn = Learner(db, self.model, loss_func=self.loss)
        if one_cycle:
            self.learn.fit_one_cycle(epochs, lr)
        else:
            self.learn.fit(epochs, lr)

    def lr_find(self, x, y, bs):
        db = create_db(x, y, bs=bs)
        learn = Learner(db, self.model, loss_func=self.loss)
        learn.lr_find()
        clear_output()
        learn.recorder.plot(suggestion=True)

    def predict(self, x):
        self.learn.model.eval()
        with torch.no_grad():
            # Fixed: use self.device rather than an undefined global `device`
            y_preds = self.learn.model(torch.Tensor(x).to(self.device))
        return y_preds.cpu().numpy()
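# A minimal usage sketch of the Keras-style wrapper above. `Dense` stands in
# for a hypothetical layer object exposing get_layer(input_dim); x_train,
# y_train, and x_test are numpy arrays:
#
#   net = Sequential()
#   net.add(Dense(64))
#   net.add(Dense(1))
#   net.compile(loss=nn.MSELoss())
#   net.fit(x_train, y_train, bs=32, epochs=5, lr=1e-3)
#   preds = net.predict(x_test)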
def do_train(
        cfg,
        model,
        train_dl,
        valid_dl,
        optimizer,
        loss_fn,
        metrics=[],
        callbacks: list = [],
):
    log_period = cfg.SOLVER.LOG_PERIOD
    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD
    output_dir = cfg.OUTPUT_DIR
    device = cfg.MODEL.DEVICE
    epochs = cfg.SOLVER.MAX_EPOCHS

    data_bunch = DataBunch(train_dl, valid_dl)
    learn = Learner(data_bunch, model, loss_func=loss_fn)
    callbacks.append(LoggingLog(learn, "template_model.train"))
    if metrics:
        learn.metrics = metrics

    learn.fit_one_cycle(epochs, cfg.SOLVER.BASE_LR)
path = untar_data(URLs.CIFAR)

# In[6]:

number_of_epochs = 100
data = ImageDataBunch.from_folder(path, valid="test", bs=128).normalize(cifar_stats)
# silent prevents the training metrics from printing
learn = Learner(data, model, silent=True)  # , metrics=accuracy

# In[ ]:

# learn.save('simple_model')

# In[ ]:

learn.lr_find()

# In[ ]:

learn.recorder.plot(suggestion=True)

# In[ ]:

# .fit_one_cycle uses Triangular Learning Rates, whereas .fit does not
learn.fit_one_cycle(number_of_epochs,
                    max_lr=learn.recorder.min_grad_lr,
                    callbacks=cb(learn))
# learn.fit(number_of_epochs, lr=learn.recorder.min_grad_lr)
def main():
    parser = ArgumentParser()
    parser.add_argument('--pregenerated_data', type=Path, required=True)
    parser.add_argument('--output_dir', type=Path, required=True)
    parser.add_argument("--bert_model", type=str, required=True,
                        choices=["bert-base-uncased", "bert-large-uncased",
                                 "bert-base-cased", "bert-base-multilingual-cased",
                                 "bert-base-chinese"])
    parser.add_argument("--do_lower_case", action="store_true")
    parser.add_argument("--reduce_memory", action="store_true",
                        help="Store training data as on-disc memmaps to massively reduce memory usage")
    parser.add_argument("--epochs", type=int, default=3,
                        help="Number of epochs to train for")
    parser.add_argument("--local_rank", type=int, default=-1,
                        help="local_rank for distributed training on gpus")
    parser.add_argument("--no_cuda", action='store_true',
                        help="Whether not to use CUDA when available")
    parser.add_argument('--gradient_accumulation_steps', type=int, default=1,
                        help="Number of updates steps to accumulate before performing a backward/update pass.")
    parser.add_argument("--train_batch_size", default=16, type=int,
                        help="Total batch size for training.")
    parser.add_argument('--fp16', action='store_true',
                        help="Whether to use 16-bit float precision instead of 32-bit")
    parser.add_argument('--loss_scale', type=float, default=0,
                        help="Loss scaling to improve fp16 numeric stability. Only used when fp16 set to True.\n"
                             "0 (default value): dynamic loss scaling.\n"
                             "Positive power of 2: static loss scaling value.\n")
    parser.add_argument("--warmup_proportion", default=0.1, type=float,
                        help="Proportion of training to perform linear learning rate warmup for. "
                             "E.g., 0.1 = 10%% of training.")
    parser.add_argument("--learning_rate", default=3e-5, type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument('--seed', type=int, default=None,
                        help="random seed for initialization")
    args = parser.parse_args()

    assert args.pregenerated_data.is_dir(), \
        "--pregenerated_data should point to the folder of files made by pregenerate_training_data.py!"

    samples_per_epoch = []
    for i in range(args.epochs):
        epoch_file = args.pregenerated_data / f"epoch_{i}.json"
        metrics_file = args.pregenerated_data / f"epoch_{i}_metrics.json"
        if epoch_file.is_file() and metrics_file.is_file():
            metrics = json.loads(metrics_file.read_text())
            samples_per_epoch.append(metrics['num_training_examples'])
        else:
            if i == 0:
                exit("No training data was found!")
            print(f"Warning! There are fewer epochs of pregenerated data ({i}) "
                  f"than training epochs ({args.epochs}).")
            print("This script will loop over the available data, but training "
                  "diversity may be negatively impacted.")
            num_data_epochs = i
            break
    else:
        num_data_epochs = args.epochs
    print(samples_per_epoch)

    if args.gradient_accumulation_steps < 1:
        raise ValueError("Invalid gradient_accumulation_steps parameter: {}, should be >= 1".format(
            args.gradient_accumulation_steps))

    args.train_batch_size = args.train_batch_size // args.gradient_accumulation_steps

    if args.seed:
        random.seed(args.seed)
        np.random.seed(args.seed)
        torch.manual_seed(args.seed)
        # Seed all GPUs if any are available (the original referenced an
        # undefined `n_gpu` here)
        if torch.cuda.device_count() > 0:
            torch.cuda.manual_seed_all(args.seed)

    if args.output_dir.is_dir() and list(args.output_dir.iterdir()):
        logging.warning(f"Output directory ({args.output_dir}) already exists and is not empty!")
    args.output_dir.mkdir(parents=True, exist_ok=True)

    tokenizer = BertTokenizer.from_pretrained(args.bert_model, do_lower_case=args.do_lower_case)

    total_train_examples = 0
    for i in range(args.epochs):
        # The modulo takes into account the fact that we may loop over limited epochs of data
        total_train_examples += samples_per_epoch[i % len(samples_per_epoch)]

    num_train_optimization_steps = int(
        total_train_examples / args.train_batch_size / args.gradient_accumulation_steps)

    # Prepare model
    model = BertForPreTraining.from_pretrained(args.bert_model)
    model = torch.nn.DataParallel(model)

    # Prepare optimizer
    optimizer = BertAdam

    train_dataloader = DataLoader(
        PregeneratedData(args.pregenerated_data, tokenizer, args.epochs,
                         args.train_batch_size),
        batch_size=args.train_batch_size,
    )
    data = DataBunch(train_dataloader, train_dataloader)

    global_step = 0
    logging.info("***** Running training *****")
    logging.info(f"  Num examples = {total_train_examples}")
    logging.info("  Batch size = %d", args.train_batch_size)
    logging.info("  Num steps = %d", num_train_optimization_steps)

    def loss(x, y):
        return x.mean()

    learn = Learner(data, model, optimizer,
                    loss_func=loss,
                    true_wd=False,
                    path='learn',
                    layer_groups=bert_layer_list(model),
                    )

    lr = args.learning_rate
    layers = len(bert_layer_list(model))
    lrs = learn.lr_range(slice(lr / (2.6**4), lr))
    for epoch in range(args.epochs):
        learn.fit_one_cycle(1, lrs, wd=0.01)
        # save model at half way point
        if epoch == args.epochs // 2:
            savem = learn.model.module.bert if hasattr(learn.model, 'module') else learn.model.bert
            output_model_file = args.output_dir / f"pytorch_fastai_model_{args.bert_model}_{epoch}.bin"
            torch.save(savem.state_dict(), str(output_model_file))
            print(f'Saved bert to {output_model_file}')

    savem = learn.model.module.bert if hasattr(learn.model, 'module') else learn.model.bert
    output_model_file = args.output_dir / f"pytorch_fastai_model_{args.bert_model}_{args.epochs}.bin"
    torch.save(savem.state_dict(), str(output_model_file))
    print(f'Saved bert to {output_model_file}')
def run_ner(
        lang: str = 'eng',
        log_dir: str = 'logs',
        task: str = NER,
        batch_size: int = 1,
        epochs: int = 1,
        dataset: str = 'data/conll-2003/',
        loss: str = 'cross',
        max_seq_len: int = 128,
        do_lower_case: bool = False,
        warmup_proportion: float = 0.1,
        rand_seed: int = None,
        ds_size: int = None,
        data_bunch_path: str = 'data/conll-2003/db',
        tuned_learner: str = None,
        do_train: bool = False,
        do_eval: bool = False,
        save: bool = False,
        nameX: str = 'ner',
        mask: tuple = ('s', 's'),
):
    name = "_".join(
        map(str, [
            nameX, task, lang, mask[0], mask[1], loss, batch_size,
            max_seq_len, do_train, do_eval
        ]))
    log_dir = Path(log_dir)
    log_dir.mkdir(parents=True, exist_ok=True)
    init_logger(log_dir, name)

    if rand_seed:
        random.seed(rand_seed)
        np.random.seed(rand_seed)
        torch.manual_seed(rand_seed)
        if torch.cuda.is_available():
            torch.cuda.manual_seed_all(rand_seed)

    trainset = dataset + lang + '/train.txt'
    devset = dataset + lang + '/dev.txt'
    testset = dataset + lang + '/test.txt'

    bert_model = 'bert-base-cased' if lang == 'eng' else 'bert-base-multilingual-cased'
    print(f'Lang: {lang}\nModel: {bert_model}\nRun: {name}')
    model = BertForTokenClassification.from_pretrained(bert_model,
                                                       num_labels=len(VOCAB),
                                                       cache_dir='bertm')
    if tuned_learner:
        print('Loading pretrained learner: ', tuned_learner)
        model.bert.load_state_dict(torch.load(tuned_learner))

    model = torch.nn.DataParallel(model)
    model_lr_group = bert_layer_list(model)
    layers = len(model_lr_group)
    kwargs = {'max_seq_len': max_seq_len, 'ds_size': ds_size, 'mask': mask}

    train_dl = DataLoader(dataset=NerDataset(trainset, bert_model, train=True, **kwargs),
                          batch_size=batch_size,
                          shuffle=True,
                          collate_fn=partial(pad, train=True))
    dev_dl = DataLoader(dataset=NerDataset(devset, bert_model, **kwargs),
                        batch_size=batch_size,
                        shuffle=False,
                        collate_fn=pad)
    test_dl = DataLoader(dataset=NerDataset(testset, bert_model, **kwargs),
                         batch_size=batch_size,
                         shuffle=False,
                         collate_fn=pad)

    data = DataBunch(train_dl=train_dl,
                     valid_dl=dev_dl,
                     test_dl=test_dl,
                     collate_fn=pad,
                     path=Path(data_bunch_path))

    train_opt_steps = int(len(train_dl.dataset) / batch_size) * epochs
    optim = BertAdam(model.parameters(),
                     lr=0.01,
                     warmup=warmup_proportion,
                     t_total=train_opt_steps)

    loss_fun = ner_loss_func if loss == 'cross' else partial(ner_loss_func, zero=True)
    metrics = [Conll_F1()]

    learn = Learner(
        data,
        model,
        BertAdam,
        loss_func=loss_fun,
        metrics=metrics,
        true_wd=False,
        layer_groups=model_lr_group,
        path='learn' + nameX,
    )
    learn.opt = OptimWrapper(optim)

    lrm = 1.6

    # select set of starting lrs
    lrs_eng = [0.01, 5e-4, 3e-4, 3e-4, 1e-5]
    lrs_deu = [0.01, 5e-4, 5e-4, 3e-4, 2e-5]
    startlr = lrs_eng if lang == 'eng' else lrs_deu

    results = [['epoch', 'lr', 'f1', 'val_loss', 'train_loss', 'train_losses']]

    if do_train:
        learn.freeze()
        learn.fit_one_cycle(1, startlr[0], moms=(0.8, 0.7))
        learn.freeze_to(-3)
        lrs = learn.lr_range(slice(startlr[1] / (1.6**15), startlr[1]))
        learn.fit_one_cycle(1, lrs, moms=(0.8, 0.7))
        learn.freeze_to(-6)
        lrs = learn.lr_range(slice(startlr[2] / (1.6**15), startlr[2]))
        learn.fit_one_cycle(1, lrs, moms=(0.8, 0.7))
        learn.freeze_to(-12)
        lrs = learn.lr_range(slice(startlr[3] / (1.6**15), startlr[3]))
        learn.fit_one_cycle(1, lrs, moms=(0.8, 0.7))
        learn.unfreeze()
        lrs = learn.lr_range(slice(startlr[4] / (1.6**15), startlr[4]))
        learn.fit_one_cycle(1, lrs, moms=(0.8, 0.7))

    if do_eval:
        res = learn.validate(test_dl, metrics=metrics)
        met_res = [f'{m.__name__}: {r}' for m, r in zip(metrics, res[1:])]
        print(f'Validation on TEST SET:\nloss {res[0]}, {met_res}')
        results.append(['val', '-', res[1], res[0], '-', '-'])

    with open(log_dir / (name + '.csv'), 'a') as resultFile:
        wr = csv.writer(resultFile)
        wr.writerows(results)
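# For reference, fastai v1 expands a slice passed to lr_range into one
# learning rate per layer group, geometrically spaced between the two
# endpoints; this is what drives the gradual-unfreezing schedule above
# (illustrative values):
#
#   lrs = learn.lr_range(slice(3e-4 / (1.6**15), 3e-4))
#   # -> array of len(learn.layer_groups) values, smallest for the earliest
#   #    layers, largest for the head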
callback_fns = [
    partial(GradientClipping, clip=10),
    GroupMeanLogMAE,
    partial(SaveModelCallback, every='improvement', mode='min',
            monitor='group_mean_log_mae', name=model_str)
]
learn = Learner(db, model, metrics=[rmse, mae], callback_fns=callback_fns,
                wd=args.wd, loss_func=contribs_rmse_loss)
if args.start_epoch > 0:
    learn.load(model_str)
    torch.cuda.empty_cache()
if distributed_train:
    learn = learn.to_distributed(args.local_rank)

learn.fit_one_cycle(args.epochs, max_lr=args.lr, start_epoch=args.start_epoch)

# make predictions
val_contrib_preds = learn.get_preds(DatasetType.Valid)
test_contrib_preds = learn.get_preds(DatasetType.Test)
val_preds = val_contrib_preds[0][:, -1].detach().numpy() * C.SC_STD + C.SC_MEAN
test_preds = test_contrib_preds[0][:, -1].detach().numpy() * C.SC_STD + C.SC_MEAN

# store results
store_submit(test_preds, model_str, print_head=True)
store_oof(val_preds, model_str, print_head=True)
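# Note: in fastai v1, learn.get_preds returns a (predictions, targets) pair,
# hence the [0] indexing above; the final column appears to be rescaled from
# standardized units back to the target scale via C.SC_STD and C.SC_MEAN.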
def run_ner(
        lang: str = 'eng',
        log_dir: str = 'logs',
        task: str = NER,
        batch_size: int = 1,
        lr: float = 5e-5,
        epochs: int = 1,
        dataset: str = 'data/conll-2003/',
        loss: str = 'cross',
        max_seq_len: int = 128,
        do_lower_case: bool = False,
        warmup_proportion: float = 0.1,
        grad_acc_steps: int = 1,
        rand_seed: int = None,
        fp16: bool = False,
        loss_scale: float = None,
        ds_size: int = None,
        data_bunch_path: str = 'data/conll-2003/db',
        bertAdam: bool = False,
        freez: bool = False,
        one_cycle: bool = False,
        discr: bool = False,
        lrm: float = 2.6,
        div: int = None,
        tuned_learner: str = None,
        do_train: bool = False,
        do_eval: bool = False,
        save: bool = False,
        name: str = 'ner',
        mask: tuple = ('s', 's'),
):
    name = "_".join(
        map(str, [
            name, task, lang, mask[0], mask[1], loss, batch_size, lr,
            max_seq_len, do_train, do_eval
        ]))
    log_dir = Path(log_dir)
    log_dir.mkdir(parents=True, exist_ok=True)
    init_logger(log_dir, name)

    if rand_seed:
        random.seed(rand_seed)
        np.random.seed(rand_seed)
        torch.manual_seed(rand_seed)
        if torch.cuda.is_available():
            torch.cuda.manual_seed_all(rand_seed)

    trainset = dataset + lang + '/train.txt'
    devset = dataset + lang + '/dev.txt'
    testset = dataset + lang + '/test.txt'

    bert_model = 'bert-base-cased' if lang == 'eng' else 'bert-base-multilingual-cased'
    print(f'Lang: {lang}\nModel: {bert_model}\nRun: {name}')
    model = BertForTokenClassification.from_pretrained(bert_model,
                                                       num_labels=len(VOCAB),
                                                       cache_dir='bertm')
    model = torch.nn.DataParallel(model)
    model_lr_group = bert_layer_list(model)
    layers = len(model_lr_group)
    kwargs = {'max_seq_len': max_seq_len, 'ds_size': ds_size, 'mask': mask}

    train_dl = DataLoader(dataset=NerDataset(trainset, bert_model, train=True, **kwargs),
                          batch_size=batch_size,
                          shuffle=True,
                          collate_fn=partial(pad, train=True))
    dev_dl = DataLoader(dataset=NerDataset(devset, bert_model, **kwargs),
                        batch_size=batch_size,
                        shuffle=False,
                        collate_fn=pad)
    test_dl = DataLoader(dataset=NerDataset(testset, bert_model, **kwargs),
                         batch_size=batch_size,
                         shuffle=False,
                         collate_fn=pad)

    data = DataBunch(train_dl=train_dl,
                     valid_dl=dev_dl,
                     test_dl=test_dl,
                     collate_fn=pad,
                     path=Path(data_bunch_path))

    loss_fun = ner_loss_func if loss == 'cross' else partial(ner_loss_func, zero=True)
    metrics = [Conll_F1()]

    learn = Learner(
        data,
        model,
        BertAdam,
        loss_func=loss_fun,
        metrics=metrics,
        true_wd=False,
        layer_groups=None if not freez else model_lr_group,
        path='learn',
    )

    # initialise bert adam optimiser
    train_opt_steps = int(len(train_dl.dataset) / batch_size) * epochs
    optim = BertAdam(model.parameters(),
                     lr=lr,
                     warmup=warmup_proportion,
                     t_total=train_opt_steps)
    if bertAdam:
        learn.opt = OptimWrapper(optim)
    else:
        print("No Bert Adam")

    # load fine-tuned learner
    if tuned_learner:
        print('Loading pretrained learner: ', tuned_learner)
        learn.load(tuned_learner)

    # Uncomment to graph learning rate plot
    # learn.lr_find()
    # learn.recorder.plot(skip_end=15)

    # set lr (discriminative learning rates)
    if div:
        layers = div
    lrs = lr if not discr else learn.lr_range(slice(lr / lrm**(layers), lr))

    results = [['epoch', 'lr', 'f1', 'val_loss', 'train_loss', 'train_losses']]

    if do_train:
        for epoch in range(epochs):
            if freez:
                lay = (layers // (epochs - 1)) * epoch * -1
                if lay == 0:
                    print('Freeze')
                    learn.freeze()
                elif lay == layers:
                    print('unfreeze')
                    learn.unfreeze()
                else:
                    print('freeze2')
                    learn.freeze_to(lay)
                print('Freezing layers ', lay, ' off ', layers)

            # Fit Learner - eg train model
            if one_cycle:
                learn.fit_one_cycle(1, lrs, moms=(0.8, 0.7))
            else:
                learn.fit(1, lrs)

            results.append([
                epoch,
                lrs,
                learn.recorder.metrics[0][0],
                learn.recorder.val_losses[0],
                np.array(learn.recorder.losses).mean(),
                learn.recorder.losses,
            ])

            if save:
                m_path = learn.save(f"{lang}_{epoch}_model", return_path=True)
                print(f'Saved model to {m_path}')

        if save:
            learn.export(f'{lang}.pkl')

    if do_eval:
        res = learn.validate(test_dl, metrics=metrics)
        met_res = [f'{m.__name__}: {r}' for m, r in zip(metrics, res[1:])]
        print(f'Validation on TEST SET:\nloss {res[0]}, {met_res}')
        results.append(['val', '-', res[1], res[0], '-', '-'])

    with open(log_dir / (name + '.csv'), 'a') as resultFile:
        wr = csv.writer(resultFile)
        wr.writerows(results)
learn.summary()

# +
# learn.fit_one_cycle(32, max_lr=slice(0.2e-2, 1e-2), wd=[1e-3, 0.1e-1], pct_start=0.0,
#                     div_factor=100, callbacks=[logger, SaveModelCallback(learn, monitor='metric_tot',
#                     mode='max', name=f'model_{fold}'), MixUpCallback(learn)])

# changed config
learn.fit_one_cycle(100,
                    max_lr=slice(0.2e-2, 1.5e-2),
                    wd=[1e-4, 1e-3],
                    pct_start=0.0,
                    div_factor=100,
                    callbacks=[
                        logger,
                        SaveModelCallback(learn, monitor='metric_tot',
                                          mode='max', name=f'model_{fold}'),
                        MixUpCallback(learn)
                    ])
# metrics: Metric_grapheme, Metric_vowel, Metric_consonant, Metric_tot (competition metric)
# -

log_df = pd.read_csv("./log1.csv")
plt.plot(log_df["metric_idx"])
plt.plot(log_df["metric_idx.1"])
plt.plot(log_df["metric_idx.2"])
plt.show()
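# Note: the "metric_idx", "metric_idx.1", "metric_idx.2" column names arise
# because pandas de-duplicates repeated headers in the CSV log by appending
# .1, .2, ...; they presumably correspond to the per-component metrics named
# in the comment above.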
if args.triplets_online:
    learn.loss_func = TripletLoss(margin=1.0)
    learn.callbacks.append(TripletSetter(model, train_loader, train_loader2,
                                         semihard_negative, margin=1.0,
                                         triplets_per_class=100))
    learn.callbacks.append(TripletSetter(model, val_loader, val_loader2,
                                         semihard_negative, margin=1.0,
                                         triplets_per_class=125))
else:
    learn.loss_func = OnlineTripletLoss(1.0, SemihardNegativeTripletSelector(margin=1.0))

# learn.lr_find(start_lr=1e-6, end_lr=5e-2, no_grad_val=False, num_it=300)
# plot_recorder(learn.recorder)

plt.show()

torch.cuda.empty_cache()

learn.fit_one_cycle(cyc_len=1, max_lr=1e-2, div_factor=20.0, pct_start=0.25,
                    no_grad_val=False)  # , wd=1e-7, moms=(0.95, 0.85)

# learn.save('16epochs_triplets')

train_loader = schnetpack2.custom.data.AtomsLoader(
    data_train_triplets, batch_size=16,
    sampler=SequentialNRepeatSampler(data_train_triplets, N_repeats=500),
    num_workers=9 * torch.cuda.device_count(), pin_memory=True)
val_loader = schnetpack2.custom.data.AtomsLoader(
    data_val_triplets, batch_size=16,
    sampler=SequentialNRepeatSampler(data_val_triplets, N_repeats=500),
    num_workers=9 * torch.cuda.device_count(), pin_memory=True)

model = learn.model
model.eval()

if not args.triplets_online:
    save_embeddings_triplets(train_loader, model,
                             "embeddings_triplets_ICSD_online_train_{}".format(args.lat_dims))
    save_embeddings_triplets(val_loader, model,
                             "embeddings_triplets_ICSD_online_val_{}".format(args.lat_dims))
def train(config_path, opts):
    tmp_dir_obj = tempfile.TemporaryDirectory()
    tmp_dir = tmp_dir_obj.name

    cfg = load_config(config_path, opts)
    print(cfg)

    # Setup data
    databunch, full_databunch = build_databunch(cfg, tmp_dir)
    output_dir = setup_output_dir(cfg, tmp_dir)
    print(full_databunch)

    plotter = build_plotter(cfg)
    if not cfg.lr_find_mode and not cfg.predict_mode:
        plotter.plot_data(databunch, output_dir)

    # Setup model
    num_labels = databunch.c
    model = build_model(cfg, num_labels)
    metrics = [CocoMetric(num_labels)]
    learn = Learner(databunch, model, path=output_dir, metrics=metrics)
    # Monkey-patch fastai's internal batch-loss function with the custom version
    fastai.basic_train.loss_batch = loss_batch
    best_model_path = join(output_dir, 'best_model.pth')
    last_model_path = join(output_dir, 'last_model.pth')

    # Train model
    callbacks = [
        MyCSVLogger(learn, filename='log'),
        SubLossMetric(learn, model.subloss_names)
    ]

    if cfg.output_uri.startswith('s3://'):
        callbacks.append(
            SyncCallback(output_dir, cfg.output_uri, cfg.solver.sync_interval))

    if cfg.model.init_weights:
        device = next(model.parameters()).device
        model.load_state_dict(
            torch.load(cfg.model.init_weights, map_location=device))

    if not cfg.predict_mode:
        if cfg.overfit_mode:
            learn.fit_one_cycle(cfg.solver.num_epochs, cfg.solver.lr,
                                callbacks=callbacks)
            torch.save(learn.model.state_dict(), best_model_path)
            learn.model.eval()
            print('Validating on training set...')
            learn.validate(full_databunch.train_dl, metrics=metrics)
        else:
            tb_logger = TensorboardLogger(learn, 'run')
            tb_logger.set_extra_args(model.subloss_names, cfg.overfit_mode)

            extra_callbacks = [
                MySaveModelCallback(learn, best_model_path,
                                    monitor='coco_metric', every='improvement'),
                MySaveModelCallback(learn, last_model_path, every='epoch'),
                TrackEpochCallback(learn),
            ]
            callbacks.extend(extra_callbacks)

            if cfg.lr_find_mode:
                learn.lr_find()
                learn.recorder.plot(suggestion=True, return_fig=True)
                lr = learn.recorder.min_grad_lr
                print('lr_find() found lr: {}'.format(lr))
                exit()

            learn.fit_one_cycle(cfg.solver.num_epochs, cfg.solver.lr,
                                callbacks=callbacks)
            print('Validating on full validation set...')
            learn.validate(full_databunch.valid_dl, metrics=metrics)
    else:
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        model.load_state_dict(
            torch.load(join(output_dir, 'best_model.pth'), map_location=device))
        model.eval()

    print('Plotting predictions...')
    plot_dataset = databunch.train_ds if cfg.overfit_mode else databunch.valid_ds
    plotter.make_debug_plots(plot_dataset, model, databunch.classes, output_dir)

    if cfg.output_uri.startswith('s3://'):
        sync_to_dir(output_dir, cfg.output_uri)
def main(args):
    if args.deterministic:
        set_seed(42)

    # Set device
    if args.device is None:
        if torch.cuda.is_available():
            args.device = 'cuda:0'
        else:
            args.device = 'cpu'
    defaults.device = torch.device(args.device)

    # Aggregate path and labels into list for fastai ImageDataBunch
    fnames, labels, is_valid = [], [], []
    dataset = OpenFire(root=args.data_path, train=True, download=True,
                       img_folder=args.img_folder)
    for sample in dataset.data:
        fnames.append(dataset._images.joinpath(sample['name']).relative_to(dataset.root))
        labels.append(sample['target'])
        is_valid.append(False)
    dataset = OpenFire(root=args.data_path, train=False, download=True)
    for sample in dataset.data:
        fnames.append(dataset._images.joinpath(sample['name']).relative_to(dataset.root))
        labels.append(sample['target'])
        is_valid.append(True)

    df = pd.DataFrame.from_dict(dict(name=fnames, label=labels, is_valid=is_valid))

    # Split train and valid sets
    il = vision.ImageList.from_df(df, path=args.data_path).split_from_df('is_valid')
    # Encode labels
    il = il.label_from_df(cols='label',
                          label_cls=FloatList if args.binary else CategoryList)
    # Set transformations
    il = il.transform(vision.get_transforms(), size=args.resize)
    # Create the Databunch
    data = il.databunch(bs=args.batch_size,
                        num_workers=args.workers).normalize(vision.imagenet_stats)
    # Metric
    metric = partial(vision.accuracy_thresh, thresh=0.5) if args.binary else vision.error_rate

    # Create model
    model = models.__dict__[args.model](imagenet_pretrained=args.pretrained,
                                        num_classes=data.c,
                                        lin_features=args.lin_feats,
                                        concat_pool=args.concat_pool,
                                        bn_final=args.bn_final,
                                        dropout_prob=args.dropout_prob)

    # Create learner
    learner = Learner(data,
                      model,
                      wd=args.weight_decay,
                      loss_func=CustomBCELogitsLoss() if args.binary else nn.CrossEntropyLoss(),
                      metrics=metric)

    # Form layer group for optimization
    meta = model_meta.get(args.model, _default_meta)
    learner.split(meta['split'])
    # Freeze model's head
    if args.pretrained:
        learner.freeze()

    if args.resume:
        learner.load(args.resume)
    if args.unfreeze:
        learner.unfreeze()

    learner.fit_one_cycle(args.epochs,
                          max_lr=slice(None, args.lr, None),
                          div_factor=args.div_factor,
                          final_div=args.final_div_factor)

    learner.save(args.checkpoint)
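# Note on max_lr=slice(None, args.lr, None): in fastai v1, a slice with no
# start trains the last layer group at args.lr and all earlier groups at
# args.lr / 10 (discriminative learning rates across the split defined above).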
def main(test, s3_data, batch, debug):
    if batch:
        run_on_batch(test, debug)

    # Setup options
    bs = 16
    size = 256
    num_workers = 4
    num_epochs = 100
    lr = 1e-4
    # for size 256
    # Subtract 2 because there's no padding on final convolution
    grid_sz = 8 - 2

    if test:
        bs = 8
        size = 128
        num_debug_images = 32
        num_workers = 0
        num_epochs = 1
        # for size 128
        grid_sz = 4 - 2

    # Setup data
    make_dir(output_dir)
    data_dir = untar_data(URLs.PASCAL_2007, dest='/opt/data/pascal2007/data')
    img_path = data_dir/'train/'
    trn_path = data_dir/'train.json'
    trn_images, trn_lbl_bbox = get_annotations(trn_path)
    val_path = data_dir/'valid.json'
    val_images, val_lbl_bbox = get_annotations(val_path)

    images, lbl_bbox = trn_images + val_images, trn_lbl_bbox + val_lbl_bbox
    img2bbox = dict(zip(images, lbl_bbox))
    get_y_func = lambda o: img2bbox[o.name]

    with open(trn_path) as f:
        d = json.load(f)
        classes = sorted(d['categories'], key=lambda x: x['id'])
        classes = [x['name'] for x in classes]
        classes = ['background'] + classes
        num_classes = len(classes)

    anc_sizes = torch.tensor([
        [1, 1],
        [2, 2],
        [3, 3],
        [3, 1],
        [1, 3]], dtype=torch.float32)
    grid = ObjectDetectionGrid(grid_sz, anc_sizes, num_classes)
    score_thresh = 0.1
    iou_thresh = 0.8

    class MyObjectCategoryList(ObjectCategoryList):
        def analyze_pred(self, pred):
            boxes, labels, _ = grid.get_preds(
                pred.unsqueeze(0), score_thresh=score_thresh,
                iou_thresh=iou_thresh)
            return (boxes[0], labels[0])

    class MyObjectItemList(ObjectItemList):
        _label_cls = MyObjectCategoryList

    def get_data(bs, size):
        src = MyObjectItemList.from_folder(img_path)
        if test:
            src = src[0:num_debug_images]
        src = src.split_by_files(val_images)
        src = src.label_from_func(get_y_func, classes=classes)
        src = src.transform(get_transforms(), size=size, tfm_y=True)
        return src.databunch(path=data_dir, bs=bs, collate_fn=bb_pad_collate,
                             num_workers=num_workers)

    data = get_data(bs, size)
    print(data)
    plot_data(data, output_dir)

    # Setup model
    model = ObjectDetectionModel(grid)

    def loss(out, gt_boxes, gt_classes):
        gt = model.grid.encode(gt_boxes, gt_classes)
        box_loss, class_loss = model.grid.compute_losses(out, gt)
        return box_loss + class_loss

    metrics = [F1(grid, score_thresh=score_thresh, iou_thresh=iou_thresh)]
    learn = Learner(data, model, metrics=metrics, loss_func=loss,
                    path=output_dir)
    callbacks = [
        CSVLogger(learn, filename='log')
    ]
    # model.freeze_body()
    learn.fit_one_cycle(num_epochs, lr, callbacks=callbacks)

    plot_preds(data, learn, output_dir)

    if s3_data:
        sync_to_dir(output_dir, output_uri)
                  collate_fn=dlc.gdf_col, pin_memory=False, num_workers=0)
databunch = DataBunch(t_data, v_data, collate_fn=dlc.gdf_col, device="cuda")
t_final = time() - start
print(t_final)

print("Creating model")
start = time()
model = TabularModel(emb_szs=embeddings, n_cont=len(cont_names), out_sz=2,
                     layers=[512, 256])
learn = Learner(databunch, model, metrics=[accuracy])
learn.loss_func = torch.nn.CrossEntropyLoss()
t_final = time() - start
print(t_final)

print("Finding learning rate")
start = time()
learn.lr_find()
learn.recorder.plot(show_moms=True, suggestion=True)
learning_rate = 1.32e-2
epochs = 1
t_final = time() - start
print(t_final)

print("Running Training")
start = time()
learn.fit_one_cycle(epochs, learning_rate)
t_final = time() - start
print(t_final)
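# Note: instead of hard-coding learning_rate above, the suggestion computed by
# recorder.plot(suggestion=True) can be read back programmatically:
#
#   learning_rate = learn.recorder.min_grad_lr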
learn.split([learn.model.cnn[:6], learn.model.cnn[6:], learn.model.fc])

# In[13]:

from fastai.callbacks import SaveModelCallback

cb_save_model = SaveModelCallback(learn, every="epoch", name=name)
cb_coach = CbCoachTrain(learn, n_train_batch=20)
cb_sims = CbSims(learn)
# cb_siamese_validate = SiameseValidateCallback(learn, txlog)
cbs = [cb_save_model, cb_coach, cb_sims]  # , cb_siamese_validate]

# In[14]:

learn.freeze_to(-1)
learn.fit_one_cycle(3, callbacks=cbs)
learn.unfreeze()

# In[15]:

enable_lr_find = 0
if enable_lr_find:
    print('LR plotting ...')
    learn.lr_find()
    learn.recorder.plot()
    plt.savefig('lr_find.png')

# In[16]:

max_lr = 1e-4
lrs = [max_lr / 20, max_lr / 5, max_lr]