def main():
    model = model_dispatcher.MODEL_DISPATCHER[BASE_MODEL](True)  # pretrained
    model.to(DEVICE)

    train_dataset = BengaliDatasetTrain(TRAINING_FOLDS, IMG_HEIGHT, IMG_WIDTH, MODEL_MEAN, MODEL_STD)
    # the training loader uses TRAIN_BATCH_SIZE (the original passed TEST_BATCH_SIZE here)
    train_loader = dataloader.DataLoader(dataset=train_dataset, batch_size=TRAIN_BATCH_SIZE,
                                         shuffle=True, num_workers=4)

    valid_dataset = BengaliDatasetTrain(VALIDATION_FOLDS, IMG_HEIGHT, IMG_WIDTH, MODEL_MEAN, MODEL_STD)
    valid_loader = dataloader.DataLoader(dataset=valid_dataset, batch_size=TEST_BATCH_SIZE,
                                         shuffle=False, num_workers=4)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=5,
                                                           factor=0.3, verbose=True)

    for epoch in range(EPOCHS):
        train(train_dataset, train_loader, model, optimizer)
        val_score = evaluate(valid_dataset, valid_loader, model)  # evaluation does not need the optimizer
        scheduler.step(val_score)
        torch.save(model.state_dict(), f'{BASE_MODEL}_{VALIDATION_FOLDS[0]}.bin')
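# All of the training loops in this file call train(...) and evaluate(...)
# without defining them. The sketch below is an assumption of what such helpers
# could look like for this competition's three classification heads
# (grapheme_root, vowel_diacritic, consonant_diacritic) -- not any author's
# actual code; loss_fn, the batch keys, and the module-level DEVICE are assumptions.
import torch
import torch.nn as nn


def loss_fn(outputs, targets):
    # average the cross-entropy over the three heads
    (o1, o2, o3), (t1, t2, t3) = outputs, targets
    l1 = nn.CrossEntropyLoss()(o1, t1)
    l2 = nn.CrossEntropyLoss()(o2, t2)
    l3 = nn.CrossEntropyLoss()(o3, t3)
    return (l1 + l2 + l3) / 3


def _unpack(batch):
    # assumed batch layout: one image tensor plus one label per head
    image = batch["image"].to(DEVICE, dtype=torch.float)
    targets = tuple(batch[k].to(DEVICE, dtype=torch.long)
                    for k in ("grapheme_root", "vowel_diacritic", "consonant_diacritic"))
    return image, targets


def train(dataset, data_loader, model, optimizer):
    model.train()
    for batch in data_loader:
        image, targets = _unpack(batch)
        optimizer.zero_grad()
        loss = loss_fn(model(image), targets)
        loss.backward()
        optimizer.step()


def evaluate(dataset, data_loader, model):
    model.eval()
    total_loss, n_batches = 0.0, 0
    with torch.no_grad():
        for batch in data_loader:
            image, targets = _unpack(batch)
            total_loss += loss_fn(model(image), targets).item()
            n_batches += 1
    return total_loss / n_batches  # lower is better, matching scheduler mode="min"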
def get_dataloaders():
    # Training data
    train_dataset = BengaliDatasetTrain(folds=TRAINING_FOLDS, height=IMG_HEIGHT, width=IMG_WIDTH,
                                        mean=MODEL_MEAN, std=MODEL_STD)
    # Validation data
    val_dataset = BengaliDatasetTrain(folds=VALIDATION_FOLDS, height=IMG_HEIGHT, width=IMG_WIDTH,
                                      mean=MODEL_MEAN, std=MODEL_STD)

    # load training data in batches
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=TRAIN_BATCH_SIZE,
                                               shuffle=True, num_workers=4)
    # load validation data in batches (no shuffling needed for evaluation)
    val_loader = torch.utils.data.DataLoader(dataset=val_dataset, batch_size=TEST_BATCH_SIZE,
                                             shuffle=False, num_workers=4)
    return train_loader, val_loader
def main():
    model = MODEL_DISPATCHER[BASE_MODEL](pretrained=True)  # pretrained=True because this is the training loop
    model.to(DEVICE)

    train_dataset = BengaliDatasetTrain(folds=TRAINING_FOLDS, img_height=IMG_HEIGHT, img_width=IMG_WIDTH,
                                        mean=MODEL_MEAN, std=MODEL_STD)
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=TRAIN_BATCH_SIZE,
                                               shuffle=True, num_workers=4)

    valid_dataset = BengaliDatasetTrain(folds=VALIDATION_FOLDS, img_height=IMG_HEIGHT, img_width=IMG_WIDTH,
                                        mean=MODEL_MEAN, std=MODEL_STD)
    valid_loader = torch.utils.data.DataLoader(dataset=valid_dataset, batch_size=TEST_BATCH_SIZE,
                                               shuffle=False, num_workers=4)

    # Here we optimize all parameters with a single learning rate; we could also
    # experiment with differential learning rates for different layers.
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

    # Some schedulers must be stepped every batch, others every epoch.
    # ReduceLROnPlateau lowers the learning rate when the monitored value plateaus:
    #   mode="min" when monitoring a loss we want to minimize,
    #   mode="max" when monitoring a score such as recall that we want to maximize.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="min", patience=5,
                                                           factor=0.3, verbose=True)

    if torch.cuda.device_count() > 1:  # if the system has multiple GPUs
        model = nn.DataParallel(model)

    # Early stopping could be implemented following:
    # https://github.com/Bjarten/early-stopping-pytorch
    # (see the sketch after this function)
    for epoch in range(EPOCHS):
        train(train_dataset, train_loader, model, optimizer)
        val_score = evaluate(valid_dataset, valid_loader, model)
        scheduler.step(val_score)
        torch.save(model.state_dict(), f"{BASE_MODEL}_fold{VALIDATION_FOLDS[0]}.bin")
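# Several variants below construct EarlyStopping(patience=5, verbose=True)
# without defining it; the comment above points to Bjarten's repository. This
# is a minimal sketch in that spirit, not the repository's exact code; the
# checkpoint path default is an assumption.
import torch


class EarlyStopping:
    """Stop training when the validation loss has not improved for `patience` epochs."""

    def __init__(self, patience=5, verbose=False, delta=0.0, path="checkpoint.pt"):
        self.patience = patience
        self.verbose = verbose
        self.delta = delta
        self.path = path
        self.counter = 0
        self.best_loss = float("inf")
        self.early_stop = False

    def __call__(self, val_loss, model):
        if val_loss < self.best_loss - self.delta:
            # improvement: checkpoint the model and reset the counter
            if self.verbose:
                print(f"Validation loss improved ({self.best_loss:.6f} --> {val_loss:.6f}). Saving model...")
            torch.save(model.state_dict(), self.path)
            self.best_loss = val_loss
            self.counter = 0
        else:
            self.counter += 1
            if self.verbose:
                print(f"EarlyStopping counter: {self.counter} out of {self.patience}")
            if self.counter >= self.patience:
                self.early_stop = True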
def main():
    print("Device is ", DEVICE)
    model = MODEL_DISPATCHER[BASE_MODEL](pretrain=True)
    model.to(DEVICE)
    print("Model loaded !!!")

    train_dataset = BengaliDatasetTrain(folds=TRAINING_FOLDS, img_height=IMG_HEIGHT, img_width=IMG_WIDTH,
                                        mean=MODEL_MEAN, std=MODEL_STD)
    train_loader = torch.utils.data.DataLoader(
        dataset=train_dataset,
        batch_size=TRAINING_BATCH_SIZE,
        shuffle=True,
        num_workers=4,
    )

    valid_dataset = BengaliDatasetTrain(folds=VAL_FOLDS, img_height=IMG_HEIGHT, img_width=IMG_WIDTH,
                                        mean=MODEL_MEAN, std=MODEL_STD)
    valid_loader = torch.utils.data.DataLoader(
        dataset=valid_dataset,  # the original mistakenly passed train_dataset here
        batch_size=TEST_BATCH_SIZE,
        shuffle=False,
        num_workers=4,
    )

    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="max", patience=5,
                                                     factor=0.3, verbose=True)

    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)

    for e in range(EPOCH):
        train(dataset=train_dataset, dataloader=train_loader, model=model, optimizer=optimizer)
        score = evaluate(dataset=valid_dataset, dataloader=valid_loader, model=model)
        scheduler.step(score)
        print("In epoch {}, the validation score is {}".format(e, score))
        torch.save(model.state_dict(), "{}_fold{}.pth".format(BASE_MODEL, VAL_FOLDS[0]))
def main():
    model = MODEL_DISPATCHER[BASE_MODEL](pretrained=True)
    model.to(DEVICE)

    train_dataset = BengaliDatasetTrain(folds=TRAIN_FOLDS, img_ht=IMG_HT, img_wd=IMG_WD,
                                        mean=MODEL_MEAN, std=MODEL_STD)
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=TRAIN_BAT_SIZE,
                                               shuffle=True, num_workers=4)

    valid_dataset = BengaliDatasetTrain(folds=VALID_FOLDS, img_ht=IMG_HT, img_wd=IMG_WD,
                                        mean=MODEL_MEAN, std=MODEL_STD)
    valid_loader = torch.utils.data.DataLoader(dataset=valid_dataset, batch_size=TEST_BAT_SIZE,
                                               shuffle=False, num_workers=4)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=5,
                                                           factor=0.3, verbose=True)

    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)

    # early stopping could be added here
    for epoch in range(EPOCHS):
        train(train_dataset, train_loader, model, optimizer)
        with torch.no_grad():
            # note: this project's evaluation helper is named `eval`, which
            # shadows the Python builtin; `evaluate` would be a safer name
            val_score = eval(valid_dataset, valid_loader, model)
        scheduler.step(val_score)
        torch.save(model.state_dict(), f'{BASE_MODEL}_fold{VALID_FOLDS[0]}.bin')
def main():
    model = MODEL_DISPATCHER[BASE_MODEL](pretrained=True)
    model.to(DEVICE)

    train_dataset = BengaliDatasetTrain(
        folds=TRAINING_FOLDS,
        img_height=IMG_HEIGHT,
        img_width=IMG_WIDTH,
        mean=MODEL_MEAN,
        std=MODEL_STD
    )
    train_loader = torch.utils.data.DataLoader(
        dataset=train_dataset,
        batch_size=TRAIN_BATCH_SIZE,
        shuffle=True,
        num_workers=4
    )

    valid_dataset = BengaliDatasetTrain(
        folds=VALIDATION_FOLDS,
        img_height=IMG_HEIGHT,
        img_width=IMG_WIDTH,
        mean=MODEL_MEAN,
        std=MODEL_STD
    )
    valid_loader = torch.utils.data.DataLoader(
        dataset=valid_dataset,
        batch_size=TEST_BATCH_SIZE,
        shuffle=False,
        num_workers=4
    )

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="min", patience=5,
                                                           factor=0.3, verbose=True)

    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)

    # maybe early stopping here
    for epoch in range(EPOCHS):
        train(train_dataset, train_loader, model, optimizer)
        val_score = evaluate(valid_dataset, valid_loader, model)
        scheduler.step(val_score)
        torch.save(model.state_dict(), f"{BASE_MODEL}_fold{VALIDATION_FOLDS[0]}.bin")
def main():
    model = MODEL_DISPATCHER[BASE_MODEL](pretrained=True)
    model.to(DEVICE)

    train_dataset = BengaliDatasetTrain(folds=TRAINING_FOLDS, img_height=IMG_HEIGHT, img_width=IMG_WIDTH,
                                        mean=DATA_MEAN, std=DATA_STD)
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=TRAIN_BATCH_SIZE,
                                               shuffle=True, num_workers=4, drop_last=True)

    test_dataset = BengaliDatasetTrain(folds=VALIDATION_FOLDS, img_height=IMG_HEIGHT, img_width=IMG_WIDTH,
                                       mean=DATA_MEAN, std=DATA_STD)
    # evaluation data: no shuffling, and keep the last partial batch so every
    # validation sample is scored (the original shuffled and dropped the last batch)
    test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=TEST_BATCH_SIZE,
                                              shuffle=False, num_workers=4)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="min", patience=5,
                                                           factor=0.3, verbose=True)

    if torch.cuda.device_count() > 1:  # device_count is a function; the original omitted the call parentheses
        model = nn.DataParallel(model)

    # the original had a stray `pass` here that left the loop body empty
    for epoch in tqdm(range(parser.EPOCHS)):
        train(train_dataset, train_loader, model, optimizer)
        score = evaluate(test_dataset, test_loader, model)
        scheduler.step(score)
        torch.save(model.state_dict(), f"{BASE_MODEL}_fold{VALIDATION_FOLDS[0]}.bin")
def main():
    model = MODEL_DISPATCHER[BASE_MODEL](pretrained=True)
    model.to(DEVICE)

    train_dataset = BengaliDatasetTrain(folds=TRAINING_FOLDS, img_height=IMG_HEIGHT, img_width=IMG_WIDTH,
                                        mean=MODEL_MEAN, std=MODEL_STD)
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=TRAIN_BATCH_SIZE,
                                               shuffle=True, num_workers=4)

    valid_dataset = BengaliDatasetTrain(folds=VALIDATION_FOLDS, img_height=IMG_HEIGHT, img_width=IMG_WIDTH,
                                        mean=MODEL_MEAN, std=MODEL_STD)
    valid_loader = torch.utils.data.DataLoader(dataset=valid_dataset, batch_size=TEST_BATCH_SIZE,
                                               shuffle=False, num_workers=4)  # no need to shuffle validation data

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="min", patience=5,
                                                           factor=0.3, verbose=True)
    early_stopping = EarlyStopping(patience=5, verbose=True)

    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)

    best_score = -1
    print("FOLD : ", VALIDATION_FOLDS[0])
    val_scores = []

    for epoch in range(1, EPOCHS + 1):
        train_loss, train_score = train(train_dataset, train_loader, model, optimizer)
        val_loss, val_score = evaluate(valid_dataset, valid_loader, model)
        scheduler.step(val_loss)

        if val_score > best_score:
            best_score = val_score
            torch.save(model.state_dict(), "{}_fold{}.pth".format(BASE_MODEL, VALIDATION_FOLDS[0]))

        epoch_len = len(str(EPOCHS))
        print_msg = (f'[{epoch:>{epoch_len}}/{EPOCHS:>{epoch_len}}] ' +
                     f'train_loss: {train_loss:.5f} ' +
                     f'train_score: {train_score:.5f} ' +
                     f'valid_loss: {val_loss:.5f} ' +
                     f'valid_score: {val_score:.5f}')
        val_scores.append(val_score)
        print(print_msg)

        early_stopping(val_loss, model)
        if early_stopping.early_stop:
            print("Early stopping")
            break

    plt.style.use('ggplot')
    plt.figure()
    plt.plot(np.arange(0, len(val_scores)), val_scores, label='val_scores')
    plt.title('Recall Score')
    plt.xlabel('# of epochs')
    plt.ylabel('Recall Score')
    plt.legend(loc='lower right')
    plt.show()
import sys
sys.path.append('../src/')

import torch
import matplotlib.pyplot as plt
import numpy as np

from dataset import BengaliDatasetTrain

dataset = BengaliDatasetTrain(folds=[0, 1], img_height=137, img_width=236,
                              mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
print(len(dataset))

idx = 0
img = dataset[idx]['image']
print(dataset[idx]['grapheme_root'])
print(dataset[idx]['vowel_diacritic'])
print(dataset[idx]['consonant_diacritic'])

img_arr = img.numpy()  # the original called the non-existent img.nump()
# move channels last, (C, H, W) -> (H, W, C), so imshow can display the image;
# the original transpose (0, 1, 2) was a no-op
img_arr = np.transpose(img_arr, (1, 2, 0))
plt.imshow(img_arr)
plt.show()
def main():
    print("Device is ", DEVICE)
    model = MODEL_DISPATCHER[BASE_MODEL](pretrain=True)
    # model.load_state_dict(torch.load("../se_net/20200307-154303/weights/best_se_net_fold4_model_3_macro_recall=0.9435.pth"))

    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
        model = nn.DataParallel(model)
    model.to(DEVICE)
    print("Model loaded !!!")

    exp_name = datetime.now().strftime("%Y%m%d-%H%M%S")
    if not os.path.exists(os.path.join("../", BASE_MODEL)):
        os.mkdir(os.path.join("../", BASE_MODEL))
    OUT_DIR = os.path.join("../", BASE_MODEL, exp_name)
    print("This experiment will be saved in ", OUT_DIR)
    os.mkdir(OUT_DIR)
    os.mkdir(os.path.join(OUT_DIR, "weights"))
    os.mkdir(os.path.join(OUT_DIR, "log"))

    train_dataset = BengaliDatasetTrain(folds=TRAINING_FOLDS, img_height=IMG_HEIGHT, img_width=IMG_WIDTH,
                                        mean=MODEL_MEAN, std=MODEL_STD)
    train_loader = torch.utils.data.DataLoader(
        dataset=train_dataset,
        batch_size=TRAINING_BATCH_SIZE,
        shuffle=True,
        pin_memory=True,
        num_workers=4,
    )

    valid_dataset = BengaliDatasetTrain(folds=VAL_FOLDS, img_height=IMG_HEIGHT, img_width=IMG_WIDTH,
                                        mean=MODEL_MEAN, std=MODEL_STD)
    valid_loader = torch.utils.data.DataLoader(dataset=valid_dataset, batch_size=TEST_BATCH_SIZE,
                                               shuffle=False, num_workers=4, pin_memory=True)

    optimizer = get_optimizer(model, parameters.get("momentum"), parameters.get("weight_decay"),
                              parameters.get("nesterov"))
    lr_scheduler = get_lr_scheduler(optimizer, parameters.get("lr_max_value"),
                                    parameters.get("lr_max_value_epoch"),
                                    num_epochs=EPOCH, epoch_length=len(train_loader))

    # Define trainer
    trainer = create_trainer(model, optimizer, DEVICE, WEIGHT_ONE, WEIGHT_TWO, WEIGHT_THR)
    # Recall for training
    EpochMetric(compute_fn=macro_recall, output_transform=output_transform).attach(trainer, 'recall')
    pbar = ProgressBar()
    pbar.attach(trainer, metric_names='all')

    evaluator = create_evaluator(model, DEVICE)
    # Recall for evaluating
    EpochMetric(compute_fn=macro_recall, output_transform=output_transform).attach(evaluator, 'recall')

    def run_evaluator(engine):
        evaluator.run(valid_loader)

    def get_curr_lr(engine):
        lr = lr_scheduler.schedulers[0].optimizer.param_groups[0]['lr']
        log_report.report('lr', lr)

    def score_fn(engine):
        # ignite's EarlyStopping treats a higher score as better, so the loss
        # must be negated (the original returned the raw loss)
        return -engine.state.metrics['loss']

    es_handler = EarlyStopping(patience=30, score_function=score_fn, trainer=trainer)
    evaluator.add_event_handler(Events.COMPLETED, es_handler)

    def default_score_fn(engine):
        return engine.state.metrics['recall']

    trainer.add_event_handler(Events.ITERATION_STARTED, lr_scheduler)

    best_model_handler = ModelCheckpoint(
        dirname=os.path.join(OUT_DIR, "weights"),
        filename_prefix=f"best_{BASE_MODEL}_fold{VAL_FOLDS[0]}",
        n_saved=3,
        global_step_transform=global_step_from_engine(trainer),
        score_name="macro_recall",
        score_function=default_score_fn)
    evaluator.add_event_handler(Events.COMPLETED, best_model_handler, {"model": model})

    trainer.add_event_handler(Events.EPOCH_COMPLETED, run_evaluator)
    trainer.add_event_handler(Events.EPOCH_COMPLETED, get_curr_lr)

    log_report = LogReport(evaluator, os.path.join(OUT_DIR, "log"))
    trainer.add_event_handler(Events.EPOCH_COMPLETED, log_report)
    trainer.add_event_handler(
        Events.EPOCH_COMPLETED,
        ModelSnapshotHandler(model,
                             filepath=os.path.join(OUT_DIR, "weights",
                                                   "{}_fold{}.pth".format(BASE_MODEL, VAL_FOLDS[0]))))

    trainer.run(train_loader, max_epochs=EPOCH)

    train_history = log_report.get_dataframe()
    train_history.to_csv(os.path.join(OUT_DIR, "log",
                                      "{}_fold{}_log.csv".format(BASE_MODEL, VAL_FOLDS[0])),
                         index=False)
    print(train_history.head())
    print("Training done !!!")
def main():
    model = MODEL_DISPATCHER[BASE_MODEL](pretrained=True)
    model.to(DEVICE)

    train_dataset = BengaliDatasetTrain(
        folds=TRAINING_FOLDS,
        img_height=IMG_HEIGHT,
        img_width=IMG_WIDTH,
        mean=MODEL_MEAN,
        std=MODEL_STD
    )
    train_loader = torch.utils.data.DataLoader(
        dataset=train_dataset,
        batch_size=TRAIN_BATCH_SIZE,
        shuffle=True,
        num_workers=4
    )

    valid_dataset = BengaliDatasetTrain(
        folds=VALIDATION_FOLDS,
        img_height=IMG_HEIGHT,
        img_width=IMG_WIDTH,
        mean=MODEL_MEAN,
        std=MODEL_STD
    )
    valid_loader = torch.utils.data.DataLoader(
        dataset=valid_dataset,
        batch_size=TEST_BATCH_SIZE,
        shuffle=False,  # no need to shuffle validation data
        num_workers=4
    )

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="min", patience=5,
                                                           factor=0.3, verbose=True)
    early_stopping = EarlyStopping(patience=5, verbose=True)

    # if torch.cuda.device_count() > 1:
    #     model = nn.DataParallel(model)

    best_score = -1
    print("FOLD : ", VALIDATION_FOLDS[0])

    for epoch in range(1, EPOCHS + 1):
        train_loss, train_score = train(train_dataset, train_loader, model, optimizer)
        val_loss, val_score = evaluate(valid_dataset, valid_loader, model)
        scheduler.step(val_loss)

        if val_score > best_score:
            best_score = val_score
            torch.save(model.state_dict(), f"{BASE_MODEL}_fold{VALIDATION_FOLDS[0]}.pth")

        epoch_len = len(str(EPOCHS))
        print_msg = (f'[{epoch:>{epoch_len}}/{EPOCHS:>{epoch_len}}] ' +
                     f'train_loss: {train_loss:.5f} ' +
                     f'train_score: {train_score:.5f} ' +
                     f'valid_loss: {val_loss:.5f} ' +
                     f'valid_score: {val_score:.5f}')
        print(print_msg)

        # EarlyStopping tracks a quantity that should decrease, so pass the
        # validation loss (the original passed val_score, which increases as
        # the model improves and would trigger stopping prematurely)
        early_stopping(val_loss, model)
        if early_stopping.early_stop:
            print("Early stopping")
            break
def main():
    global avg_train_losses
    print(f"Training with {BASE_MODEL}")

    model = MODEL_DISPATCHER[BASE_MODEL](pretrained=True)
    model.to(DEVICE)

    train_dataset = BengaliDatasetTrain(folds=TRAINING_FOLDS, img_height=IMG_HEIGHT, img_width=IMG_WIDTH,
                                        mean=MODEL_MEAN, std=MODEL_STD)
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=TRAIN_BATCH_SIZE,
                                               shuffle=True, num_workers=4)

    # Validation
    valid_dataset = BengaliDatasetTrain(folds=VALIDATION_FOLDS, img_height=IMG_HEIGHT, img_width=IMG_WIDTH,
                                        mean=MODEL_MEAN, std=MODEL_STD)
    valid_loader = torch.utils.data.DataLoader(dataset=valid_dataset, batch_size=TEST_BATCH_SIZE,
                                               shuffle=False, num_workers=4)

    # ********** experiment with different optimizers; a differential learning
    # rate for different layers is also worth trying
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="min", patience=5,
                                                           factor=0.3, verbose=True)

    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)

    os.makedirs("../input/bengali_models/", exist_ok=True)
    logFile = open("../input/bengali_models/validation_log.txt", "a")
    logFile.write(f"\n Run Time: {str(datetime.datetime.now())}")

    # ****** early stopping could also be added:
    # https://github.com/Bjarten/early-stopping-pytorch
    for epoch in range(EPOCHS):
        train(train_dataset, train_loader, model, optimizer)
        val_score = evaluate(valid_dataset, valid_loader, model)
        avg_train_loss = np.average(train_losses)
        avg_train_losses.append(avg_train_loss)
        scheduler.step(val_score)
        print(f"{epoch} Epoch Validation Score: {val_score} avg train loss: {avg_train_loss}")
        logFile.write(
            f" {BASE_MODEL}_fold{VALIDATION_FOLDS[0]}_{epoch} Epoch Validation Score: {val_score} "
            f"avg train loss: {avg_train_loss} "
        )
        torch.save(model.state_dict(),
                   f"../input/bengali_models/{BASE_MODEL}_fold{VALIDATION_FOLDS[0]}.bin")
    logFile.close()