Example #1
def get_LR(model, trainloader, optimizer, criterion, device):

    print("########## Tweaked version from fastai ###########")
    lr_find = LRFinder(model, optimizer, criterion, device=device)
    lr_find.range_test(trainloader, end_lr=1, num_iter=100)
    lr_find.plot()  # to inspect the loss-learning rate graph
    lr_find.reset()
    # index of the iteration with the lowest recorded loss
    min_val_index = lr_find.history['loss'].index(lr_find.best_loss)
    print(f"{min_val_index}")

    # heuristic upper index on the range-test curve (75 of 100 iterations)
    val_index = 75
    lr_find.plot(show_lr=lr_find.history['lr'][val_index])
    lr_find.plot(show_lr=lr_find.history['lr'][min_val_index])

    mid_val_index = math.floor((val_index + min_val_index) / 2)
    show_lr = [{
        'data': lr_find.history['lr'][val_index],
        'linestyle': 'dashed'
    }, {
        'data': lr_find.history['lr'][mid_val_index],
        'linestyle': 'solid'
    }, {
        'data': lr_find.history['lr'][min_val_index],
        'linestyle': 'dashed'
    }]
    # lr_find.plot_best_lr(skip_start=10, skip_end=5, log_lr=True, show_lr=show_lr, ax=None)

    best_lr = lr_find.history['lr'][mid_val_index]
    print(f"LR to be used: {best_lr}")
    return best_lr
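For readers who want to try the pattern above end to end, here is a minimal, self-contained sketch of an LR range test with the stock torch-lr-finder package; the tiny model and random data are throwaway placeholders, not part of the example above.

# Minimal sketch of an LR range test (assumes `pip install torch-lr-finder`;
# the model and data below are placeholders).
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from torch_lr_finder import LRFinder

device = "cuda" if torch.cuda.is_available() else "cpu"
model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10)).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-7)

x = torch.randn(512, 1, 28, 28)
y = torch.randint(0, 10, (512,))
trainloader = DataLoader(TensorDataset(x, y), batch_size=64, shuffle=True)

lr_finder = LRFinder(model, optimizer, criterion, device=device)
lr_finder.range_test(trainloader, end_lr=1, num_iter=100)
lr_finder.plot()   # inspect the loss vs. learning-rate curve
lr_finder.reset()  # restore model and optimizer to their initial state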
Example #2
def lr_finder(model, train_loader):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    criterion = nn.CrossEntropyLoss()
    optimizer_ft = optim.Adam(model.parameters(), lr=1e-7)
    lr_finder = LRFinder(model, optimizer_ft, criterion, device=device)
    lr_finder.range_test(train_loader, end_lr=1, num_iter=1000)
    lr_finder.reset()
    lr_finder.plot()
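A note on the call order above: as far as I can tell from torch-lr-finder, reset() only restores the cached model and optimizer state, while lr_finder.history is kept until the next range_test(), so plotting after reset() still shows the sweep. A tiny sketch, assuming the lr_finder object from this example:

# Sketch (assumes the lr_finder built above): history survives reset(),
# so it can still be plotted or inspected afterwards.
lr_finder.reset()
print(len(lr_finder.history["lr"]), len(lr_finder.history["loss"]))
lr_finder.plot()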
Example #3
def lr_finder(model, optimizer, criterion, trainloader):
    lr_finder = LRFinder(model, optimizer, criterion, device="cuda")
    lr_finder.range_test(trainloader,
                         end_lr=100,
                         num_iter=100,
                         step_mode="exp")
    lr_finder.plot()  # to plot the loss vs. learning-rate curve
    lr_finder.reset()  # to reset the lr_finder
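Once range_test() has run, a learning rate can also be picked programmatically instead of by eyeballing the plot. A minimal sketch of the common "steepest descent" heuristic over lr_finder.history; the skip_start/skip_end defaults mirror the library's plot(), and the helper name is ours, not part of the package.

# Sketch: suggest an LR from the range-test history by finding where the loss
# falls fastest with respect to log(lr). Assumes history was filled by range_test.
import numpy as np

def suggest_lr(history, skip_start=10, skip_end=5):
    lrs = np.array(history["lr"][skip_start:-skip_end])
    losses = np.array(history["loss"][skip_start:-skip_end])
    steepest = np.gradient(losses, np.log10(lrs)).argmin()
    return lrs[steepest]

# Usage: best_lr = suggest_lr(lr_finder.history)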
Example #4
def executeLr_finder(model, optimizer, device, trainloader, criterion):

    # find and plot the best LR
    lr_finder = LRFinder(model, optimizer, criterion, device="cuda")
    lr_finder.range_test(trainloader,
                         end_lr=100,
                         num_iter=100,
                         step_mode="exp")
    lr_finder.plot()  # to inspect the loss-learning rate graph

    lr_finder.reset()  # to reset the model and optimizer to their initial state
Example #5
def lr_finder(net, optimizer, loss_fun, trainloader, testloader):
    # Using LRFinder
    lr_finder = LRFinder(net, optimizer, loss_fun, device='cuda')
    lr_finder.range_test(trainloader,
                         val_loader=testloader,
                         start_lr=1e-3,
                         end_lr=0.1,
                         num_iter=100,
                         step_mode='exp')
    lr_finder.plot(log_lr=False)
    lr_finder.reset()  # important: restores the model and optimizer parameters to their initial states

    return lr_finder.history
def get_LR(model, trainloader, optimizer, criterion, device, testloader=None):

    # print("########## Tweaked version from fastai ###########")
    # lr_find = LRFinder(model, optimizer, criterion, device="cuda")
    # lr_find.range_test(trainloader, end_lr=100, num_iter=100)
    # best_lr=lr_find.plot()  # to inspect the loss-learning rate graph
    # lr_find.reset()
    # return best_lr

    # print("########## Tweaked version from fastai ###########")
    # lr_find = LRFinder(model, optimizer, criterion, device="cuda")
    # lr_find.range_test(trainloader, end_lr=1, num_iter=100)
    # lr_find.plot() # to inspect the loss-learning rate graph
    # lr_find.reset()
    # for index in range(len(lr_find.history['loss'])):
    #   item = lr_find.history['loss'][index]
    #   if item == lr_find.best_loss:
    #     min_val_index = index
    #     print(f"{min_val_index}")
    #
    # lr_find.plot(show_lr=lr_find.history['lr'][75])
    # lr_find.plot(show_lr=lr_find.history['lr'][min_val_index])
    #
    # val_index = 75
    # mid_val_index = math.floor((val_index + min_val_index)/2)
    # show_lr=[{'data': lr_find.history['lr'][val_index], 'linestyle': 'dashed'}, {'data': lr_find.history['lr'][mid_val_index], 'linestyle': 'solid'}, {'data': lr_find.history['lr'][min_val_index], 'linestyle': 'dashed'}]
    # # lr_find.plot_best_lr(skip_start=10, skip_end=5, log_lr=True, show_lr=show_lr, ax=None)
    #
    # best_lr = lr_find.history['lr'][mid_val_index]
    # print(f"LR to be used: {best_lr}")
    #
    # return best_lr

    print("########## Leslie Smith's approach ###########")
    lr_find = LRFinder(model, optimizer, criterion, device=device)
    lr_find.range_test(trainloader,
                       val_loader=testloader,
                       end_lr=1,
                       num_iter=100,
                       step_mode="linear")
    best_lr = lr_find.plot(log_lr=False)
    lr_find.reset()
    return best_lr
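Note that `best_lr = lr_find.plot(log_lr=False)` above appears to rely on a tweaked fork whose plot() returns a learning rate. With the stock torch-lr-finder package, plot() returns the matplotlib axes; recent releases can also return a suggestion when asked, roughly as in this hedged sketch (the keyword may be absent in older versions):

# Sketch (stock torch-lr-finder, recent releases): pass suggest_lr=True to get
# a numeric suggestion alongside the axes. Assumes the lr_find object above.
ax, suggested_lr = lr_find.plot(log_lr=False, suggest_lr=True)
print(f"Suggested LR: {suggested_lr}")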
def main(args):
    np.random.seed(432)
    torch.random.manual_seed(432)
    try:
        os.makedirs(args.outpath)
    except OSError:
        pass
    experiment_path = utils.get_new_model_path(args.outpath)
    print(experiment_path)

    train_writer = SummaryWriter(os.path.join(experiment_path, 'train_logs'))
    val_writer = SummaryWriter(os.path.join(experiment_path, 'val_logs'))
    trainer = train.Trainer(train_writer, val_writer)

    # todo: add config
    train_transform = data.build_preprocessing()
    eval_transform = data.build_preprocessing()

    trainds, evalds = data.build_dataset(args.datadir, None)
    trainds.transform = train_transform
    evalds.transform = eval_transform

    model = models.resnet34()
    opt = torch.optim.Adam(model.parameters(), lr=1e-8)

    trainloader = DataLoader(trainds,
                             batch_size=args.batch_size,
                             shuffle=True,
                             num_workers=8,
                             pin_memory=True)
    evalloader = DataLoader(evalds,
                            batch_size=args.batch_size,
                            shuffle=False,
                            num_workers=16,
                            pin_memory=True)

    # find LR with the fastai-style LR finder
    criterion = torch.nn.BCEWithLogitsLoss()
    lr_finder = LRFinder(model, opt, criterion, device="cuda")
    #     lr_finder.range_test(trainloader, val_loader=evalloader, end_lr=1, num_iter=10, step_mode="exp")
    lr_finder.range_test(trainloader,
                         end_lr=100,
                         num_iter=100,
                         step_mode="exp")

    # plot the fastai-style loss vs. LR graph
    skip_start = 6
    skip_end = 3
    lrs = lr_finder.history["lr"]
    losses = lr_finder.history["loss"]
    grad_norm = lr_finder.history["grad_norm"]

    #     ind = grad_norm.index(min(grad_norm))
    #     opt_lr = lrs[ind]
    #     print('LR with min grad_norm =', opt_lr)

    lrs = lrs[skip_start:-skip_end]
    losses = losses[skip_start:-skip_end]

    fig = plt.figure(figsize=(12, 9))
    plt.plot(lrs, losses)
    plt.xscale("log")
    plt.xlabel("Learning rate")
    plt.ylabel("Loss")
    train_writer.add_figure('loss_vs_lr', fig)

    lr_finder.reset()

    #     fixed_lr = 1e-3
    fixed_lr = 3e-4
    opt = torch.optim.Adam(model.parameters(), lr=fixed_lr)

    #     #new
    #     lr = 1e-3
    #     eta_min = 1e-5
    #     t_max = 10
    #     opt = torch.optim.Adam(model.parameters(), lr=lr)
    #     scheduler = CosineAnnealingLR(opt, T_max=t_max, eta_min=eta_min)
    #     #new

    #     one cycle for 5 epochs
    #     scheduler = CosineAnnealingLR(opt, 519*4, eta_min=1e-4)
    scheduler = CosineAnnealingLR(opt, args.epochs)

    #     scheduler = CosineAnnealingLR(opt, 519, eta_min=1e-5)
    #     scheduler = StepLR(opt, step_size=3, gamma=0.1)

    state_list = []
    for epoch in range(args.epochs):
        #         t = epoch / args.epochs
        #         lr = np.exp((1 - t) * np.log(lr_begin) + t * np.log(lr_end))
        # set the lr for all parameter groups
        trainer.train_epoch(model, opt, trainloader, fixed_lr, scheduler)
        #         trainer.train_epoch(model, opt, trainloader, 3e-4, scheduler)
        #         trainer.train_epoch(model, opt, trainloader, 9.0451e-4, scheduler)
        metrics = trainer.eval_epoch(model, evalloader)

        state = dict(
            epoch=epoch,
            model_state_dict=model.state_dict(),
            optimizer_state_dict=opt.state_dict(),
            loss=metrics['loss'],
            lwlrap=metrics['lwlrap'],
            global_step=trainer.global_step,
        )
        state_copy = copy.deepcopy(state)
        state_list.append(state_copy)
        export_path = os.path.join(experiment_path, 'last.pth')
        torch.save(state, export_path)

    # save the best path
    best_export_path = os.path.join(experiment_path, 'best.pth')

    max_lwlrap = 0
    max_lwlrap_ind = 0
    for i in range(args.epochs):
        if state_list[i]['lwlrap'] > max_lwlrap:
            max_lwlrap = state_list[i]['lwlrap']
            max_lwlrap_ind = i

    best_state = state_list[max_lwlrap_ind]
    torch.save(best_state, best_export_path)
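Aside: the best-checkpoint search at the end of main() can be written more compactly; a behaviour-equivalent sketch, assuming state_list and best_export_path as defined above:

# Sketch: pick the state with the highest lwlrap using max() with a key.
best_state = max(state_list, key=lambda s: s['lwlrap'])
torch.save(best_state, best_export_path)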
def train_loop(folds, fold):

    if CFG.device == 'GPU':
        LOGGER.info(f"========== fold: {fold} training ==========")

    # ====================================================
    # loader
    # ====================================================
    trn_idx = folds[folds['fold'] != fold].index
    val_idx = folds[folds['fold'] == fold].index

    train_folds = folds.loc[trn_idx].reset_index(drop=True)
    valid_folds = folds.loc[val_idx].reset_index(drop=True)
    valid_labels = valid_folds[CFG.target_cols].values

    train_dataset = TrainDataset(train_folds,
                                 transform=get_transforms(data='train'))
    valid_dataset = TrainDataset(valid_folds,
                                 transform=get_transforms(data='valid'))

    train_loader = DataLoader(train_dataset,
                              batch_size=CFG.batch_size,
                              shuffle=True,
                              num_workers=CFG.num_workers,
                              pin_memory=True,
                              drop_last=True)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=CFG.batch_size * 2,
                              shuffle=False,
                              num_workers=CFG.num_workers,
                              pin_memory=True,
                              drop_last=False)

    # ====================================================
    # scheduler
    # ====================================================
    def get_scheduler(optimizer):
        if CFG.scheduler == 'ReduceLROnPlateau':
            scheduler = ReduceLROnPlateau(optimizer,
                                          mode='min',
                                          factor=CFG.factor,
                                          patience=CFG.patience,
                                          verbose=True,
                                          eps=CFG.eps)
        elif CFG.scheduler == 'CosineAnnealingLR':
            scheduler = CosineAnnealingLR(optimizer,
                                          T_max=CFG.T_max,
                                          eta_min=CFG.min_lr,
                                          last_epoch=-1)
        elif CFG.scheduler == 'CosineAnnealingWarmRestarts':
            scheduler = CosineAnnealingWarmRestarts(optimizer,
                                                    T_0=CFG.T_0,
                                                    T_mult=1,
                                                    eta_min=CFG.min_lr,
                                                    last_epoch=-1)
        return scheduler

    # ====================================================
    # model & optimizer
    # ====================================================

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    model = CustomModel(CFG.model_name, pretrained=False)
    model = torch.nn.DataParallel(model)
    model.load_state_dict(
        torch.load(f'{CFG.model_name}_student_fold{fold}_best_score.pth',
                   map_location=torch.device('cpu'))['model'])
    # model.load_state_dict(torch.load(f'0.9647/{CFG.model_name}_no_hflip_fold{fold}_best_score.pth', map_location=torch.device('cpu'))['model'])
    model.to(device)

    # criterion = nn.BCEWithLogitsLoss()
    criterion = FocalLoss(alpha=1, gamma=6)

    # optimizer = Adam(model.parameters(), lr=CFG.lr, weight_decay=CFG.weight_decay, amsgrad=False)
    optimizer = SGD(model.parameters(),
                    lr=1e-2,
                    weight_decay=CFG.weight_decay,
                    momentum=0.9)

    find_lr = False
    if find_lr:
        from lr_finder import LRFinder
        lr_finder = LRFinder(model, optimizer, criterion, device=device)
        lr_finder.range_test(train_loader,
                             start_lr=1e-2,
                             end_lr=1e0,
                             num_iter=100,
                             accumulation_steps=1)

        fig_name = f'{CFG.model_name}_lr_finder.png'
        lr_finder.plot(fig_name)
        lr_finder.reset()
        return
    scheduler = get_scheduler(optimizer)
    swa_model = torch.optim.swa_utils.AveragedModel(model)
    swa_scheduler = torch.optim.swa_utils.SWALR(optimizer, swa_lr=1e-3)
    swa_start = 9

    # ====================================================
    # loop
    # ====================================================

    best_score = 0.
    best_loss = np.inf

    for epoch in range(CFG.epochs):

        start_time = time.time()

        # train
        avg_loss = train_fn(train_loader, model, criterion, optimizer, epoch,
                            scheduler, device)

        # eval: run validation before stepping the scheduler so that
        # avg_val_loss is defined for ReduceLROnPlateau
        avg_val_loss, preds, _ = valid_fn(valid_loader, model, criterion,
                                          device)

        if epoch > swa_start:
            swa_model.update_parameters(model)
            swa_scheduler.step()
        else:
            if isinstance(scheduler, ReduceLROnPlateau):
                scheduler.step(avg_val_loss)
            elif isinstance(scheduler, CosineAnnealingLR):
                scheduler.step()
            elif isinstance(scheduler, CosineAnnealingWarmRestarts):
                scheduler.step()

        # scoring
        score, scores = get_score(valid_labels, preds)

        elapsed = time.time() - start_time

        LOGGER.info(
            f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s'
        )
        LOGGER.info(
            f'Epoch {epoch+1} - Score: {score:.4f}  Scores: {np.round(scores, decimals=4)}'
        )

        if score > best_score:
            best_score = score
            LOGGER.info(
                f'Epoch {epoch+1} - Save Best Score: {best_score:.4f} Model')
            torch.save({'model': model.state_dict()}, OUTPUT_DIR +
                       f'{CFG.model_name}_no_hflip_fold{fold}_best_score.pth')

        # if avg_val_loss < best_loss:
        #     best_loss = avg_val_loss
        #     LOGGER.info(f'Epoch {epoch+1} - Save Best Loss: {best_loss:.4f} Model')
        #     torch.save({'model': model.state_dict(),
        #                 'preds': preds},
        #                 OUTPUT_DIR+f'{CFG.model_name}_fold{fold}_best_loss.pth')

    torch.optim.swa_utils.update_bn(train_loader, swa_model)
    avg_val_loss, preds, _ = valid_fn(valid_loader, swa_model, criterion,
                                      device)
    score, scores = get_score(valid_labels, preds)
    LOGGER.info(f'Save swa Score: {score:.4f} Model')
    torch.save({'model': swa_model.state_dict()},
               OUTPUT_DIR + f'swa_{CFG.model_name}_fold{fold}_{score:.4f}.pth')
    # if CFG.nprocs != 8:
    #     check_point = torch.load(OUTPUT_DIR+f'{CFG.model_name}_fold{fold}_best_score.pth')
    #     for c in [f'pred_{c}' for c in CFG.target_cols]:
    #         valid_folds[c] = np.nan
    #     try:
    #         valid_folds[[f'pred_{c}' for c in CFG.target_cols]] = check_point['preds']
    #     except:
    #         pass

    return
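The SWA pieces in train_loop() (AveragedModel, SWALR, update_bn) follow the standard torch.optim.swa_utils recipe. For reference, a condensed, self-contained sketch of that recipe; the inner training loop here is a placeholder, not the train_fn/valid_fn used above:

# Sketch of the torch.optim.swa_utils pattern: average weights after swa_start,
# then recompute BatchNorm statistics before evaluating the averaged model.
import torch
from torch.optim.swa_utils import AveragedModel, SWALR, update_bn
from torch.optim.lr_scheduler import CosineAnnealingLR

def train_with_swa(model, optimizer, criterion, train_loader,
                   epochs=15, swa_start=9, device="cuda"):
    swa_model = AveragedModel(model)
    scheduler = CosineAnnealingLR(optimizer, T_max=epochs)
    swa_scheduler = SWALR(optimizer, swa_lr=1e-3)
    for epoch in range(epochs):
        for x, y in train_loader:
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            criterion(model(x), y).backward()
            optimizer.step()
        if epoch > swa_start:
            swa_model.update_parameters(model)
            swa_scheduler.step()
        else:
            scheduler.step()
    update_bn(train_loader, swa_model, device=device)  # refresh BN stats
    return swa_model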
Example #9
def main(args):
    # load train data into ram
    # data_path = '/mntlong/lanl_comp/data/'
    file_dir = os.path.dirname(__file__)
    data_path = os.path.abspath(os.path.join(file_dir, os.path.pardir, 'data'))
    train_info_path = os.path.join(data_path, 'train_info.csv')
    train_data_path = os.path.join(data_path, 'train_compressed.npz')

    train_info = pd.read_csv(train_info_path, index_col='Unnamed: 0')
    train_info['exp_len'] = train_info['indx_end'] - train_info['indx_start']

    train_signal = np.load(train_data_path)['signal']
    train_quaketime = np.load(train_data_path)['quake_time']

    # use the last 2 waves (experiment segments) for validation
    val_start_idx = train_info.iloc[-2, :]['indx_start']

    val_signal = train_signal[val_start_idx:]
    val_quaketime = train_quaketime[val_start_idx:]

    train_signal = train_signal[:val_start_idx]
    train_quaketime = train_quaketime[:val_start_idx]

    # training params
    large_ws = 1500000
    overlap_size = int(large_ws * 0.5)
    small_ws = 150000
    num_bins = 17

    cpc_meta_model = models.CPCv1(out_size=num_bins - 1)

    # logs_path = '/mntlong/scripts/logs/'
    logs_path = os.path.abspath(os.path.join(file_dir, os.path.pardir, 'logs'))
    current_datetime = datetime.today().strftime('%b-%d_%H-%M-%S')
    log_writer_path = os.path.join(logs_path, 'runs',
                                   current_datetime + '_' + args.model_name)

    train_dataset = data.SignalCPCDataset(
        train_signal,
        train_quaketime,
        num_bins=num_bins,
        idxs_wave_end=train_info['indx_end'].values,
        large_ws=large_ws,
        overlap_size=overlap_size,
        small_ws=small_ws)
    val_dataset = data.SignalCPCDataset(
        val_signal,
        val_quaketime,
        num_bins=num_bins,
        idxs_wave_end=train_info['indx_end'].values,
        large_ws=large_ws,
        overlap_size=overlap_size,
        small_ws=small_ws)

    print('x_t size:', train_dataset[0][0].size())

    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=5,
                              pin_memory=True)
    val_loader = DataLoader(dataset=val_dataset,
                            batch_size=args.batch_size,
                            shuffle=False,
                            num_workers=5,
                            pin_memory=True)

    if args.find_lr:
        from lr_finder import LRFinder
        optimizer = optim.Adam(cpc_meta_model.parameters(), lr=1e-6)
        lr_find = LRFinder(cpc_meta_model,
                           optimizer,
                           criterion=None,
                           is_cpc=True,
                           device='cuda')
        lr_find.range_test(train_loader,
                           end_lr=2,
                           num_iter=75,
                           step_mode='exp')
        best_lr = lr_find.get_best_lr()
        lr_find.plot()
        lr_find.reset()
        print('best lr found: {:.2e}'.format(best_lr))
    else:
        best_lr = 3e-4
    # sys.exit()

    # model_path = os.path.join(logs_path, 'cpc_no_target_head_cont_last_state.pth')
    # cpc_meta_model.load_state_dict(torch.load(model_path)['model_state_dict'])
    # cpc_meta_model.to(torch.device('cuda'))

    optimizer = optim.Adam(cpc_meta_model.parameters(), lr=best_lr)
    # optimizer.load_state_dict(torch.load(model_path)['optimizer_state_dict'])
    lr_sched = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                    factor=0.5,
                                                    patience=3,
                                                    threshold=0.005)

    log_writer = SummaryWriter(log_writer_path)

    utils.train_cpc_model(cpc_meta_model=cpc_meta_model,
                          optimizer=optimizer,
                          num_bins=num_bins,
                          lr_scheduler=lr_sched,
                          train_loader=train_loader,
                          val_loader=val_loader,
                          num_epochs=args.num_epochs,
                          model_name=args.model_name,
                          logs_path=logs_path,
                          log_writer=log_writer)
def main(args):
    # load train data into ram
    # data_path = '/mntlong/lanl_comp/data/'
    file_dir = os.path.dirname(__file__)
    data_path = os.path.abspath(os.path.join(file_dir, os.path.pardir, 'data'))
    train_info_path = os.path.join(data_path, 'train_info.csv')
    train_data_path = os.path.join(data_path, 'train_compressed.npz')

    train_info = pd.read_csv(train_info_path, index_col='Unnamed: 0')
    train_info['exp_len'] = train_info['indx_end'] - train_info['indx_start']

    train_signal = np.load(train_data_path)['signal']
    train_quaketime = np.load(train_data_path)['quake_time']

    # use the last 2 waves (experiment segments) for validation
    val_start_idx = train_info.iloc[-2, :]['indx_start']

    val_signal = train_signal[val_start_idx:]
    val_quaketime = train_quaketime[val_start_idx:]

    train_signal = train_signal[:val_start_idx]
    train_quaketime = train_quaketime[:val_start_idx]

    # training params
    window_size = 150000
    overlap_size = int(window_size * 0.5)
    num_bins = 17

    model = models.BaselineNetRawSignalCnnRnnV1(out_size=num_bins-1)
    loss_fn = nn.CrossEntropyLoss()  # L1Loss() SmoothL1Loss() MSELoss()

    # logs_path = '/mntlong/scripts/logs/'
    logs_path = os.path.abspath(os.path.join(file_dir, os.path.pardir, 'logs'))
    current_datetime = datetime.today().strftime('%b-%d_%H-%M-%S')
    log_writer_path = os.path.join(logs_path, 'runs',
                                   current_datetime + '_' + args.model_name)

    train_dataset = data.SignalDataset(train_signal, train_quaketime,
                                       num_bins=num_bins,
                                       idxs_wave_end=train_info['indx_end'].values,
                                       window_size=window_size,
                                       overlap_size=overlap_size)
    val_dataset = data.SignalDataset(val_signal, val_quaketime,
                                     num_bins=num_bins,
                                     idxs_wave_end=train_info['indx_end'].values,
                                     window_size=window_size,
                                     overlap_size=overlap_size)

    print('wave size:', train_dataset[0][0].size())

    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=5,
                              pin_memory=True)
    val_loader = DataLoader(dataset=val_dataset,
                            batch_size=args.batch_size,
                            shuffle=False,
                            num_workers=5,
                            pin_memory=True)

    if args.find_lr:
        from lr_finder import LRFinder
        optimizer = optim.Adam(model.parameters(), lr=1e-6)
        lr_find = LRFinder(model, optimizer, loss_fn, device='cuda')
        lr_find.range_test(train_loader, end_lr=1, num_iter=50, step_mode='exp')
        best_lr = lr_find.get_best_lr()
        lr_find.plot()
        lr_find.reset()
        print('best lr found: {:.2e}'.format(best_lr))
    else:
        best_lr = 3e-4

    optimizer = optim.Adam(model.parameters(), lr=best_lr)  # weight_decay=0.1
    lr_sched = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                    factor=0.5,
                                                    patience=3,
                                                    threshold=0.005)
    log_writer = SummaryWriter(log_writer_path)

    utils.train_clf_model(model=model, optimizer=optimizer, lr_scheduler=lr_sched,
                          train_loader=train_loader, val_loader=val_loader,
                          num_epochs=args.num_epochs, model_name=args.model_name,
                          logs_path=logs_path, log_writer=log_writer,
                          loss_fn=loss_fn, num_bins=num_bins)
Example #11
from lr_finder import LRFinder
from src.model_lib.MultiFTNet import MultiFTNet
from src.model_lib.MiniFASNet import MiniFASNetV1, MiniFASNetV2, MiniFASNetV1SE, MiniFASNetV2SE
from src.utility import get_kernel
from torch.nn import CrossEntropyLoss, MSELoss
from torch import optim
from src.data_io.dataset_loader import get_train_loader, get_eval_loader
from src.default_config import get_default_config, update_config
from train import parse_args
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "3"
kernel_size = get_kernel(80, 60)
model = MultiFTNet(conv6_kernel=kernel_size)
cls_criterion = CrossEntropyLoss()
FT_criterion = MSELoss()
# optimizer = optim.SGD(model.parameters(),
#                                    lr=0.1,
#                                    weight_decay=5e-4,
#                                    momentum=0.9)
optimizer = optim.AdamW(model.parameters())
lr_finder = LRFinder(model, optimizer, cls_criterion, FT_criterion)  # project-local LRFinder variant that takes two criteria (classification + feature-map loss)
conf = get_default_config()
args = parse_args()
conf = update_config(args, conf)
trainloader = get_train_loader(conf)
val_loader = get_eval_loader(conf)
lr_finder.range_test(trainloader, end_lr=1, num_iter=100, step_mode="linear")
lr_finder.plot(log_lr=False)
lr_finder.reset()