Example #1
def hp_search(trial: optuna.Trial):
    if torch.cuda.is_available():
        logger.info("%s", torch.cuda.get_device_name(0))

    global gopt
    opt = gopt
    # set config
    config = load_config(opt)
    config['opt'] = opt
    logger.info("%s", config)

    # set path
    set_path(config)

    # set search spaces
    lr = trial.suggest_float('lr', 1e-6, 1e-3, log=True)  # suggest_loguniform is deprecated in recent Optuna
    bsz = trial.suggest_categorical('batch_size', [32, 64, 128])
    seed = trial.suggest_int('seed', 17, 42)
    epochs = trial.suggest_int('epochs', 1, opt.epoch)

    # prepare train, valid dataset
    train_loader, valid_loader = prepare_datasets(config, hp_search_bsz=bsz)

    with temp_seed(seed):
        # prepare model
        model = prepare_model(config)
        # create optimizer, scheduler, summary writer, scaler
        optimizer, scheduler, writer, scaler = prepare_osws(config, model, train_loader, hp_search_lr=lr)
        config['optimizer'] = optimizer
        config['scheduler'] = scheduler
        config['writer'] = writer
        config['scaler'] = scaler

        early_stopping = EarlyStopping(logger, patience=opt.patience, measure=opt.measure, verbose=1)
        best_eval_measure = float('inf') if opt.measure == 'loss' else -float('inf')
        for epoch in range(epochs):
            eval_loss, eval_acc = train_epoch(model, config, train_loader, valid_loader, epoch)

            eval_measure = eval_loss if opt.measure == 'loss' else eval_acc
            # early stopping
            if early_stopping.validate(eval_measure, measure=opt.measure):
                break
            if opt.measure == 'loss':
                is_best = eval_measure < best_eval_measure
            else:
                is_best = eval_measure > best_eval_measure
            if is_best:
                best_eval_measure = eval_measure
                early_stopping.reset(best_eval_measure)
            early_stopping.status()

            trial.report(eval_acc, epoch)
            if trial.should_prune():
                raise optuna.TrialPruned()
        return eval_acc
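
An objective like hp_search is then handed to an Optuna study. A minimal driver sketch, assuming hp_search and the global gopt are already set up (the pruner choice and trial count below are illustrative, not from the source):

import optuna

# maximize eval_acc; MedianPruner cooperates with the trial.report()/should_prune() calls above
study = optuna.create_study(direction='maximize',
                            pruner=optuna.pruners.MedianPruner())
study.optimize(hp_search, n_trials=20)
print(study.best_trial.params)  # e.g. {'lr': ..., 'batch_size': ..., 'seed': ..., 'epochs': ...}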
Example #2
def train(opt):
    if torch.cuda.is_available():
        logger.info("%s", torch.cuda.get_device_name(0))

    # set etc
    torch.autograd.set_detect_anomaly(True)

    # set config
    config = load_config(opt)
    config['opt'] = opt
    logger.info("%s", config)

    # set path
    set_path(config)

    # prepare train, valid dataset
    train_loader, valid_loader = prepare_datasets(config)

    with temp_seed(opt.seed):
        # prepare model
        model = prepare_model(config)

        # create optimizer, scheduler, summary writer, scaler
        optimizer, scheduler, writer, scaler = prepare_osws(
            config, model, train_loader)
        config['optimizer'] = optimizer
        config['scheduler'] = scheduler
        config['writer'] = writer
        config['scaler'] = scaler

        # training
        early_stopping = EarlyStopping(logger,
                                       patience=opt.patience,
                                       measure='f1',
                                       verbose=1)
        best_eval_f1 = -float('inf')
        for epoch_i in range(opt.epoch):
            epoch_st_time = time.time()
            eval_loss, eval_f1, best_eval_f1 = train_epoch(
                model, config, train_loader, valid_loader, epoch_i,
                best_eval_f1)
            # early stopping
            if early_stopping.validate(eval_f1, measure='f1'):
                break
            if eval_f1 == best_eval_f1:
                early_stopping.reset(best_eval_f1)
            early_stopping.status()
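
All of these examples run training under temp_seed(...) so that seeding stays confined to the block. The repository's implementation is not shown; a minimal sketch of such a context manager (an assumption, not the actual helper) that saves and restores RNG state:

import contextlib
import random

import numpy as np
import torch

@contextlib.contextmanager
def temp_seed(seed):
    # save current RNG states, seed deterministically, restore on exit
    py_state = random.getstate()
    np_state = np.random.get_state()
    torch_state = torch.get_rng_state()
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    try:
        yield
    finally:
        random.setstate(py_state)
        np.random.set_state(np_state)
        torch.set_rng_state(torch_state)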
Example #3
def train(opt):
    if torch.cuda.is_available():
        logger.info("%s", torch.cuda.get_device_name(0))

    # set etc
    torch.autograd.set_detect_anomaly(True)

    # set config
    config = load_config(opt)
    config['opt'] = opt
    logger.info("%s", config)
 
    # set path
    set_path(config)
  
    # prepare train, valid dataset
    train_loader, valid_loader = prepare_datasets(config)

    with temp_seed(opt.seed):
        # prepare model
        model = prepare_model(config)

        # create optimizer, scheduler, summary writer, scaler
        optimizer, scheduler, writer, scaler = prepare_osws(config, model, train_loader)
        config['optimizer'] = optimizer
        config['scheduler'] = scheduler
        config['writer'] = writer
        config['scaler'] = scaler

        # training
        early_stopping = EarlyStopping(logger, patience=opt.patience, measure='f1', verbose=1)
        local_worse_steps = 0
        prev_eval_f1 = -float('inf')
        best_eval_f1 = -float('inf')
        for epoch_i in range(opt.epoch):
            epoch_st_time = time.time()
            eval_loss, eval_f1 = train_epoch(model, config, train_loader, valid_loader, epoch_i)
            # early stopping
            if early_stopping.validate(eval_f1, measure='f1'):
                break
            if eval_f1 > best_eval_f1:
                best_eval_f1 = eval_f1
                if opt.save_path:
                    logger.info("[Best model saved] : {:10.6f}".format(best_eval_f1))
                    save_model(config, model)
                    # save finetuned bert model/config/tokenizer
                    if config['emb_class'] in ['bert', 'distilbert', 'albert', 'roberta', 'bart', 'electra']:
                        if not os.path.exists(opt.bert_output_dir):
                            os.makedirs(opt.bert_output_dir)
                        model.bert_tokenizer.save_pretrained(opt.bert_output_dir)
                        model.bert_model.save_pretrained(opt.bert_output_dir)
                early_stopping.reset(best_eval_f1)
            early_stopping.status()
            # scheduling: decay the learning rate when the measure (here F1) keeps getting worse for opt.lr_decay_steps consecutive epochs.
            if prev_eval_f1 >= eval_f1:
                local_worse_steps += 1
            else:
                local_worse_steps = 0
            logger.info('Scheduler: local_worse_steps / opt.lr_decay_steps = %d / %d', local_worse_steps, opt.lr_decay_steps)
            if not opt.use_transformers_optimizer and \
               epoch_i > opt.warmup_epoch and \
               (local_worse_steps >= opt.lr_decay_steps or early_stopping.step() > opt.lr_decay_steps):
                scheduler.step()
                local_worse_steps = 0
            prev_eval_f1 = eval_f1
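
The EarlyStopping helper used throughout is only visible through its call sites (validate, reset, status, step). A minimal sketch consistent with that usage, offered as an assumption rather than the repository's actual class:

class EarlyStopping:
    def __init__(self, logger, patience=7, measure='f1', verbose=0):
        self.logger = logger
        self.patience = patience
        self.measure = measure
        self.verbose = verbose
        self.best = float('inf') if measure == 'loss' else -float('inf')
        self.counter = 0  # consecutive epochs without improvement

    def validate(self, value, measure='f1'):
        # count epochs without improvement; True means stop training
        improved = value < self.best if measure == 'loss' else value > self.best
        if improved:
            return False  # caller is expected to call reset() with the new best
        self.counter += 1
        return self.counter > self.patience

    def reset(self, best):
        # called whenever a new best score is found
        self.best = best
        self.counter = 0

    def step(self):
        return self.counter

    def status(self):
        if self.verbose:
            self.logger.info('[EarlyStopping] %d/%d worse epochs', self.counter, self.patience)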
Example #4
def hp_search_optuna(trial: optuna.Trial):

    global gargs
    args = gargs
    # set config
    config = load_config(args)
    config['args'] = args
    logger.info("%s", config)

    # set path
    set_path(config)

    # create accelerator
    accelerator = Accelerator()
    config['accelerator'] = accelerator
    args.device = accelerator.device

    # set search spaces
    lr = trial.suggest_float('lr', 1e-5, 1e-3, log=True)
    bsz = trial.suggest_categorical('batch_size', [8, 16, 32, 64])
    seed = trial.suggest_int('seed', 17, 42)
    epochs = trial.suggest_int('epochs', 1, args.epoch)

    # prepare train, valid dataset
    train_loader, valid_loader = prepare_datasets(config, hp_search_bsz=bsz)

    with temp_seed(seed):
        # prepare model
        model = prepare_model(config)

        # create optimizer, scheduler, summary writer
        model, optimizer, scheduler, writer = prepare_others(config,
                                                             model,
                                                             train_loader,
                                                             lr=lr)
        # create secondary optimizer, scheduler
        _, optimizer_2nd, scheduler_2nd, _ = prepare_others(
            config, model, train_loader, lr=args.bert_lr_during_freezing)
        train_loader = accelerator.prepare(train_loader)
        valid_loader = accelerator.prepare(valid_loader)

        config['optimizer'] = optimizer
        config['scheduler'] = scheduler
        config['optimizer_2nd'] = optimizer_2nd
        config['scheduler_2nd'] = scheduler_2nd
        config['writer'] = writer

        total_batch_size = args.batch_size * accelerator.num_processes * args.gradient_accumulation_steps
        logger.info("***** Running training *****")
        logger.info(f"  Num examples = {len(train_loader)}")
        logger.info(f"  Num Epochs = {args.epoch}")
        logger.info(
            f"  Instantaneous batch size per device = {args.batch_size}")
        logger.info(
            f"  Total train batch size (w. parallel, distributed & accumulation) = {total_batch_size}"
        )
        logger.info(
            f"  Gradient Accumulation steps = {args.gradient_accumulation_steps}"
        )
        logger.info(f"  Total optimization steps = {args.max_train_steps}")

        early_stopping = EarlyStopping(logger,
                                       patience=args.patience,
                                       measure='f1',
                                       verbose=1)
        best_eval_f1 = -float('inf')
        for epoch in range(epochs):
            eval_loss, eval_f1, best_eval_f1 = train_epoch(
                model, config, train_loader, valid_loader, epoch, best_eval_f1)

            # early stopping
            if early_stopping.validate(eval_f1, measure='f1'):
                break
            if eval_f1 == best_eval_f1:
                early_stopping.reset(best_eval_f1)
            early_stopping.status()

            trial.report(eval_f1, epoch)
            if trial.should_prune():
                raise optuna.TrialPruned()
        return eval_f1
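
Inside train_epoch (not shown), the usual Accelerate pattern replaces loss.backward() with accelerator.backward(loss). A self-contained sketch of that pattern with a dummy model and data, assuming only that the repository follows standard Accelerate usage:

import torch
from accelerate import Accelerator

accelerator = Accelerator()
model = torch.nn.Linear(4, 2)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
batches = [(torch.randn(8, 4), torch.randint(0, 2, (8,))) for _ in range(4)]
model, optimizer = accelerator.prepare(model, optimizer)

accumulation_steps = 2
for step, (x, y) in enumerate(batches):
    x, y = x.to(accelerator.device), y.to(accelerator.device)
    loss = torch.nn.functional.cross_entropy(model(x), y) / accumulation_steps
    accelerator.backward(loss)  # replaces loss.backward() under Accelerate
    if (step + 1) % accumulation_steps == 0:
        optimizer.step()
        optimizer.zero_grad()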
Example #5
def train(args):

    # set etc
    torch.autograd.set_detect_anomaly(False)

    # set config
    config = load_config(args)
    config['args'] = args
    logger.info("%s", config)

    # set path
    set_path(config)

    # create accelerator
    accelerator = Accelerator()
    config['accelerator'] = accelerator
    args.device = accelerator.device

    # prepare train, valid dataset
    train_loader, valid_loader = prepare_datasets(config)

    with temp_seed(args.seed):
        # prepare model
        model = prepare_model(config)

        # create optimizer, scheduler, summary writer
        model, optimizer, scheduler, writer = prepare_others(
            config, model, train_loader)
        # create secondary optimizer, scheduler
        _, optimizer_2nd, scheduler_2nd, _ = prepare_others(
            config, model, train_loader, lr=args.bert_lr_during_freezing)
        train_loader = accelerator.prepare(train_loader)
        valid_loader = accelerator.prepare(valid_loader)

        config['optimizer'] = optimizer
        config['scheduler'] = scheduler
        config['optimizer_2nd'] = optimizer_2nd
        config['scheduler_2nd'] = scheduler_2nd
        config['writer'] = writer

        total_batch_size = args.batch_size * accelerator.num_processes * args.gradient_accumulation_steps
        logger.info("***** Running training *****")
        logger.info(f"  Num examples = {len(train_loader)}")
        logger.info(f"  Num Epochs = {args.epoch}")
        logger.info(
            f"  Instantaneous batch size per device = {args.batch_size}")
        logger.info(
            f"  Total train batch size (w. parallel, distributed & accumulation) = {total_batch_size}"
        )
        logger.info(
            f"  Gradient Accumulation steps = {args.gradient_accumulation_steps}"
        )
        logger.info(f"  Total optimization steps = {args.max_train_steps}")

        # training
        early_stopping = EarlyStopping(logger,
                                       patience=args.patience,
                                       measure='f1',
                                       verbose=1)
        best_eval_f1 = -float('inf')
        for epoch_i in range(args.epoch):
            epoch_st_time = time.time()
            eval_loss, eval_f1, best_eval_f1 = train_epoch(
                model, config, train_loader, valid_loader, epoch_i,
                best_eval_f1)
            # early stopping
            if early_stopping.validate(eval_f1, measure='f1'):
                break
            if eval_f1 == best_eval_f1:
                early_stopping.reset(best_eval_f1)
            early_stopping.status()
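
The total_batch_size formula above makes the effective batch explicit: per-device batch size × number of processes × gradient accumulation steps. For example, batch_size=32 on 2 GPUs with gradient_accumulation_steps=4 yields 32 × 2 × 4 = 256 examples per optimizer step.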
Example #6
def train(opt):
    if torch.cuda.is_available():
        logger.info("%s", torch.cuda.get_device_name(0))

    # set etc
    torch.autograd.set_detect_anomaly(True)

    # set config
    config = load_config(opt)
    config['opt'] = opt
    logger.info("%s", config)

    # set path
    set_path(config)

    # prepare train, valid dataset
    train_loader, valid_loader = prepare_datasets(config)

    with temp_seed(opt.seed):
        # prepare model
        model = prepare_model(config)

        # create optimizer, scheduler, summary writer, scaler
        optimizer, scheduler, writer, scaler = prepare_osws(
            config, model, train_loader)
        config['optimizer'] = optimizer
        config['scheduler'] = scheduler
        config['writer'] = writer
        config['scaler'] = scaler

        # training
        early_stopping = EarlyStopping(logger,
                                       patience=opt.patience,
                                       measure=opt.measure,
                                       verbose=1)
        best_eval_measure = float(
            'inf') if opt.measure == 'loss' else -float('inf')
        for epoch_i in range(opt.epoch):
            epoch_st_time = time.time()
            eval_loss, eval_acc, best_eval_measure = train_epoch(
                model, config, train_loader, valid_loader, epoch_i,
                best_eval_measure)
            # for nni
            if opt.hp_search_nni:
                nni.report_intermediate_result(eval_acc)
                logger.info('[eval_acc] : %g', eval_acc)
                logger.info('[Pipe send intermediate result done]')
            eval_measure = eval_loss if opt.measure == 'loss' else eval_acc
            # early stopping
            if early_stopping.validate(eval_measure, measure=opt.measure):
                break
            if eval_measure == best_eval_measure:
                early_stopping.reset(best_eval_measure)
            early_stopping.status()
        # for nni
        if opt.hp_search_nni:
            nni.report_final_result(eval_acc)
            logger.info('[Final result] : %g', eval_acc)
            logger.info('[Send final result done]')
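
When opt.hp_search_nni is set, the nni.report_intermediate_result/report_final_result calls above feed metrics back to an NNI experiment. On the trial side, hyperparameters usually arrive via nni.get_next_parameter() before train() runs; a sketch in which merge_nni_params and get_params are hypothetical helpers, not from the source:

import nni

def merge_nni_params(opt):
    # hypothetical helper: overwrite option fields with NNI-tuned values
    tuned = nni.get_next_parameter()  # e.g. {'lr': 3e-5, 'batch_size': 64}
    for key, value in tuned.items():
        setattr(opt, key, value)
    return opt

if __name__ == '__main__':
    opt = get_params()  # assumption: the script's argparse options
    if opt.hp_search_nni:
        opt = merge_nni_params(opt)
    train(opt)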