def main(hparams):
    """
    Main training routine specific for this project.

    Builds the model, registers the run with an ``Experiment``, creates the
    early-stopping and checkpoint callbacks, and starts training.

    :param hparams: parsed hyperparameters. This routine reads
        ``experiment_name``, ``test_tube_save_path``, ``model_save_path`` and
        ``gpus`` from it and hands the whole object to the model.
    :return: None
    """
    # ------------------------
    # 1 INIT LIGHTNING MODEL
    # ------------------------
    print('loading model...')
    model = LightningTemplateModel(hparams)
    print('model built')

    # ------------------------
    # 2 INIT TEST TUBE EXP
    # ------------------------

    # init experiment; the settings come from the `hparams` argument (there is
    # no other hyperparameter object in scope inside this function)
    exp = Experiment(name=hparams.experiment_name,
                     save_dir=hparams.test_tube_save_path,
                     autosave=False,
                     description='test demo')

    # record the hyperparameters with the experiment and persist them
    exp.argparse(hparams)
    exp.save()

    # ------------------------
    # 3 DEFINE CALLBACKS
    # ------------------------
    # checkpoints go to <model_save_path>/<experiment name>/<version>
    model_save_path = '{}/{}/{}'.format(hparams.model_save_path, exp.name,
                                        exp.version)

    # NOTE: the two callbacks watch different metrics on purpose or by
    # accident -- early stopping follows 'val_acc' (higher is better) while
    # checkpointing follows 'val_loss' (lower is better).
    early_stop = EarlyStopping(monitor='val_acc',
                               patience=3,
                               verbose=True,
                               mode='max')

    checkpoint = ModelCheckpoint(filepath=model_save_path,
                                 save_best_only=True,
                                 verbose=True,
                                 monitor='val_loss',
                                 mode='min')

    # ------------------------
    # 4 INIT TRAINER
    # ------------------------
    trainer = Trainer(
        experiment=exp,
        checkpoint_callback=checkpoint,
        early_stop_callback=early_stop,
        gpus=hparams.gpus,
    )

    # ------------------------
    # 5 START TRAINING
    # ------------------------
    trainer.fit(model)
예제 #2
0
def main(hparams):
    """
    Train a ``DSANet`` model and log the run through an ``Experiment``.

    :param hparams: parsed hyperparameters. This routine reads ``data_name``,
        ``window``, ``horizon``, ``test_tube_save_path`` and
        ``model_save_path`` from it and passes the whole object to the model.
    :return: None
    """
    # --- 1. model ---------------------------------------------------------
    print('loading model...')
    model = DSANet(hparams)
    print('model built')

    # --- 2. experiment ----------------------------------------------------
    # the experiment name encodes the dataset and the window/horizon settings
    experiment_name = 'dsanet_exp_{}_window={}_horizon={}'.format(
        hparams.data_name, hparams.window, hparams.horizon)
    exp = Experiment(
        name=experiment_name,
        save_dir=hparams.test_tube_save_path,
        autosave=False,
        description='test demo',
    )
    exp.argparse(hparams)
    exp.save()

    # --- 3. callbacks -----------------------------------------------------
    # NOTE(review): this path is built but never handed to a checkpoint
    # callback or to the trainer below.
    model_save_path = '{}/{}/{}'.format(
        hparams.model_save_path, exp.name, exp.version)

    early_stop = EarlyStopping(
        monitor='val_loss',
        mode='min',
        patience=5,
        verbose=True,
    )

    # --- 4. trainer -------------------------------------------------------
    trainer = Trainer(experiment=exp, early_stop_callback=early_stop)

    # --- 5. training ------------------------------------------------------
    trainer.fit(model)

    # tell the user where to find the logs (current working directory)
    log_dir = os.getcwd()
    print('View tensorboard logs by running\ntensorboard --logdir %s' %
          log_dir)
    print('and going to http://localhost:6006 on your browser')
예제 #3
0
def main(hparams):
    """
    Main training routine specific for this project.

    Registers the run with an ``Experiment``, builds ``ExampleModel``, and
    trains it with early stopping and best-only checkpointing, both driven by
    the ``val_acc`` metric.

    :param hparams: parsed hyperparameters. This routine reads ``tt_name``,
        ``debug``, ``tt_save_path``, ``hpc_exp_number``, ``tt_description``
        and ``model_save_path`` from it and passes the whole object to the
        model.
    :return: None
    """
    # init experiment
    exp = Experiment(
        name=hparams.tt_name,
        debug=hparams.debug,
        save_dir=hparams.tt_save_path,
        version=hparams.hpc_exp_number,
        autosave=False,
        description=hparams.tt_description
    )

    # record the hyperparameters with the experiment and persist them
    exp.argparse(hparams)
    exp.save()

    # build model
    model = ExampleModel(hparams)

    # callbacks
    # 'val_acc' is an accuracy, so a larger value is the better one: the
    # callbacks must run in 'max' mode. With 'min' they would treat falling
    # accuracy as an improvement.
    early_stop = EarlyStopping(
        monitor='val_acc',
        patience=3,
        mode='max',
        verbose=True,
    )

    # checkpoints go to <model_save_path>/<experiment name>/<version>
    model_save_path = '{}/{}/{}'.format(hparams.model_save_path, exp.name, exp.version)
    checkpoint = ModelCheckpoint(
        filepath=model_save_path,
        save_function=None,
        save_best_only=True,
        verbose=True,
        monitor='val_acc',
        mode='max'
    )

    # configure trainer
    trainer = Trainer(
        experiment=exp,
        checkpoint_callback=checkpoint,
        early_stop_callback=early_stop,
    )

    # train model
    trainer.fit(model)
def main(hparams):
    """
    Build ``MyModel``, register the run with an ``Experiment`` and train it.

    :param hparams: parsed hyperparameters. This routine reads
        ``experiment_name``, ``test_tube_save_path``, ``model_save_path`` and
        ``gpus`` from it and passes the whole object to the model.
    :return: None
    """
    # model
    model = MyModel(hparams)

    # experiment: created, fed the hyperparameters, then saved
    exp = Experiment(name=hparams.experiment_name,
                     save_dir=hparams.test_tube_save_path,
                     autosave=False,
                     description='baseline attn interval')
    exp.argparse(hparams)
    exp.save()

    # callbacks
    # checkpoints go to <model_save_path>/<experiment name>/<version>
    checkpoint_dir = '{}/{}/{}'.format(
        hparams.model_save_path, exp.name, exp.version)

    # early stopping follows 'val_loss' (lower is better) ...
    stopper = EarlyStopping(monitor='val_loss',
                            mode='min',
                            patience=5,
                            verbose=True)

    # ... while the best checkpoint is chosen by the 'pr' metric (higher is
    # better)
    saver = ModelCheckpoint(filepath=checkpoint_dir,
                            monitor='pr',
                            mode='max',
                            save_best_only=True,
                            verbose=True)

    # trainer
    trainer = Trainer(experiment=exp,
                      checkpoint_callback=saver,
                      early_stop_callback=stopper,
                      gpus=hparams.gpus,
                      val_check_interval=1)

    # training
    trainer.fit(model)
예제 #5
0
def main():
    """
    Command-line entry point: pick a super-resolution model and train it.

    Arguments are parsed in two passes. The first pass only needs ``--model``
    to decide which model class to use; that class then registers its own
    options on the parser, and the second pass parses the full command line.
    """
    # maps the --model choice to the class name inside the `models` module
    model_class_names = {
        'srcnn': 'SRCNNModel',
        'srgan': 'SRGANModel',
    }

    parser = argparse.ArgumentParser()
    parser.add_argument('--model', choices=list(model_class_names),
                        required=True)
    parser.add_argument('--scale_factor', type=int, default=4)
    parser.add_argument('--batch_size', type=int, default=16)
    parser.add_argument('--patch_size', type=int, default=96)
    parser.add_argument('--gpus', type=str, default='0')

    # First pass: model-specific options are not registered yet, so tolerate
    # unknown arguments here instead of aborting with "unrecognized
    # arguments" when the user supplies one of them.
    opt, _ = parser.parse_known_args()

    # load model class
    Model = getattr(models, model_class_names[opt.model])

    # add model specific arguments to original parser, then do the strict
    # parse now that every option is known
    parser = Model.add_model_specific_args(parser)
    opt = parser.parse_args()

    # instantiate experiment
    exp = Experiment(save_dir=f'./logs/{opt.model}')
    exp.argparse(opt)

    model = Model(opt)

    # define callbacks
    checkpoint_callback = ModelCheckpoint(
        filepath=exp.get_media_path(exp.name, exp.version),
    )

    # instantiate trainer
    trainer = Trainer(
        experiment=exp,
        max_nb_epochs=4000,
        add_log_row_interval=50,
        check_val_every_n_epoch=10,
        checkpoint_callback=checkpoint_callback,
        # --gpus is a comma-separated list of device indices, e.g. "0,1"
        gpus=[int(i) for i in opt.gpus.split(',')]
    )

    # start training!
    trainer.fit(model)
예제 #6
0
def main(hparams, cluster, results_dict):
    """
    Run one training job for ``TRAINING_MODEL``.

    Annotates ``hparams`` with device information, registers the run with an
    ``Experiment``, builds the model and the callbacks, and trains.

    :param hparams: parsed hyperparameters; read throughout and extended in
        place with ``device``, ``on_gpu``, ``nb_gpus`` and ``inference_mode``.
    :param cluster: forwarded unchanged to the ``Trainer``.
    :param results_dict: not used by this routine.
    :return: None
    """
    # GPU use requires CUDA to be available and not disabled via hparams
    cuda_available = torch.cuda.is_available()
    on_gpu = False if hparams.disable_cuda else cuda_available

    # expose the device decisions to the model through hparams
    hparams.device = 'cuda' if on_gpu else 'cpu'
    hparams.on_gpu = on_gpu
    hparams.nb_gpus = torch.cuda.device_count()
    # a weights path being set switches the run to inference mode
    hparams.inference_mode = hparams.model_load_weights_path is not None

    # stagger the start of each process so that they do not overwrite each
    # other's logs; the GPU name returned alongside is not needed here
    process_position, _current_gpu = TRAINING_MODEL.get_process_position(
        hparams.gpus)
    sleep(process_position + 1)

    # experiment: created, fed the hyperparameters, then saved
    exp = Experiment(
        name=hparams.tt_name,
        debug=hparams.debug,
        save_dir=hparams.tt_save_path,
        version=hparams.hpc_exp_number,
        autosave=False,
        description=hparams.tt_description,
    )
    exp.argparse(hparams)
    exp.save()

    # model
    print('loading model...')
    model = TRAINING_MODEL(hparams)
    print('model built')

    # callbacks, both fully configured from hparams
    early_stop = EarlyStopping(
        monitor=hparams.early_stop_metric,
        patience=hparams.early_stop_patience,
        verbose=True,
        mode=hparams.early_stop_mode,
    )

    # checkpoints go to <model_save_path>/<experiment name>/<version>
    checkpoint_dir = '{}/{}/{}'.format(
        hparams.model_save_path, exp.name, exp.version)
    checkpoint = ModelCheckpoint(
        filepath=checkpoint_dir,
        save_function=None,
        save_best_only=True,
        verbose=True,
        monitor=hparams.model_save_monitor_value,
        mode=hparams.model_save_monitor_mode,
    )

    # trainer
    trainer = Trainer(experiment=exp,
                      cluster=cluster,
                      checkpoint_callback=checkpoint,
                      early_stop_callback=early_stop)

    # training
    trainer.fit(model)
예제 #7
0
def main(hparams, cluster, results_dict):
    """
    Run one training job for ``TRAINING_MODEL`` with a fully configured
    ``Trainer``.

    Annotates ``hparams`` with device information, registers the run with an
    ``Experiment``, builds the model and the callbacks, and trains. Almost
    every ``Trainer`` option is taken from ``hparams``.

    :param hparams: parsed hyperparameters; read throughout and extended in
        place with ``device``, ``on_gpu``, ``nb_gpus`` and ``inference_mode``.
    :param cluster: forwarded unchanged to the ``Trainer``.
    :param results_dict: not used by this routine.
    :return: None
    """
    # GPU use requires CUDA to be available and not disabled via hparams
    cuda_available = torch.cuda.is_available()
    on_gpu = False if hparams.disable_cuda else cuda_available

    # expose the device decisions to the model through hparams
    hparams.device = 'cuda' if on_gpu else 'cpu'
    hparams.on_gpu = on_gpu
    hparams.nb_gpus = torch.cuda.device_count()
    # a weights path being set switches the run to inference mode
    hparams.inference_mode = hparams.model_load_weights_path is not None

    # stagger the start of each process so that they do not overwrite each
    # other's logs
    process_position, current_gpu = TRAINING_MODEL.get_process_position(
        hparams.gpus)
    sleep(process_position + 1)

    # experiment: created, fed the hyperparameters, then saved
    exp = Experiment(
        name=hparams.tt_name,
        debug=hparams.debug,
        save_dir=hparams.tt_save_path,
        version=hparams.hpc_exp_number,
        autosave=False,
        description=hparams.tt_description,
    )
    exp.argparse(hparams)
    exp.save()

    # model
    print('loading model...')
    model = TRAINING_MODEL(hparams)
    print('model built')

    # callbacks, both fully configured from hparams
    early_stop = EarlyStopping(
        monitor=hparams.early_stop_metric,
        patience=hparams.early_stop_patience,
        verbose=True,
        mode=hparams.early_stop_mode,
    )

    # checkpoints go to <model_save_path>/<experiment name>/<version>
    checkpoint_dir = '{}/{}/{}'.format(
        hparams.model_save_path, exp.name, exp.version)
    checkpoint = ModelCheckpoint(
        filepath=checkpoint_dir,
        save_function=None,
        save_best_only=True,
        verbose=True,
        monitor=hparams.model_save_monitor_value,
        mode=hparams.model_save_monitor_mode,
    )

    # collect the trainer options in one place, then build the trainer
    trainer_options = dict(
        experiment=exp,
        on_gpu=on_gpu,
        cluster=cluster,
        enable_tqdm=hparams.enable_tqdm,
        overfit_pct=hparams.overfit,
        track_grad_norm=hparams.track_grad_norm,
        fast_dev_run=hparams.fast_dev_run,
        check_val_every_n_epoch=hparams.check_val_every_n_epoch,
        accumulate_grad_batches=hparams.accumulate_grad_batches,
        process_position=process_position,
        current_gpu_name=current_gpu,
        checkpoint_callback=checkpoint,
        early_stop_callback=early_stop,
        enable_early_stop=hparams.enable_early_stop,
        max_nb_epochs=hparams.max_nb_epochs,
        min_nb_epochs=hparams.min_nb_epochs,
        train_percent_check=hparams.train_percent_check,
        val_percent_check=hparams.val_percent_check,
        test_percent_check=hparams.test_percent_check,
        val_check_interval=hparams.val_check_interval,
        log_save_interval=hparams.log_save_interval,
        add_log_row_interval=hparams.add_log_row_interval,
        lr_scheduler_milestones=hparams.lr_scheduler_milestones,
    )
    trainer = Trainer(**trainer_options)

    # training
    trainer.fit(model)
예제 #8
0
def main(hparams, cluster, results_dict):
    """
    Run one training job for ``TRAINING_MODEL``, logging next to this file.

    Annotates ``hparams`` with device information, registers the run with a
    fixed-name debug ``Experiment`` stored in this file's directory, builds
    the model and the callbacks, and trains on the GPUs listed in
    ``hparams.gpus``.

    :param hparams: parsed hyperparameters; read throughout and extended in
        place with ``device``, ``on_gpu``, ``nb_gpus`` and ``inference_mode``.
    :param cluster: forwarded unchanged to the ``Trainer``.
    :param results_dict: not used by this routine.
    :return: None
    """
    # GPU use requires both a gpu spec in hparams and an available CUDA
    on_gpu = hparams.gpus is not None and torch.cuda.is_available()

    # expose the device decisions to the model through hparams
    hparams.device = 'cuda' if on_gpu else 'cpu'
    hparams.on_gpu = on_gpu
    hparams.nb_gpus = torch.cuda.device_count()
    # a weights path being set switches the run to inference mode
    hparams.inference_mode = hparams.model_load_weights_path is not None

    # stagger the start of each process so that they do not overwrite each
    # other's logs; the GPU name returned alongside is not needed here
    process_position, _current_gpu = TRAINING_MODEL.get_process_position(
        hparams.gpus)
    sleep(process_position + 1)

    # experiment: stored in the directory that contains this source file
    this_dir = os.path.dirname(os.path.realpath(__file__))
    exp = Experiment(
        name='test_tube_exp',
        debug=True,
        save_dir=this_dir,
        version=0,
        autosave=False,
        description='test demo',
    )
    exp.argparse(hparams)
    exp.save()

    # model
    print('loading model...')
    model = TRAINING_MODEL(hparams)
    print('model built')

    # callbacks, both fully configured from hparams
    early_stop = EarlyStopping(
        monitor=hparams.early_stop_metric,
        patience=hparams.early_stop_patience,
        verbose=True,
        mode=hparams.early_stop_mode,
    )

    # checkpoints go to <model_save_path>/<experiment name>/<version>
    checkpoint_dir = '{}/{}/{}'.format(
        hparams.model_save_path, exp.name, exp.version)
    checkpoint = ModelCheckpoint(
        filepath=checkpoint_dir,
        save_function=None,
        save_best_only=True,
        verbose=True,
        monitor=hparams.model_save_monitor_value,
        mode=hparams.model_save_monitor_mode,
    )

    # hparams.gpus is a ';'-separated string of device indices (None means
    # no GPU list is passed on)
    if hparams.gpus is None:
        gpu_list = None
    else:
        gpu_list = list(map(int, hparams.gpus.split(';')))

    # trainer
    trainer = Trainer(
        experiment=exp,
        cluster=cluster,
        checkpoint_callback=checkpoint,
        early_stop_callback=early_stop,
        gpus=gpu_list,
    )

    # training
    trainer.fit(model)
예제 #9
0
파일: pl_voronoi.py 프로젝트: tmquan/iVoi
                                   patience=5,
                                   verbose=True,
                                   mode='auto')

        checkpoint = ModelCheckpoint(
            filepath=model_save_path,
            # save_best_only=True,
            # save_weights_only=True,
            verbose=True,
            monitor='val_loss',
            mode='auto',
            period=100,
        )

        #-----------------------------------------------------------------------
        # 4 INIT TRAINER
        #-----------------------------------------------------------------------
        trainer = Trainer(
            experiment=exp,
            checkpoint_callback=checkpoint,
            # early_stop_callback=early_stop,
            max_nb_epochs=EPOCH,
            gpus=args.gpu  #map(int, args.gpu.split(',')), #hparams.gpus,
            # distributed_backend='ddp'
        )

        #-----------------------------------------------------------------------
        # 5 START TRAINING
        #-----------------------------------------------------------------------
        trainer.fit(model)
        sys.exit()