Example #1
File: train.py Project: agatan/vampire
def main():
    parent_parser = HyperOptArgumentParser(strategy="grid_search", add_help=False)
    logdir = "logs"
    parent_parser.add_argument(
        "--test_tube_save_path", default=os.path.join(logdir, "test_tube_data")
    )
    parent_parser.add_argument(
        "--model_save_path", default=os.path.join(logdir, "model_weights")
    )
    parent_parser.add_argument(
        "--experiment_name", default=os.path.join(logdir, "vampire")
    )
    parser = VAMPIRE.add_model_specific_args(parent_parser, ".")
    hparams = parser.parse_args()

    model = VAMPIRE(hparams)

    exp = Experiment(
        name=hparams.experiment_name,
        save_dir=hparams.test_tube_save_path,
        autosave=False,
    )
    exp.argparse(hparams)
    exp.save()

    trainer = Trainer(experiment=exp, fast_dev_run=False)
    trainer.fit(model)
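This example builds its parser with test_tube's HyperOptArgumentParser in grid_search mode but only adds plain arguments. A minimal sketch of declaring a tunable flag for the grid search, assuming test_tube's documented opt_list API; the flag name and option values below are illustrative, not taken from the project:

from test_tube import HyperOptArgumentParser

parser = HyperOptArgumentParser(strategy='grid_search')
# plain arguments behave exactly like argparse
parser.add_argument('--experiment_name', default='vampire')
# opt_list marks a flag as tunable so grid_search can sweep over its options
parser.opt_list('--learning_rate', default=0.001, type=float,
                options=[0.0001, 0.001, 0.01], tunable=True)
hparams = parser.parse_args()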
Example #2
def main(hparams, cluster, results_dict):
    """
    Main training routine specific for this project
    :param hparams:
    :return:
    """
    # ------------------------
    # 1 INIT LIGHTNING MODEL
    # ------------------------
    print('loading model...')
    model = LightningTemplateModel(hparams)
    print('model built')

    # ------------------------
    # 2 INIT TEST TUBE EXP
    # ------------------------
    # when using grid search, it's possible for all models to start at once
    # and use the same test tube experiment version
    relative_node_id = int(os.environ['SLURM_NODEID'])
    sleep(relative_node_id + 1)

    # init experiment
    exp = Experiment(name=hparams.experiment_name,
                     save_dir=hparams.test_tube_save_path,
                     autosave=False,
                     description='test demo')

    exp.argparse(hparams)
    exp.save()

    # ------------------------
    # 3 DEFINE CALLBACKS
    # ------------------------
    model_save_path = '{}/{}/{}'.format(hparams.model_save_path, exp.name,
                                        exp.version)
    early_stop = EarlyStopping(monitor='val_acc',
                               patience=3,
                               verbose=True,
                               mode='max')

    checkpoint = ModelCheckpoint(filepath=model_save_path,
                                 save_best_only=True,
                                 verbose=True,
                                 monitor='val_loss',
                                 mode='min')

    # ------------------------
    # 4 INIT TRAINER
    # ------------------------
    trainer = Trainer(experiment=exp,
                      cluster=cluster,
                      checkpoint_callback=checkpoint,
                      early_stop_callback=early_stop,
                      gpus=hparams.gpus,
                      nb_gpu_nodes=hparams.nb_gpu_nodes)

    # ------------------------
    # 5 START TRAINING
    # ------------------------
    trainer.fit(model)
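The sleep at the top of this example staggers start-up: under a SLURM grid search every node launches at once and could otherwise claim the same test_tube experiment version. A minimal sketch of the same trick, assuming SLURM_NODEID may be unset in local runs and so falling back to 0:

import os
from time import sleep

def stagger_start():
    # each node waits a different number of seconds, so concurrently launched
    # grid-search workers do not create the same experiment version
    node_id = int(os.environ.get('SLURM_NODEID', 0))
    sleep(node_id + 1)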
Example #3
def main(hparams):
    """
    Main training routine specific for this project
    :param hparams:
    :return:
    """
    # ------------------------
    # 1 INIT LIGHTNING MODEL
    # ------------------------
    print('loading model...')
    model = LightningTemplateModel(hparams)
    print('model built')

    # ------------------------
    # 2 INIT TEST TUBE EXP
    # ------------------------

    # init experiment
    exp = Experiment(name=hparams.experiment_name,
                     save_dir=hparams.test_tube_save_path,
                     autosave=False,
                     description='test demo')

    exp.argparse(hparams)
    exp.save()

    # ------------------------
    # 3 DEFINE CALLBACKS
    # ------------------------
    model_save_path = '{}/{}/{}'.format(hparams.model_save_path, exp.name,
                                        exp.version)
    early_stop = EarlyStopping(monitor='val_acc',
                               patience=3,
                               verbose=True,
                               mode='max')

    checkpoint = ModelCheckpoint(filepath=model_save_path,
                                 save_best_only=True,
                                 verbose=True,
                                 monitor='val_loss',
                                 mode='min')

    # ------------------------
    # 4 INIT TRAINER
    # ------------------------
    trainer = Trainer(
        experiment=exp,
        checkpoint_callback=checkpoint,
        early_stop_callback=early_stop,
        gpus=hparams.gpus,
    )

    # ------------------------
    # 5 START TRAINING
    # ------------------------
    trainer.fit(model)
Example #4
def main(hparams):
    """
    Main training routine specific for this project
    """
    # ------------------------
    # 1 INIT LIGHTNING MODEL
    # ------------------------
    print('loading model...')
    model = DSANet(hparams)
    print('model built')

    # ------------------------
    # 2 INIT TEST TUBE EXP
    # ------------------------

    # init experiment
    exp = Experiment(name='dsanet_exp_{}_window={}_horizon={}'.format(
        hparams.data_name, hparams.window, hparams.horizon),
                     save_dir=hparams.test_tube_save_path,
                     autosave=False,
                     description='test demo')

    exp.argparse(hparams)
    exp.save()

    # ------------------------
    # 3 DEFINE CALLBACKS
    # ------------------------
    model_save_path = '{}/{}/{}'.format(hparams.model_save_path, exp.name,
                                        exp.version)
    early_stop = EarlyStopping(monitor='val_loss',
                               patience=5,
                               verbose=True,
                               mode='min')

    # ------------------------
    # 4 INIT TRAINER
    # ------------------------
    trainer = Trainer(
        experiment=exp,
        early_stop_callback=early_stop,
    )

    # ------------------------
    # 5 START TRAINING
    # ------------------------
    trainer.fit(model)

    print('View tensorboard logs by running\ntensorboard --logdir %s' %
          os.getcwd())
    print('and going to http://localhost:6006 on your browser')
Example #5
def main(hparams):
    """
    Main training routine specific for this project
    :param hparams:
    :return:
    """
    # init experiment
    exp = Experiment(
        name=hparams.tt_name,
        debug=hparams.debug,
        save_dir=hparams.tt_save_path,
        version=hparams.hpc_exp_number,
        autosave=False,
        description=hparams.tt_description
    )

    exp.argparse(hparams)
    exp.save()

    # build model
    model = ExampleModel(hparams)

    # callbacks
    early_stop = EarlyStopping(
        monitor='val_acc',
        patience=3,
        mode='max',
        verbose=True,
    )

    model_save_path = '{}/{}/{}'.format(hparams.model_save_path, exp.name, exp.version)
    checkpoint = ModelCheckpoint(
        filepath=model_save_path,
        save_function=None,
        save_best_only=True,
        verbose=True,
        monitor='val_acc',
        mode='max'
    )

    # configure trainer
    trainer = Trainer(
        experiment=exp,
        checkpoint_callback=checkpoint,
        early_stop_callback=early_stop,
    )

    # train model
    trainer.fit(model)
Example #6
def main(hparams):
    # load model
    model = MyModel(hparams)

    # init experiment
    exp = Experiment(
        name=hparams.experiment_name,
        save_dir=hparams.test_tube_save_path,
        autosave=False,
        description='baseline attn interval'
    )

    exp.argparse(hparams)
    exp.save()

    # define callbacks
    model_save_path = '{}/{}/{}'.format(hparams.model_save_path,
                                        exp.name, exp.version)
    early_stop = EarlyStopping(
        monitor='val_loss',
        patience=5,
        verbose=True,
        mode='min'
    )

    checkpoint = ModelCheckpoint(
        filepath=model_save_path,
        save_best_only=True,
        verbose=True,
        monitor='pr',
        mode='max'
    )

    # init trainer
    trainer = Trainer(
        experiment=exp,
        checkpoint_callback=checkpoint,
        early_stop_callback=early_stop,
        gpus=hparams.gpus,
        val_check_interval=1
    )

    # start training
    trainer.fit(model)
Example #7
def run_test():
    # model = MyModel.load_from_metrics(
    #     weights_path='experiments/saved_batch_attn_rnn/model_weights/mimic_all/1/_ckpt_epoch_10.ckpt',
    #     tags_csv='experiments/saved_batch_attn_rnn/test_tube_data/mimic_all/version_1/meta_tags.csv',
    #     on_gpu=True,
    #     map_location=None
    # )
    #    model = MyModel.load_from_metrics(
    #        weights_path='saved/rel_interval/model_weights/inner/1/_ckpt_epoch_9.ckpt',
    #        tags_csv='saved/rel_interval/test_tube_data/inner/version_1/meta_tags.csv',
    #        on_gpu=True,
    #        map_location=torch.device('cuda')
    #    )
    #    model = MyModel.load_from_metrics(
    #        weights_path='saved/rel_interval/model_weights/nash/1/_ckpt_epoch_4.ckpt',
    #        tags_csv='saved/rel_interval/test_tube_data/nash/version_1/meta_tags.csv',
    #        on_gpu=True,
    #        map_location=torch.device('cuda')
    #    )
    #    model = MyModel.load_from_metrics(
    #        weights_path='saved/rel_interval/model_weights/nash/6/_ckpt_epoch_4.ckpt',
    #        tags_csv='saved/rel_interval/test_tube_data/nash/version_6/meta_tags.csv',
    #        on_gpu=True,
    #        map_location=torch.device('cuda')
    #    )
    #    trainer = Trainer()
    #    trainer.test(model)
    #    model = MyModel.load_from_metrics(
    #        weights_path='saved/rel_interval/model_weights/nash/7/_ckpt_epoch_2.ckpt',
    #        tags_csv='saved/rel_interval/test_tube_data/nash/version_7/meta_tags.csv',
    #        on_gpu=True,
    #        map_location=torch.device('cuda')
    #    )
    #    trainer = Trainer()
    #    trainer.test(model)
    model = MyModel.load_from_metrics(
        weights_path='saved_seed1/rel_interval_layer_varient/model_weights/ad/0/_ckpt_epoch_8.ckpt',
        tags_csv='saved_seed1/rel_interval_layer_varient/test_tube_data/ad/version_0/meta_tags.csv',
        on_gpu=True,
        map_location=torch.device('cuda'))
    trainer = Trainer()
    trainer.test(model)
Example #8
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', choices=['srcnn', 'srgan'], required=True)
    parser.add_argument('--scale_factor', type=int, default=4)
    parser.add_argument('--batch_size', type=int, default=16)
    parser.add_argument('--patch_size', type=int, default=96)
    parser.add_argument('--gpus', type=str, default='0')
    opt = parser.parse_args()

    # load model class
    if opt.model == 'srcnn':
        Model = models.SRCNNModel
    elif opt.model == 'srgan':
        Model = models.SRGANModel

    # add model specific arguments to original parser
    parser = Model.add_model_specific_args(parser)
    opt = parser.parse_args()

    # instantiate experiment
    exp = Experiment(save_dir=f'./logs/{opt.model}')
    exp.argparse(opt)

    model = Model(opt)

    # define callbacks
    checkpoint_callback = ModelCheckpoint(
        filepath=exp.get_media_path(exp.name, exp.version),
    )

    # instantiate trainer
    trainer = Trainer(
        experiment=exp,
        max_nb_epochs=4000,
        add_log_row_interval=50,
        check_val_every_n_epoch=10,
        checkpoint_callback=checkpoint_callback,
        gpus=[int(i) for i in opt.gpus.split(',')]
    )

    # start training!
    trainer.fit(model)
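This example parses its arguments in two passes: the first pass only selects the model class, and that class then appends its own flags through add_model_specific_args before the second parse. A minimal sketch of the convention, with DummyModel and --learning_rate as made-up stand-ins for the real model classes:

import argparse

class DummyModel:
    @staticmethod
    def add_model_specific_args(parser):
        # each model class contributes only the flags it actually needs
        parser.add_argument('--learning_rate', type=float, default=1e-4)
        return parser

parser = argparse.ArgumentParser()
parser.add_argument('--model', default='dummy')
parser = DummyModel.add_model_specific_args(parser)
opt = parser.parse_args(['--model', 'dummy', '--learning_rate', '0.001'])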
Example #9
def run_test():
    #    model = MyModel.load_from_metrics(
    #        weights_path='saved/retain/model_weights/inner/7/_ckpt_epoch_9.ckpt',
    #        tags_csv='saved/retain/test_tube_data/inner/version_7/meta_tags.csv',
    #        on_gpu=True,
    #        map_location=None,
    #    )
    #    model = MyModel.load_from_metrics(
    #        weights_path='saved/retain/model_weights/nash/2/_ckpt_epoch_22.ckpt',
    #        tags_csv='saved/retain/test_tube_data/nash/version_2/meta_tags.csv',
    #        on_gpu=True,
    #        map_location=None,
    #    )
    #    model = MyModel.load_from_metrics(
    #        weights_path='saved/retain/model_weights/nash/6/_ckpt_epoch_4.ckpt',
    #        tags_csv='saved/retain/test_tube_data/nash/version_6/meta_tags.csv',
    #        on_gpu=True,
    #        map_location=torch.device('cuda'),
    #    )
    #    trainer = Trainer()
    #    trainer.test(model)
    #    model = MyModel.load_from_metrics(
    #        weights_path='saved/retain/model_weights/nash/7/_ckpt_epoch_4.ckpt',
    #        tags_csv='saved/retain/test_tube_data/nash/version_7/meta_tags.csv',
    #        on_gpu=True,
    #        map_location=torch.device('cuda'),
    #    )
    #    trainer = Trainer()
    #    trainer.test(model)
    model = MyModel.load_from_metrics(
        weights_path='saved_seed1/retain/model_weights/ad/1/_ckpt_epoch_8.ckpt',
        tags_csv='saved_seed1/retain/test_tube_data/ad/version_1/meta_tags.csv',
        on_gpu=True,
        map_location=torch.device('cuda'),
    )
    trainer = Trainer()
    trainer.test(model)
Example #10
def run_test():
    #    model = MyModel.load_from_metrics(
    #        weights_path='saved/tlstm/model_weights/nash/4/_ckpt_epoch_22.ckpt',
    #        tags_csv='saved/tlstm/test_tube_data/nash/version_4/meta_tags.csv',
    #        on_gpu=True,
    #        map_location=torch.device('cuda')
    #    )
    #    model = MyModel.load_from_metrics(
    #        weights_path='saved/tlstm/model_weights/ad/0/_ckpt_epoch_8.ckpt',
    #        tags_csv='saved/tlstm/test_tube_data/ad/version_0/meta_tags.csv',
    #        on_gpu=True,
    #        map_location=torch.device('cuda')
    #    )
    #    model = MyModel.load_from_metrics(
    #        weights_path='saved/tlstm/model_weights/nash/8/_ckpt_epoch_6.ckpt',
    #        tags_csv='saved/tlstm/test_tube_data/nash/version_10/meta_tags.csv',
    #        on_gpu=True,
    #        map_location=torch.device('cuda')
    #    )
    #    trainer = Trainer()
    #    trainer.test(model)
    #    model = MyModel.load_from_metrics(
    #        weights_path='saved/tlstm/model_weights/nash/9/_ckpt_epoch_6.ckpt',
    #        tags_csv='saved/tlstm/test_tube_data/nash/version_9/meta_tags.csv',
    #        on_gpu=True,
    #        map_location=torch.device('cuda')
    #    )
    #    trainer = Trainer()
    #    trainer.test(model)
    model = MyModel.load_from_metrics(
        weights_path='saved_seed1/tlstm/model_weights/ad/2/_ckpt_epoch_7.ckpt',
        tags_csv='saved_seed1/tlstm/test_tube_data/ad/version_2/meta_tags.csv',
        on_gpu=True,
        map_location=torch.device('cuda'))
    trainer = Trainer()
    trainer.test(model)
Example #11
def main(hparams, cluster, results_dict):
    """
    Main training routine specific for this project
    :param hparams:
    :return:
    """
    on_gpu = torch.cuda.is_available()
    if hparams.disable_cuda:
        on_gpu = False

    device = 'cuda' if on_gpu else 'cpu'
    hparams.__setattr__('device', device)
    hparams.__setattr__('on_gpu', on_gpu)
    hparams.__setattr__('nb_gpus', torch.cuda.device_count())
    hparams.__setattr__('inference_mode', hparams.model_load_weights_path
                        is not None)

    # delay each training start to not overwrite logs
    process_position, current_gpu = TRAINING_MODEL.get_process_position(
        hparams.gpus)
    sleep(process_position + 1)

    # init experiment
    exp = Experiment(name=hparams.tt_name,
                     debug=hparams.debug,
                     save_dir=hparams.tt_save_path,
                     version=hparams.hpc_exp_number,
                     autosave=False,
                     description=hparams.tt_description)

    exp.argparse(hparams)
    exp.save()

    # build model
    print('loading model...')
    model = TRAINING_MODEL(hparams)
    print('model built')

    # callbacks
    early_stop = EarlyStopping(monitor=hparams.early_stop_metric,
                               patience=hparams.early_stop_patience,
                               verbose=True,
                               mode=hparams.early_stop_mode)

    model_save_path = '{}/{}/{}'.format(hparams.model_save_path, exp.name,
                                        exp.version)
    checkpoint = ModelCheckpoint(filepath=model_save_path,
                                 save_function=None,
                                 save_best_only=True,
                                 verbose=True,
                                 monitor=hparams.model_save_monitor_value,
                                 mode=hparams.model_save_monitor_mode)

    # configure trainer
    trainer = Trainer(
        experiment=exp,
        cluster=cluster,
        checkpoint_callback=checkpoint,
        early_stop_callback=early_stop,
    )

    # train model
    trainer.fit(model)
Example #12
def main(hparams, cluster, results_dict):
    """
    Main training routine specific for this project
    :param hparams:
    :return:
    """
    on_gpu = torch.cuda.is_available()
    if hparams.disable_cuda:
        on_gpu = False

    device = 'cuda' if on_gpu else 'cpu'
    hparams.__setattr__('device', device)
    hparams.__setattr__('on_gpu', on_gpu)
    hparams.__setattr__('nb_gpus', torch.cuda.device_count())
    hparams.__setattr__('inference_mode', hparams.model_load_weights_path
                        is not None)

    # delay each training start to not overwrite logs
    process_position, current_gpu = TRAINING_MODEL.get_process_position(
        hparams.gpus)
    sleep(process_position + 1)

    # init experiment
    exp = Experiment(name=hparams.tt_name,
                     debug=hparams.debug,
                     save_dir=hparams.tt_save_path,
                     version=hparams.hpc_exp_number,
                     autosave=False,
                     description=hparams.tt_description)

    exp.argparse(hparams)
    exp.save()

    # build model
    print('loading model...')
    model = TRAINING_MODEL(hparams)
    print('model built')

    # callbacks
    early_stop = EarlyStopping(monitor=hparams.early_stop_metric,
                               patience=hparams.early_stop_patience,
                               verbose=True,
                               mode=hparams.early_stop_mode)

    model_save_path = '{}/{}/{}'.format(hparams.model_save_path, exp.name,
                                        exp.version)
    checkpoint = ModelCheckpoint(filepath=model_save_path,
                                 save_function=None,
                                 save_best_only=True,
                                 verbose=True,
                                 monitor=hparams.model_save_monitor_value,
                                 mode=hparams.model_save_monitor_mode)

    # configure trainer
    trainer = Trainer(experiment=exp,
                      on_gpu=on_gpu,
                      cluster=cluster,
                      enable_tqdm=hparams.enable_tqdm,
                      overfit_pct=hparams.overfit,
                      track_grad_norm=hparams.track_grad_norm,
                      fast_dev_run=hparams.fast_dev_run,
                      check_val_every_n_epoch=hparams.check_val_every_n_epoch,
                      accumulate_grad_batches=hparams.accumulate_grad_batches,
                      process_position=process_position,
                      current_gpu_name=current_gpu,
                      checkpoint_callback=checkpoint,
                      early_stop_callback=early_stop,
                      enable_early_stop=hparams.enable_early_stop,
                      max_nb_epochs=hparams.max_nb_epochs,
                      min_nb_epochs=hparams.min_nb_epochs,
                      train_percent_check=hparams.train_percent_check,
                      val_percent_check=hparams.val_percent_check,
                      test_percent_check=hparams.test_percent_check,
                      val_check_interval=hparams.val_check_interval,
                      log_save_interval=hparams.log_save_interval,
                      add_log_row_interval=hparams.add_log_row_interval,
                      lr_scheduler_milestones=hparams.lr_scheduler_milestones)

    # train model
    trainer.fit(model)
Example #13
def main(hparams, cluster, results_dict):
    """
    Main training routine specific for this project
    :param hparams:
    :return:
    """
    on_gpu = hparams.gpus is not None and torch.cuda.is_available()

    device = 'cuda' if on_gpu else 'cpu'
    hparams.__setattr__('device', device)
    hparams.__setattr__('on_gpu', on_gpu)
    hparams.__setattr__('nb_gpus', torch.cuda.device_count())
    hparams.__setattr__('inference_mode', hparams.model_load_weights_path
                        is not None)

    # delay each training start to not overwrite logs
    process_position, current_gpu = TRAINING_MODEL.get_process_position(
        hparams.gpus)
    sleep(process_position + 1)

    # init experiment
    log_dir = os.path.dirname(os.path.realpath(__file__))
    exp = Experiment(name='test_tube_exp',
                     debug=True,
                     save_dir=log_dir,
                     version=0,
                     autosave=False,
                     description='test demo')

    exp.argparse(hparams)
    exp.save()

    # build model
    print('loading model...')
    model = TRAINING_MODEL(hparams)
    print('model built')

    # callbacks
    early_stop = EarlyStopping(monitor=hparams.early_stop_metric,
                               patience=hparams.early_stop_patience,
                               verbose=True,
                               mode=hparams.early_stop_mode)

    model_save_path = '{}/{}/{}'.format(hparams.model_save_path, exp.name,
                                        exp.version)
    checkpoint = ModelCheckpoint(filepath=model_save_path,
                                 save_function=None,
                                 save_best_only=True,
                                 verbose=True,
                                 monitor=hparams.model_save_monitor_value,
                                 mode=hparams.model_save_monitor_mode)

    # GPU ids are ';'-separated within a node and ','-separated between nodes
    gpu_list = None
    if hparams.gpus is not None:
        gpu_list = [int(x) for x in hparams.gpus.split(';')]

    # configure trainer
    trainer = Trainer(experiment=exp,
                      cluster=cluster,
                      checkpoint_callback=checkpoint,
                      early_stop_callback=early_stop,
                      gpus=gpu_list)

    # train model
    trainer.fit(model)
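This example reads GPU ids from a single string using the convention noted in its comment: ';' separates ids within one node. A minimal standalone illustration of that parsing; the '0;1' value is only an example input:

def parse_gpu_list(gpus_str):
    # ';' separates GPU indices within one node; None means CPU-only
    if gpus_str is None:
        return None
    return [int(x) for x in gpus_str.split(';')]

print(parse_gpu_list('0;1'))  # -> [0, 1]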
Example #14
File: pl_voronoi.py Project: tmquan/iVoi
                                   patience=5,
                                   verbose=True,
                                   mode='auto')

        checkpoint = ModelCheckpoint(
            filepath=model_save_path,
            # save_best_only=True,
            # save_weights_only=True,
            verbose=True,
            monitor='val_loss',
            mode='auto',
            period=100,
        )

        #-----------------------------------------------------------------------
        # 4 INIT TRAINER
        #-----------------------------------------------------------------------
        trainer = Trainer(
            experiment=exp,
            checkpoint_callback=checkpoint,
            # early_stop_callback=early_stop,
            max_nb_epochs=EPOCH,
            gpus=args.gpu  #map(int, args.gpu.split(',')), #hparams.gpus,
            # distributed_backend='ddp'
        )

        #-----------------------------------------------------------------------
        # 5 START TRAINING
        #-----------------------------------------------------------------------
        trainer.fit(model)
        sys.exit()
Example #15
model_save_path = '{}/{}/{}'.format(hyperparams.model_save_path, exp.name,
                                    exp.version)
early_stop = EarlyStopping(monitor='val_acc',
                           patience=3,
                           verbose=True,
                           mode='max')

checkpoint = ModelCheckpoint(filepath=model_save_path,
                             save_best_only=True,
                             verbose=True,
                             monitor='val_loss',
                             mode='min')

# # Initialize the Trainer

# In[9]:

trainer = Trainer(experiment=exp,
                  checkpoint_callback=checkpoint,
                  early_stop_callback=early_stop,
                  gpus=hyperparams.gpus)

# # Start training!

# In[10]:

trainer.fit(model)

# In[ ]: