def main(hparams): """ Main training routine specific for this project :param hparams: :return: """ # init experiment exp = Experiment( name=hparams.tt_name, debug=hparams.debug, save_dir=hparams.tt_save_path, version=hparams.hpc_exp_number, autosave=False, description=hparams.tt_description ) exp.argparse(hparams) exp.save() # build model model = ExampleModel(hparams) # callbacks early_stop = EarlyStopping( monitor='val_acc', patience=3, mode='min', verbose=True, ) model_save_path = '{}/{}/{}'.format(hparams.model_save_path, exp.name, exp.version) checkpoint = ModelCheckpoint( filepath=model_save_path, save_function=None, save_best_only=True, verbose=True, monitor='val_acc', mode='min' ) # configure trainer trainer = Trainer( experiment=exp, checkpoint_callback=checkpoint, early_stop_callback=early_stop, ) # train model trainer.fit(model)
def main(hparams): """ Main training routine specific for this project """ # ------------------------ # 1 INIT LIGHTNING MODEL # ------------------------ print('loading model...') model = DSANet(hparams) print('model built') # ------------------------ # 2 INIT TEST TUBE EXP # ------------------------ # init experiment exp = Experiment(name='dsanet_exp_{}_window={}_horizon={}'.format( hparams.data_name, hparams.window, hparams.horizon), save_dir=hparams.test_tube_save_path, autosave=False, description='test demo') exp.argparse(hparams) exp.save() # ------------------------ # 3 DEFINE CALLBACKS # ------------------------ early_stop = EarlyStopping(monitor='val_loss', patience=5, verbose=True, mode='min') # ------------------------ # 4 INIT TRAINER # ------------------------ trainer = Trainer( experiment=exp, early_stop_callback=early_stop, ) # ------------------------ # 5 START TRAINING # ------------------------ trainer.fit(model) print('View tensorboard logs by running\ntensorboard --logdir %s' % os.getcwd()) print('and going to http://localhost:6006 on your browser')
def train(hparams):
    # init exp and track all the parameters from the HyperOptArgumentParser
    exp = Experiment(
        name=hparams.test_tube_exp_name,
        save_dir=hparams.log_path,
        autosave=False,
    )
    exp.argparse(hparams)

    # pretend to train
    x = torch.rand((1, hparams.x_val))
    for train_step in range(0, 100):
        y = torch.rand((hparams.x_val, 1))
        out = x.mm(y)
        exp.log({'fake_err': out.item()})

    # save exp when we're done
    exp.save()
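# A minimal driver sketch (assumed, not part of the original snippet) showing
# how a `train` function like the one above is typically invoked with
# test_tube's HyperOptArgumentParser, so each sampled trial is logged to its
# own Experiment version. Flag names mirror the attributes `train` reads;
# the option values and trial count are illustrative.
from test_tube import HyperOptArgumentParser

parser = HyperOptArgumentParser(strategy='random_search')
parser.add_argument('--test_tube_exp_name', default='fake_training_demo')
parser.add_argument('--log_path', default='/tmp/test_tube_logs')
# opt_list marks --x_val as tunable and lists the values to sample from
parser.opt_list('--x_val', default=12, type=int, options=[12, 24, 48], tunable=True)
hparams = parser.parse_args()

# draw 9 random trials and train each one sequentially
for trial_hparams in hparams.trials(9):
    train(trial_hparams)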
def main(hparams, cluster): """ Main training routine specific for this project :param hparams: :return: """ # ------------------------ # 1 INIT LIGHTNING MODEL # ------------------------ print('loading model...') model = LightningTemplateModel(hparams) print('model built') # ------------------------ # 2 INIT TEST TUBE EXP # ------------------------ # when using grid search, it's possible for all models to start at once # and use the same test tube experiment version relative_node_id = int(os.environ['SLURM_NODEID']) sleep(relative_node_id + 1) # init experiment exp = Experiment( name=hyperparams.experiment_name, save_dir=hyperparams.test_tube_save_path, autosave=False, version=hparams.hpc_exp_number, # match the slurm job version number description='test demo') exp.argparse(hparams) exp.save() # ------------------------ # 4 INIT TRAINER # ------------------------ trainer = Trainer(experiment=exp, gpus=hparams.per_experiment_nb_gpus, nb_gpu_nodes=hyperparams.nb_gpu_nodes, distributed_backend=hyperparams.distributed_backend) # ------------------------ # 5 START TRAINING # ------------------------ trainer.fit(model)
def main_trainer(hparams):
    print_params(hparams)
    full_exp = Experiment(name=hparams.tt_name + '_overall',
                          debug=hparams.debug,
                          autosave=False,
                          description=hparams.tt_description,
                          save_dir=hparams.tt_save_path)
    full_exp.add_argparse_meta(hparams)

    # fit model
    val_scores = []
    best_acc = 0
    best_loss = 0
    best_trial_nb = 0
    for trial_nb in range(hparams.nb_trials):
        exp = Experiment(name=hparams.tt_name,
                         debug=hparams.debug,
                         autosave=False,
                         description=hparams.tt_description,
                         save_dir=hparams.tt_save_path)
        exp.add_argparse_meta(hparams)

        data = SequentialReadingsData(window_size=hparams.time_steps,
                                      data_path=hparams.data_path,
                                      flatten_x=True)

        val_loss, val_acc, history = fit_feedforward(hparams, exp,
                                                     data.train_x, data.train_y,
                                                     data.val_x, data.val_y,
                                                     trial_nb)
        log_history(history.history, exp)
        exp.add_metric_row({'final_val_acc': val_acc, 'final_val_loss': val_loss})
        exp.save()

        full_exp.add_metric_row({'val_acc': val_acc,
                                 'val_loss': val_loss,
                                 'trial_nb': trial_nb})

        # track the best trial so far
        if val_acc > best_acc:
            best_acc = val_acc
            best_loss = val_loss
            best_trial_nb = trial_nb

        val_scores.append(val_acc)

    mean_val_acc = np.mean(val_scores)
    full_exp.add_metric_row({'final_val_acc': mean_val_acc,
                             'best_val_loss': best_loss,
                             'best_val_acc': best_acc,
                             'best_trial_nb': best_trial_nb})
    full_exp.save()
def main(hparams):
    # load model
    model = MyModel(hparams)

    # init experiment
    exp = Experiment(
        name=hparams.experiment_name,
        save_dir=hparams.test_tube_save_path,
        autosave=False,
        description='baseline attn interval'
    )
    exp.argparse(hparams)
    exp.save()

    # define callbacks
    model_save_path = '{}/{}/{}'.format(hparams.model_save_path, exp.name, exp.version)
    early_stop = EarlyStopping(
        monitor='val_loss',
        patience=5,
        verbose=True,
        mode='min'
    )
    checkpoint = ModelCheckpoint(
        filepath=model_save_path,
        save_best_only=True,
        verbose=True,
        monitor='pr',
        mode='max'
    )

    # init trainer
    trainer = Trainer(
        experiment=exp,
        checkpoint_callback=checkpoint,
        early_stop_callback=early_stop,
        gpus=hparams.gpus,
        val_check_interval=1
    )

    # start training
    trainer.fit(model)
def train(hparams):
    # init exp and track all the parameters from the HyperOptArgumentParser
    exp = Experiment(name='dense_model', save_dir='/some/path', autosave=False)
    exp.add_argparse_meta(hparams)

    # define tensorflow graph (TF 1.x API)
    x = tf.placeholder(dtype=tf.int32, name='x')
    y = tf.placeholder(dtype=tf.int32, name='y')
    out = x * y

    sess = tf.Session()

    # run the tf op
    for train_step in range(0, 100):
        output = sess.run(out, feed_dict={x: hparams.x_val, y: hparams.y_val})
        exp.add_metric_row({'fake_err': output})

    # save exp when we're done
    exp.save()
def main(hparams, data):
    # init experiment
    log_dir = os.path.dirname(os.path.realpath(__file__))
    exp = Experiment(name=hparams.exp_name,
                     debug=False,
                     save_dir=log_dir,
                     version=0,
                     autosave=True,
                     description='P2R codebase')

    # set the hparams for the experiment
    exp.argparse(hparams)
    exp.save()

    # build model
    model = P2rSystem(hparams, data)

    model_save_path = '{}/{}/version_{}/checkpoints'.format(
        exp.save_dir, exp.name, exp.version)
    checkpoint = ModelCheckpoint(filepath=model_save_path,
                                 verbose=True,
                                 monitor='tng_loss',
                                 mode='min',
                                 save_best_only=True)

    # configure trainer
    trainer = Trainer(experiment=exp,
                      checkpoint_callback=checkpoint,
                      min_nb_epochs=1,
                      max_nb_epochs=hparams.max_nb_epochs,
                      track_grad_norm=2,
                      accumulate_grad_batches=1,
                      row_log_interval=1,
                      amp_level='O2',
                      use_amp=True,
                      gpus=1)

    # train model
    trainer.fit(model)
    trainer.test()

    # save a final checkpoint after testing
    filepath = '{}/_ckpt_epoch_final.ckpt'.format(model_save_path)
    checkpoint.save_model(filepath, False)
def main(hparams):
    exp = Experiment(
        name=hparams.tt_name,
        debug=hparams.debug,
        save_dir=hparams.tt_save_path,
        version=hparams.hpc_exp_number,
        autosave=False,
        description=hparams.tt_description,
    )
    exp.argparse(hparams)
    exp.save()

    model = AutoregressiveFaceVAE(hparams)

    early_stop = EarlyStopping(monitor="avg_val_loss",
                               patience=3,
                               verbose=True,
                               mode="min")

    model_save_path = "{}/{}/{}".format(hparams.model_save_path, exp.name, exp.version)
    checkpoint = ModelCheckpoint(
        filepath=model_save_path,
        save_best_only=True,
        verbose=True,
        monitor="avg_val_loss",
        mode="min",
    )

    trainer = Trainer(
        experiment=exp,
        checkpoint_callback=checkpoint,
        early_stop_callback=early_stop,
        gpus=hparams.gpus,
        distributed_backend=hparams.dist_backend,
        # val_check_interval=0.5,
        # distributed_backend="dp",
        # overfit_pct=0.01
    )
    trainer.fit(model)
def main(hparams):
    # init experiment
    experiment_args = parse_argdict_for_method(Experiment.__init__, hparams)
    exp = Experiment(**experiment_args)

    # set the hparams for the experiment
    exp.argparse(hparams)
    exp.save()

    # build model
    model = Network(hparams)

    # callbacks
    if hparams.enable_early_stop:
        early_stop = EarlyStopping(monitor=hparams.monitor_value,
                                   patience=hparams.patience,
                                   verbose=True,
                                   mode=hparams.monitor_mode)
    else:
        early_stop = None

    if hparams.enable_model_checkpoint:
        model_save_path = pathlib.Path(exp.log_dir).parent / 'model_weights'
        checkpoint = ModelCheckpoint(
            filepath=model_save_path,
            save_best_only=hparams.save_best_only,
            save_weights_only=hparams.save_weights_only,
            verbose=True,
            monitor=hparams.monitor_value,
            mode=hparams.monitor_mode)
    else:
        checkpoint = None

    # configure trainer
    trainer_args = parse_argdict_for_method(Trainer.__init__, hparams)
    trainer = Trainer(experiment=exp,
                      early_stop_callback=early_stop,
                      checkpoint_callback=checkpoint,
                      **trainer_args)

    # train model
    trainer.fit(model)
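# `parse_argdict_for_method` is a project helper not shown above. A plausible
# sketch is below, inferred from how it is used (filter an argparse Namespace
# down to the keyword arguments a callable actually accepts); the name and
# behavior are assumptions, not taken from the original source.
import inspect


def parse_argdict_for_method(method, hparams):
    """Return the subset of vars(hparams) matching `method`'s parameters."""
    params = inspect.signature(method).parameters
    argdict = vars(hparams)
    return {name: argdict[name]
            for name in params
            if name != 'self' and name in argdict}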
def main_trainer(hparams):
    print_params(hparams)
    exp = Experiment(name=hparams.tt_name,
                     debug=hparams.debug,
                     autosave=False,
                     description=hparams.tt_description,
                     save_dir=hparams.tt_save_path)
    exp.add_argparse_meta(hparams)

    # fit model
    val_scores = []
    best_score = 0
    for trial_nb in range(hparams.nb_trials):
        data = dataset_loader.IndividualSequencesData(
            hparams.data_path, y_labels=hparams.y_labels.split(','))
        X, Y, lengths = flatten_data(data.train_x_y)

        # fit
        model = hmm.GaussianHMM(n_components=hparams.nb_components,
                                n_iter=hparams.nb_hmm_iters)
        model.fit(X, lengths)

        val_X, val_Y, lengths = flatten_data(data.val_x_y)
        Y_hat = model.predict(val_X, lengths)
        val_score = np.equal(Y_hat, val_Y).sum() / float(len(Y_hat))

        # save model when we have a better one
        if val_score > best_score:
            best_score = val_score
            save_model(model, hparams, exp, trial_nb)

        val_scores.append(val_score)
        exp.add_metric_row({'val_acc': val_score, 'trial_nb': trial_nb})

    mean_val_acc = np.mean(val_scores)
    exp.add_metric_row({'final_val_acc': mean_val_acc})
    exp.save()
def main(hparams): """ Main training routine specific for this project :param hparams: :return: """ # ------------------------ # 1 INIT LIGHTNING MODEL # ------------------------ print('loading model...') model = LightningTemplateModel(hparams) print('model built') # ------------------------ # 2 INIT Logger # ------------------------ # init experiment exp = Experiment( name=hyperparams.experiment_name, save_dir=hyperparams.test_tube_save_path, autosave=False, description='test demo' ) exp.argparse(hparams) exp.save() # ------------------------ # 3 INIT TRAINER # ------------------------ trainer = Trainer( experiment=exp, gpus=hparams.gpus, distributed_backend=hparams.dist_backend, ) # ------------------------ # 4 START TRAINING # ------------------------ trainer.fit(model)
def create_tt_experiment(hparams): """Create test-tube experiment for logging training and storing models. Parameters ---------- hparams : :obj:`dict` dictionary of hyperparameters defining experiment that will be saved as a csv file Returns ------- :obj:`tuple` - if experiment defined by hparams already exists, returns :obj:`(None, None, None)` - if experiment does not exist, returns :obj:`(hparams, sess_ids, exp)` """ from test_tube import Experiment # get session_dir hparams['session_dir'], sess_ids = get_session_dir( hparams, session_source=hparams.get('all_source', 'save')) if not os.path.isdir(hparams['session_dir']): os.makedirs(hparams['session_dir']) export_session_info_to_csv(hparams['session_dir'], sess_ids) hparams['expt_dir'] = get_expt_dir(hparams) if not os.path.isdir(hparams['expt_dir']): os.makedirs(hparams['expt_dir']) # check to see if experiment already exists if experiment_exists(hparams): return None, None, None exp = Experiment( name=hparams['experiment_name'], debug=False, save_dir=os.path.dirname(hparams['expt_dir'])) exp.save() hparams['version'] = exp.version return hparams, sess_ids, exp
def main_trainer(hparams):
    print_params(hparams)
    exp = Experiment(name=hparams.tt_name,
                     debug=hparams.debug,
                     autosave=False,
                     description=hparams.tt_description,
                     save_dir=hparams.tt_save_path)
    exp.add_argparse_meta(hparams)

    # fit model
    val_scores, train_scores = [], []
    best_score = 0
    for trial_nb in range(hparams.nb_trials):
        # init data loader
        data = SequentialReadingsData(window_size=hparams.time_steps,
                                      data_path=hparams.data_path,
                                      flatten_x=True)

        clf = RandomForestClassifier(n_estimators=hparams.nb_estimators)
        clf.fit(data.train_x, data.train_y)
        train_score = clf.score(data.train_x, data.train_y)
        val_score = clf.score(data.val_x, data.val_y)

        # save model when we have a better one
        if val_score > best_score:
            best_score = val_score
            save_model(clf, hparams, exp, trial_nb)

        train_scores.append(train_score)
        val_scores.append(val_score)
        exp.add_metric_row({'val_acc': val_score,
                            'train_acc': train_score,
                            'trial_nb': trial_nb})

    mean_val_acc = np.mean(val_scores)
    mean_train_acc = np.mean(train_scores)
    exp.add_metric_row({'final_val_acc': mean_val_acc,
                        'final_train_acc': mean_train_acc})
    exp.save()
def train(hparams, *args): """Train your awesome model. :param hparams: The arguments to run the model with. """ # Initialize experiments and track all the hyperparameters exp = Experiment( name=hparams.test_tube_exp_name, # Location to save the metrics. save_dir=hparams.log_path, autosave=False, ) exp.argparse(hparams) # Pretend to train. x = torch.rand((1, hparams.x_val)) for train_step in range(0, 100): y = torch.rand((hparams.x_val, 1)) out = x.mm(y) exp.log({'fake_err': out.item()}) # Save exp when . exp.save()
def train(hparams):
    # init exp and track all the parameters from the HyperOptArgumentParser
    exp = Experiment(
        name=hparams.test_tube_exp_name,
        save_dir=hparams.log_path,
        autosave=False,
    )
    exp.argparse(hparams)

    # define tensorflow graph (TF 1.x API)
    x = tf.placeholder(dtype=tf.int32, name='x')
    y = tf.placeholder(dtype=tf.int32, name='y')
    out = x * y

    sess = tf.Session()

    # Run the tf op
    for train_step in range(0, 100):
        output = sess.run(out, feed_dict={x: hparams.x_val, y: hparams.y_val})
        exp.log({'fake_err': output})

    # save exp when we're done
    exp.save()
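# Hedged driver sketch (assumed, not from the original source): the same
# `train` function can be fanned out over CPU workers with test_tube's
# HyperOptArgumentParser. Verify the exact optimize_parallel_cpu signature
# against your installed test_tube version; some versions pass extra
# positional args, which is why an earlier snippet declares train(hparams, *args).
from test_tube import HyperOptArgumentParser

parser = HyperOptArgumentParser(strategy='grid_search')
parser.add_argument('--test_tube_exp_name', default='tf_demo')
parser.add_argument('--log_path', default='/tmp/test_tube_logs')
parser.opt_list('--x_val', default=2, type=int, options=[2, 4, 8], tunable=True)
parser.opt_list('--y_val', default=3, type=int, options=[3, 9], tunable=True)
hparams = parser.parse_args()

# one trial per hyperparameter combination, two worker processes
hparams.optimize_parallel_cpu(train, nb_trials=6, nb_workers=2)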
def main():
    model = CoolSystem()

    # PyTorch SummaryWriter with a few bells and whistles
    exp = Experiment(save_dir='../output/tmp')
    print(f"exp.save_dir: {exp.save_dir}")
    exp.save()
    print("saved!")

    # train on cpu using only 10% of the data (for demo purposes)
    # pass in experiment for automatic tensorboard logging.
    trainer = Trainer(experiment=exp, max_nb_epochs=1, train_percent_check=0.1)

    # train on 4 gpus (lightning chooses GPUs for you)
    # trainer = Trainer(experiment=exp, max_nb_epochs=1, gpus=4)

    # train on 4 gpus (you choose GPUs)
    # trainer = Trainer(experiment=exp, max_nb_epochs=1, gpus=[0, 1, 3, 7])

    # train on 32 gpus across 4 nodes (make sure to submit appropriate SLURM job)
    # trainer = Trainer(experiment=exp, max_nb_epochs=1, gpus=8, nb_gpu_nodes=4)

    # train (1 epoch only here for demo)
    trainer.fit(model)
def optimize(optimizer_params): """ Main training routine specific for this project """ logging.basicConfig(level=logging.INFO) # dirs root_dir = os.path.dirname(os.path.realpath(__file__)) demo_log_dir = os.path.join(root_dir, 'dsanet_logs') checkpoint_dir = os.path.join(demo_log_dir, 'model_weights') test_tube_dir = os.path.join(demo_log_dir, 'test_tube_data') # although we user hyperOptParser, we are using it only as argparse right now parent_parser = HyperOptArgumentParser(strategy='grid_search', add_help=False) # gpu args parent_parser.add_argument('--test_tube_save_path', type=str, default=test_tube_dir, help='where to save logs') parent_parser.add_argument('--model_save_path', type=str, default=checkpoint_dir, help='where to save model') # allow model to overwrite or extend args parser = DSANet.add_model_specific_args(parent_parser, root_dir) hyperparams = parser.parse_args() print(hyperparams) setattr(hyperparams, 'batch_size', int(optimizer_params['batch_size'])) setattr(hyperparams, 'drop_prob', optimizer_params['dropout']) setattr(hyperparams, 'learning_rate', optimizer_params['learning_rate']) setattr(hyperparams, 'd_model', int(optimizer_params['units'])) # hyperparams['batch_size'] = optimizer_params['batch_size'] # hyperparams['drop_prob'] = optimizer_params['dropout'] # hyperparams['learning_rate'] = optimizer_params['learning_rate'] # hyperparams['d_model'] = optimizer_params['units'] print(hyperparams) hparams = hyperparams # ------------------------ # 1 INIT LIGHTNING MODEL # ------------------------ print('loading model...') model = DSANet(hparams) print('model built') # ------------------------ # 2 INIT TEST TUBE EXP # ------------------------ # init experiment exp = Experiment(name='dsanet_exp_{}_window={}_horizon={}'.format( hparams.data_name, hparams.window, hparams.horizon), save_dir=hparams.test_tube_save_path, autosave=False, description='test demo') exp.argparse(hparams) exp.save() # ------------------------ # 3 DEFINE CALLBACKS # ------------------------ model_save_path = '{}/{}/{}'.format(hparams.model_save_path, exp.name, exp.version) checkpoint_callback = ModelCheckpoint(filepath=model_save_path, save_best_only=True, verbose=True, monitor='val_loss', mode='auto') early_stop = EarlyStopping(monitor='val_loss', patience=25, verbose=True, mode='min') # ------------------------ # 4 INIT TRAINER # ------------------------ trainer = Trainer( gpus="0,1", distributed_backend='ddp', experiment=exp, early_stop_callback=early_stop, checkpoint_callback=checkpoint_callback, ) # ------------------------ # 5 START TRAINING # ------------------------ st_time = datetime.now() trainer.fit(model) eval_time = str(datetime.now() - st_time) print("Iteration %d: Getting results ... 
" % ITERATION) csv_load_path = '{}/{}/{}{}'.format(hparams.test_tube_save_path, exp.name, 'version_', exp.version) df = pd.read_csv('{}/{}'.format( csv_load_path, 'metrics.csv')) # change to experiment save dir min_idx = df['val_nd'].idxmin() of_connection = open(out_file, 'a') writer = csv.writer(of_connection) writer.writerow([ optimizer_params, hparams, df['tng_loss'].iloc[min_idx], df['val_loss'].iloc[min_idx], df['val_nd'].iloc[min_idx], df['NRMSE'].iloc[min_idx], df['val_rho10'].iloc[min_idx], df['val_rho50'].iloc[min_idx], df['val_rho90'].iloc[min_idx], eval_time, STATUS_OK ]) of_connection.close() return { 'loss': df['val_nd'].iloc[min_idx], 'ND': df['val_nd'].iloc[min_idx], 'NRMSE': df['NRMSE'].iloc[min_idx], 'val_loss': df['val_loss'].iloc[min_idx], 'params': optimizer_params, 'rho_metric': { 'rho10': df['val_rho10'].iloc[min_idx], 'rho50': df['val_rho50'].iloc[min_idx], 'rho90': df['val_rho90'].iloc[min_idx] }, 'iteration': ITERATION, 'eval_time': eval_time, 'status': STATUS_OK }
def main(hparams, cluster, results_dict): """ Main training routine specific for this project :param hparams: :return: """ on_gpu = torch.cuda.is_available() if hparams.disable_cuda: on_gpu = False device = 'cuda' if on_gpu else 'cpu' hparams.__setattr__('device', device) hparams.__setattr__('on_gpu', on_gpu) hparams.__setattr__('nb_gpus', torch.cuda.device_count()) hparams.__setattr__('inference_mode', hparams.model_load_weights_path is not None) # delay each training start to not overwrite logs process_position, current_gpu = TRAINING_MODEL.get_process_position( hparams.gpus) sleep(process_position + 1) # init experiment exp = Experiment(name=hparams.tt_name, debug=hparams.debug, save_dir=hparams.tt_save_path, version=hparams.hpc_exp_number, autosave=False, description=hparams.tt_description) exp.argparse(hparams) exp.save() # build model print('loading model...') model = TRAINING_MODEL(hparams) print('model built') # callbacks early_stop = EarlyStopping(monitor=hparams.early_stop_metric, patience=hparams.early_stop_patience, verbose=True, mode=hparams.early_stop_mode) model_save_path = '{}/{}/{}'.format(hparams.model_save_path, exp.name, exp.version) checkpoint = ModelCheckpoint(filepath=model_save_path, save_function=None, save_best_only=True, verbose=True, monitor=hparams.model_save_monitor_value, mode=hparams.model_save_monitor_mode) # configure trainer trainer = Trainer(experiment=exp, on_gpu=on_gpu, cluster=cluster, enable_tqdm=hparams.enable_tqdm, overfit_pct=hparams.overfit, track_grad_norm=hparams.track_grad_norm, fast_dev_run=hparams.fast_dev_run, check_val_every_n_epoch=hparams.check_val_every_n_epoch, accumulate_grad_batches=hparams.accumulate_grad_batches, process_position=process_position, current_gpu_name=current_gpu, checkpoint_callback=checkpoint, early_stop_callback=early_stop, enable_early_stop=hparams.enable_early_stop, max_nb_epochs=hparams.max_nb_epochs, min_nb_epochs=hparams.min_nb_epochs, train_percent_check=hparams.train_percent_check, val_percent_check=hparams.val_percent_check, test_percent_check=hparams.test_percent_check, val_check_interval=hparams.val_check_interval, log_save_interval=hparams.log_save_interval, add_log_row_interval=hparams.add_log_row_interval, lr_scheduler_milestones=hparams.lr_scheduler_milestones) # train model trainer.fit(model)
def main(hparams, cluster=None, results_dict=None): """ Main training routine specific for this project :param hparams: :return: """ # init experiment log_dir = os.path.dirname(os.path.realpath(__file__)) exp = Experiment(name='test_tube_exp', debug=True, save_dir=log_dir, version=0, autosave=False, description='test demo') hparams.training_set_path = '/Volumes/Elements/Datasets/Immersions/house_data_mp3/training' hparams.validation_set_path = '/Volumes/Elements/Datasets/Immersions/house_data_mp3/validation' hparams.test_task_set_path = '/Volumes/Elements/Datasets/Immersions/house_data_mp3/test_task' hparams.dummy_datasets = False hparams.audio_noise = 3e-3 hparams.cqt_fmin = 40. hparams.cqt_bins_per_octave = 24 hparams.cqt_n_bins = 216 hparams.cqt_hop_length = 512 hparams.cqt_filter_scale = 0.43 hparams.enc_channels = (1, 8, 16, 32, 64, 128, 256, 512, 512) hparams.enc_kernel_1_w = (3, 3, 3, 3, 3, 3, 3, 3) hparams.enc_kernel_1_h = (3, 3, 3, 3, 3, 3, 3, 3) hparams.enc_kernel_2_w = (1, 3, 1, 3, 1, 3, 1, 3) hparams.enc_kernel_2_h = (25, 3, 25, 3, 25, 3, 4, 3) hparams.enc_padding_1 = (1, 1, 1, 1, 1, 1, 1, 1) hparams.enc_padding_2 = (0, 1, 0, 1, 0, 1, 0, 0) hparams.enc_stride_1 = (1, 1, 1, 1, 1, 1, 1, 1) hparams.enc_stride_2 = (1, 1, 1, 1, 1, 1, 1, 1) hparams.enc_pooling_1 = (2, 1, 1, 1, 2, 1, 1, 1) hparams.ar_kernel_sizes = (5, 4, 1, 3, 3, 1, 3, 1, 6) hparams.ar_self_attention = (False, False, False, False, False, False, False, False, False) hparams.batch_size = 4 hparams.learning_rate = 3e-4 hparams.warmup_steps = 1000 hparams.annealing_steps = 100000 hparams.score_over_all_timesteps = False hparams.visible_steps = 60 # set the hparams for the experiment exp.argparse(hparams) exp.save() # build model model = ContrastivePredictiveSystem(hparams) # callbacks early_stop = EarlyStopping(monitor=hparams.early_stop_metric, patience=hparams.early_stop_patience, verbose=True, mode=hparams.early_stop_mode) model_save_path = '{}/{}/{}'.format(hparams.model_save_path, exp.name, exp.version) checkpoint = ModelCheckpoint(filepath=model_save_path, save_best_only=True, verbose=True, monitor=hparams.model_save_monitor_value, mode=hparams.model_save_monitor_mode) # configure trainer trainer = Trainer( experiment=exp, checkpoint_callback=checkpoint, #early_stop_callback=early_stop, # distributed_backend='dp', #gpus=[0], nb_sanity_val_steps=2, gradient_clip=0.5) # train model trainer.fit(model)
def main(hparams, cluster): """ Main training routine specific for this project :param hparams: :return: """ # ------------------------ # 1 INIT LIGHTNING MODEL # ------------------------ print('loading model...') model = LightningTemplateModel(hparams) print('model built') # ------------------------ # 2 INIT TEST TUBE EXP # ------------------------ # when using grid search, it's possible for all models to start at once # and use the same test tube experiment version relative_node_id = int(os.environ['SLURM_NODEID']) sleep(relative_node_id + 1) # init experiment exp = Experiment( name=hyperparams.experiment_name, save_dir=hyperparams.test_tube_save_path, autosave=False, version=hparams.hpc_exp_number, # match the slurm job version number description='test demo' ) exp.argparse(hparams) exp.save() # ------------------------ # 3 DEFINE CALLBACKS # ------------------------ model_save_path = '{}/{}/{}'.format(hparams.model_save_path, exp.name, exp.version) early_stop = EarlyStopping( monitor='val_acc', patience=3, verbose=True, mode='max' ) checkpoint = ModelCheckpoint( filepath=model_save_path, save_best_only=True, verbose=True, monitor='val_loss', mode='min' ) # ------------------------ # 4 INIT TRAINER # ------------------------ trainer = Trainer( experiment=exp, checkpoint_callback=checkpoint, early_stop_callback=early_stop, gpus=hparams.per_experiment_nb_gpus, nb_gpu_nodes=hyperparams.nb_gpu_nodes ) # ------------------------ # 5 START TRAINING # ------------------------ trainer.fit(model)
def main(hparams): """ Main training routine specific for this project """ # ------------------------ # 1 INIT LIGHTNING MODEL # ------------------------ print('loading model...') model = DSANet(hparams) print('model built') # ------------------------ # 2 INIT TEST TUBE EXP # ------------------------ # init experiment exp = Experiment(name='dsanet_exp_{}_window={}_horizon={}'.format( hparams.data_name, hparams.window, hparams.horizon), save_dir=hparams.test_tube_save_path, autosave=False, description='test demo') exp.argparse(hparams) exp.save() # ------------------------ # 3 DEFINE CALLBACKS # ------------------------ model_save_path = '{}/{}/{}'.format(hparams.model_save_path, exp.name, exp.version) checkpoint_callback = ModelCheckpoint(filepath=model_save_path, save_best_only=True, verbose=True, monitor='val_loss', mode='auto') early_stop = EarlyStopping(monitor='val_loss', patience=25, verbose=True, mode='min') # ------------------------ # 4 INIT TRAINER # ------------------------ trainer = Trainer( gpus="0", distributed_backend='dp', experiment=exp, early_stop_callback=early_stop, checkpoint_callback=checkpoint_callback, ) # ------------------------ # 5 START TRAINING # ------------------------ if hparams.test_only: model_load_path = '{}/{}'.format(hparams.model_save_path, exp.name) # metrics_load_path = '{}/{}'.format(hparams.test_tube_save_path, exp.name) path_list = [ os.path.join(dirpath, filename) for dirpath, _, filenames in os.walk(model_load_path) for filename in filenames if filename.endswith('.ckpt') ] # for dirpath, dirnames, filenames in os.walk(model_load_path): # if filename in [f for f in filenames if f.endswith(".ckpt")]: for filename in path_list: print(filename) data = filename.split("/") version_number = data[len(data) - 2] metrics_load_path = '{}/{}'.format(hparams.test_tube_save_path, exp.name) metrics_load_path = '{}/{}{}/{}'.format(metrics_load_path, 'version_', version_number, 'meta_tags.csv') print(metrics_load_path) hparams.metrics_load_path = metrics_load_path model = DSANet(hparams) model = DSANet.load_from_metrics(weights_path=filename, tags_csv=metrics_load_path, on_gpu=True) # model = LightningModule.load_from_checkpoint(filename) # test (pass in the model) hparams.metrics_load_path = metrics_load_path result = trainer.test(model) print(result) else: result = trainer.fit(model) print('View tensorboard logs by running\ntensorboard --logdir %s' % os.getcwd()) print('and going to http://localhost:6006 on your browser')
def main(hparams): """ Main training routine specific for this project :param hparams: :return: """ # ------------------------ # 1 INIT LIGHTNING MODEL # ------------------------ print('loading model...') model = LightningTemplateModel(hparams) print('model built') # ------------------------ # 2 INIT TEST TUBE EXP # ------------------------ # init experiment exp = Experiment( name=hyperparams.experiment_name, save_dir=hyperparams.test_tube_save_path, autosave=False, description='test demo' ) exp.argparse(hparams) exp.save() # ------------------------ # 3 DEFINE CALLBACKS # ------------------------ model_save_path = '{}/{}/{}'.format(hparams.model_save_path, exp.name, exp.version) early_stop = EarlyStopping( monitor='val_acc', patience=3, verbose=True, mode='max' ) checkpoint = ModelCheckpoint( filepath=model_save_path, save_best_only=True, verbose=True, monitor='val_loss', mode='min' ) # ------------------------ # 4 INIT TRAINER # ------------------------ trainer = Trainer( experiment=exp, checkpoint_callback=checkpoint, early_stop_callback=early_stop, gpus=hparams.gpus, use_amp=True ) # ------------------------ # 5 START TRAINING # ------------------------ trainer.fit(model)
def main(hparams, cluster, results_dict): """ Main training routine specific for this project :param hparams: :return: """ on_gpu = hparams.gpus is not None and torch.cuda.is_available() device = 'cuda' if on_gpu else 'cpu' hparams.__setattr__('device', device) hparams.__setattr__('on_gpu', on_gpu) hparams.__setattr__('nb_gpus', torch.cuda.device_count()) hparams.__setattr__('inference_mode', hparams.model_load_weights_path is not None) # delay each training start to not overwrite logs process_position, current_gpu = TRAINING_MODEL.get_process_position( hparams.gpus) sleep(process_position + 1) # init experiment log_dir = os.path.dirname(os.path.realpath(__file__)) exp = Experiment(name='test_tube_exp', debug=True, save_dir=log_dir, version=0, autosave=False, description='test demo') exp.argparse(hparams) exp.save() # build model print('loading model...') model = TRAINING_MODEL(hparams) print('model built') # callbacks early_stop = EarlyStopping(monitor=hparams.early_stop_metric, patience=hparams.early_stop_patience, verbose=True, mode=hparams.early_stop_mode) model_save_path = '{}/{}/{}'.format(hparams.model_save_path, exp.name, exp.version) checkpoint = ModelCheckpoint(filepath=model_save_path, save_function=None, save_best_only=True, verbose=True, monitor=hparams.model_save_monitor_value, mode=hparams.model_save_monitor_mode) # gpus are ; separated for inside a node and , within nodes gpu_list = None if hparams.gpus is not None: gpu_list = [int(x) for x in hparams.gpus.split(';')] # configure trainer trainer = Trainer(experiment=exp, cluster=cluster, checkpoint_callback=checkpoint, early_stop_callback=early_stop, gpus=gpu_list) # train model trainer.fit(model)
def main(hparams, cluster=None, results_dict=None): """ Main training routine specific for this project :param hparams: :return: """ name = 'immersions_scalogram_resnet_house_smaller' version = 1 hparams.log_dir = '/home/idivinci3005/experiments/logs' hparams.checkpoint_dir = '/home/idivinci3005/experiments/checkpoints/' + name + '/' + str( version) hparams.training_set_path = '/home/idivinci3005/data/immersions/training' hparams.validation_set_path = '/home/idivinci3005/data/immersions/validation' hparams.test_task_set_path = '/home/idivinci3005/data/immersions/test_task' hparams.dummy_datasets = False hparams.audio_noise = 3e-3 hparams.cqt_fmin = 40. hparams.cqt_bins_per_octave = 24 hparams.cqt_n_bins = 216 hparams.cqt_hop_length = 512 hparams.cqt_filter_scale = 0.43 hparams.enc_channels = (1, 8, 16, 32, 64, 128, 256, 512, 512) hparams.enc_kernel_1_w = (3, 3, 3, 3, 3, 3, 3, 3) hparams.enc_kernel_1_h = (3, 3, 3, 3, 3, 3, 3, 3) hparams.enc_kernel_2_w = (1, 3, 1, 3, 1, 3, 1, 3) hparams.enc_kernel_2_h = (25, 3, 25, 3, 25, 3, 4, 3) hparams.enc_padding_1 = (1, 1, 1, 1, 1, 1, 1, 1) hparams.enc_padding_2 = (0, 1, 0, 1, 0, 1, 0, 0) hparams.enc_stride_1 = (1, 1, 1, 1, 1, 1, 1, 1) hparams.enc_stride_2 = (1, 1, 1, 1, 1, 1, 1, 1) hparams.enc_pooling_1 = (2, 1, 1, 1, 2, 1, 1, 1) hparams.ar_kernel_sizes = (5, 4, 1, 3, 3, 1, 3, 1, 6) hparams.ar_self_attention = (False, False, False, False, False, False, False, False, False) hparams.batch_size = 4 hparams.learning_rate = 3e-4 hparams.warmup_steps = 1000 hparams.annealing_steps = 100000 hparams.score_over_all_timesteps = False hparams.visible_steps = 60 hparams.batch_size = 32 hparams.learning_rate = 3e-4 hparams.warmup_steps = 1000 hparams.annealing_steps = 100000 hparams.score_over_all_timesteps = False hparams.visible_steps = 60 # init experiment exp = Experiment(name=name, debug=False, save_dir=hparams.log_dir, version=version, autosave=False, description='test demo') # set the hparams for the experiment exp.argparse(hparams) exp.save() # build model model = ContrastivePredictiveSystem(hparams) task_model = ClassificationTaskModel( model, task_dataset_path=hparams.test_task_set_path) # callbacks early_stop = EarlyStopping(monitor=hparams.early_stop_metric, patience=hparams.early_stop_patience, verbose=True, mode=hparams.early_stop_mode) checkpoint = ModelCheckpoint(filepath=hparams.checkpoint_dir, save_best_only=False, verbose=True, monitor=hparams.model_save_monitor_value, mode=hparams.model_save_monitor_mode) # configure trainer trainer = Trainer( experiment=exp, checkpoint_callback=checkpoint, #early_stop_callback=early_stop, #distributed_backend='dp', gpus=[0], nb_sanity_val_steps=5, val_check_interval=0.2, gradient_clip=0.5, track_grad_norm=2) # train model trainer.fit(model)
def main(hparams, cluster=None, results_dict=None): """ Main training routine specific for this project :param hparams: :return: """ # init experiment name = 'immersions_scalogram_resnet_maestro' version = 0 hparams.log_dir = '/home/idivinci3005/experiments/logs' hparams.checkpoint_dir = '/home/idivinci3005/experiments/checkpoints/' + name + '/' + str( version) hparams.training_set_path = '/home/idivinci3005/data/maestro-v2.0.0' hparams.validation_set_path = '/home/idivinci3005/data/maestro-v2.0.0' hparams.test_task_set_path = '/home/idivinci3005/data/maestro-v2.0.0' hparams.audio_noise = 3e-3 hparams.ar_kernel_sizes = (5, 4, 1, 3, 3, 1, 3, 1, 6) hparams.ar_self_attention = (False, False, False, False, False, False, False, False, False) hparams.batch_size = 32 hparams.learning_rate = 3e-4 hparams.warmup_steps = 1000 hparams.annealing_steps = 100000 hparams.score_over_all_timesteps = False hparams.visible_steps = 62 if not os.path.exists(hparams.checkpoint_dir): os.mkdir(hparams.checkpoint_dir) exp = Experiment(name=name, debug=False, save_dir=hparams.log_dir, version=version, autosave=False, description='maestro dataset experiment') # set the hparams for the experiment exp.argparse(hparams) exp.save() # build model model = ContrastivePredictiveSystemMaestro(hparams) task_model = MaestroClassificationTaskModel( model, task_dataset_path=hparams.test_task_set_path) model.test_task_model = task_model # callbacks early_stop = EarlyStopping(monitor=hparams.early_stop_metric, patience=hparams.early_stop_patience, verbose=True, mode=hparams.early_stop_mode) checkpoint = ModelCheckpoint(filepath=hparams.checkpoint_dir, save_best_only=False, verbose=True, monitor=hparams.model_save_monitor_value, mode=hparams.model_save_monitor_mode) # configure trainer trainer = Trainer( experiment=exp, checkpoint_callback=checkpoint, #early_stop_callback=early_stop, # distributed_backend='dp', gpus=[0], nb_sanity_val_steps=5, val_check_interval=0.1, val_percent_check=0.25, #train_percent_check=0.01 ) # train model trainer.fit(model)
def main(hparams, cluster, results_dict): """ Main training routine specific for this project :param hparams: :return: """ on_gpu = torch.cuda.is_available() if hparams.disable_cuda: on_gpu = False device = 'cuda' if on_gpu else 'cpu' hparams.__setattr__('device', device) hparams.__setattr__('on_gpu', on_gpu) hparams.__setattr__('nb_gpus', torch.cuda.device_count()) hparams.__setattr__('inference_mode', hparams.model_load_weights_path is not None) # delay each training start to not overwrite logs process_position, current_gpu = TRAINING_MODEL.get_process_position( hparams.gpus) sleep(process_position + 1) # init experiment exp = Experiment(name=hparams.tt_name, debug=hparams.debug, save_dir=hparams.tt_save_path, version=hparams.hpc_exp_number, autosave=False, description=hparams.tt_description) exp.argparse(hparams) exp.save() # build model print('loading model...') model = TRAINING_MODEL(hparams) print('model built') # callbacks early_stop = EarlyStopping(monitor=hparams.early_stop_metric, patience=hparams.early_stop_patience, verbose=True, mode=hparams.early_stop_mode) model_save_path = '{}/{}/{}'.format(hparams.model_save_path, exp.name, exp.version) checkpoint = ModelCheckpoint(filepath=model_save_path, save_function=None, save_best_only=True, verbose=True, monitor=hparams.model_save_monitor_value, mode=hparams.model_save_monitor_mode) # configure trainer trainer = Trainer( experiment=exp, cluster=cluster, checkpoint_callback=checkpoint, early_stop_callback=early_stop, ) # train model trainer.fit(model)
        ds_valid=ds_valid,
        ds_test=ds_valid)

    # -----------------------------------------------------------------------
    # 2 INIT TEST TUBE EXP
    # -----------------------------------------------------------------------
    # init experiment
    exp = Experiment(
        name='voronoi',  # hyperparams.experiment_name,
        save_dir='runs',  # hyperparams.test_tube_save_path,
        # autosave=False,
        # description='experiment'
    )
    exp.save()

    # -----------------------------------------------------------------------
    # 3 DEFINE CALLBACKS
    # -----------------------------------------------------------------------
    model_save_path = 'pl_voronoi'  # '{}/{}/{}'.format(hparams.model_save_path, exp.name, exp.version)
    early_stop = EarlyStopping(monitor='avg_val_loss',
                               patience=5,
                               verbose=True,
                               mode='auto')

    checkpoint = ModelCheckpoint(
        filepath=model_save_path,
        # save_best_only=True,
        # save_weights_only=True,
        verbose=True,
def main(hparams, cluster=None, results_dict=None): """ Main training routine specific for this project :param hparams: :return: """ # init experiment log_dir = os.path.dirname(os.path.realpath(__file__)) exp = Experiment(name='test_tube_exp', debug=True, save_dir=log_dir, version=0, autosave=False, description='maestro dataset experiment') #hparams.training_set_path = '/Volumes/Elements/Datasets/maestro-v2.0.0' #hparams.validation_set_path = '/Volumes/Elements/Datasets/maestro-v2.0.0' #hparams.test_task_set_path = '/Volumes/Elements/Datasets/maestro-v2.0.0' hparams.training_set_path = 'C:/Users/HEV7RNG/Documents/data/maestro-v2.0.0' hparams.validation_set_path = 'C:/Users/HEV7RNG/Documents/data/maestro-v2.0.0' hparams.test_task_set_path = 'C:/Users/HEV7RNG/Documents/data/maestro-v2.0.0' hparams.audio_noise = 3e-3 hparams.ar_kernel_sizes = (5, 4, 1, 3, 3, 1, 3, 1, 6) hparams.ar_self_attention = (False, False, False, False, False, False, False, False, False) hparams.batch_size = 4 hparams.learning_rate = 2e-4 hparams.warmup_steps = 1000 hparams.annealing_steps = 100000 hparams.score_over_all_timesteps = False hparams.visible_steps = 62 # set the hparams for the experiment exp.argparse(hparams) exp.save() # build model model = ContrastivePredictiveSystemMaestro(hparams) task_model = MaestroClassificationTaskModel( model, task_dataset_path=hparams.validation_set_path) model.test_task_model = task_model # callbacks early_stop = EarlyStopping(monitor=hparams.early_stop_metric, patience=hparams.early_stop_patience, verbose=True, mode=hparams.early_stop_mode) model_save_path = '{}/{}/{}'.format(hparams.model_save_path, exp.name, exp.version) checkpoint = ModelCheckpoint(filepath=model_save_path, save_best_only=True, verbose=True, monitor=hparams.model_save_monitor_value, mode=hparams.model_save_monitor_mode) # configure trainer trainer = Trainer( experiment=exp, checkpoint_callback=checkpoint, early_stop_callback=early_stop, # distributed_backend='dp', #gpus=[0], nb_sanity_val_steps=2) # train model trainer.fit(model)
def test_training(self):
    # use default args given by lightning
    root_dir = '/Volumes/Elements/Projekte/Immersions'
    parent_parser = HyperOptArgumentParser(strategy='random_search', add_help=False)
    add_default_args(parent_parser, root_dir)

    # allow model to overwrite or extend args
    parser = ContrastivePredictiveSystem.add_model_specific_args(parent_parser, root_dir)
    hparams = parser.parse_args()

    name = 'immersions_scalogram_resnet_test'
    version = 0
    hparams.log_dir = '/Volumes/Elements/Projekte/Immersions/logs'
    hparams.checkpoint_dir = '/Volumes/Elements/Projekte/Immersions/checkpoints'
    hparams.training_set_path = '/Volumes/Elements/Datasets/Immersions/house_data_mp3/training'
    hparams.validation_set_path = '/Volumes/Elements/Datasets/Immersions/house_data_mp3/validation'
    hparams.dummy_datasets = False
    hparams.batch_size = 64
    hparams.learning_rate = 2e-4
    hparams.warmup_steps = 1000
    hparams.annealing_steps = 100000

    # init experiment
    exp = Experiment(
        name=name,
        debug=False,
        save_dir=hparams.log_dir,
        version=version,
        autosave=False,
        description='test demo'
    )

    # set the hparams for the experiment
    exp.argparse(hparams)
    exp.save()

    # build model
    model = ContrastivePredictiveSystem(hparams)

    # callbacks
    early_stop = EarlyStopping(
        monitor=hparams.early_stop_metric,
        patience=hparams.early_stop_patience,
        verbose=True,
        mode=hparams.early_stop_mode
    )
    checkpoint = ModelCheckpoint(
        filepath=hparams.checkpoint_dir,
        save_best_only=False,
        verbose=True,
        monitor=hparams.model_save_monitor_value,
        mode=hparams.model_save_monitor_mode
    )

    # configure trainer
    trainer = Trainer(
        experiment=exp,
        checkpoint_callback=checkpoint,
        # early_stop_callback=early_stop,
        # distributed_backend='dp',
        gpus=[0],
        nb_sanity_val_steps=5,
        val_check_interval=0.2,
        train_percent_check=0.01,
        max_nb_epochs=1
    )

    # train model
    trainer.fit(model)