Example #1
    # although we use HyperOptArgumentParser, we are using it only as argparse right now
    parent_parser = HyperOptArgumentParser(strategy='grid_search',
                                           add_help=False)

    # gpu args
    parent_parser.add_argument('--test_tube_save_path',
                               type=str,
                               default=test_tube_dir,
                               help='where to save logs')
    parent_parser.add_argument('--model_save_path',
                               type=str,
                               default=checkpoint_dir,
                               help='where to save model')

    # allow model to overwrite or extend args
    parser = DSANet.add_model_specific_args(parent_parser, root_dir)
    hyperparams = parser.parse_args()
    print(hyperparams)

    # ---------------------
    # RUN TRAINING
    # ---------------------
    # run on HPC cluster
    print('RUNNING ON CPU')
    # * comment out the following line when running grid search
    main(hyperparams)

    # * uncomment the following code when running grid search
    # hyperparams.optimize_parallel_cpu(
    #     main,
    #     nb_trials=24,    # this number needs to be adjusted according to the actual situation
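The commented-out grid-search path above is only a fragment. Below is a minimal sketch of how test_tube's parallel CPU search is typically invoked on the parsed namespace, assuming DSANet.add_model_specific_args registers tunable options; the nb_trials and nb_workers values are placeholders, not taken from the source.

    # hypothetical grid-search driver (values are assumptions)
    hyperparams.optimize_parallel_cpu(
        main,            # training function, called once per sampled trial
        nb_trials=24,    # total trials; adjust to the size of the grid
        nb_workers=4,    # parallel CPU processes; adjust to available cores
    )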
Example #2
def optimize(optimizer_params):
    """
    Main training routine specific for this project
    """
    global val_results, test_results
    global val_out_file, test_out_file, ITERATION, epochs
    ITERATION += 1
    root_dir = os.path.dirname(os.path.realpath(__file__))
    # a plain ArgumentParser is enough here; we only use it for argparse-style parsing
    parent_parser = ArgumentParser(add_help=False)

    # allow model to overwrite or extend args
    parser = DSANet.add_model_specific_args(parent_parser, root_dir)
    hyperparams = parser.parse_args()
    dataset = DataUtil(hyperparams, 2)
    if hasattr(dataset, 'scale'):
        #print('we have scale')
        setattr(hyperparams, 'scale', dataset.scale)
        #print(dataset.scale)
    if hasattr(dataset, 'scaler'):
        #print('we have scaler')
        setattr(hyperparams, 'scaler', dataset.scaler)
        # print(dataset.scaler)

    setattr(hyperparams, 'n_multiv', dataset.m)
    setattr(hyperparams, 'batch_size', int(optimizer_params['batch_size']))
    setattr(hyperparams, 'drop_prob', optimizer_params['dropout'])
    setattr(hyperparams, 'learning_rate', optimizer_params['learning_rate'])
    setattr(hyperparams, 'd_model', int(optimizer_params['units']))
    setattr(hyperparams, 'local', int(optimizer_params['local']))
    setattr(hyperparams, 'n_kernels', int(optimizer_params['n_kernels']))
    setattr(hyperparams, 'window', int(optimizer_params['window']))
    hparams = hyperparams
    print(
        f"\n#######\nTESTING hparams: mv:{hparams.n_multiv}, bs:{hparams.batch_size}, drop:{hparams.drop_prob}, lr:{hparams.learning_rate}, d_model:{hparams.d_model}, local:{hparams.local}, n_kernels:{hparams.n_kernels}, window:{hparams.window}\n#######"
    )

    # ------------------------
    # 1 INIT LIGHTNING MODEL
    # ------------------------
    print('loading model...')
    model = DSANet(hparams)
    print('model built')
    # ------------------------
    # 2 INIT TEST TUBE EXP
    # ------------------------
    filename = '{}{}{}{}{}{}'.format('my_dsanet_', hparams.data_name, '_',
                                     hparams.powerset, '_',
                                     str(hparams.calendar))
    logger = TestTubeLogger("tb_logs_v2", filename)
    # ------------------------
    # 3 DEFINE CALLBACKS
    # ------------------------
    early_stop_callback = EarlyStopping(monitor='val_loss',
                                        patience=5,
                                        verbose=False,
                                        mode='min')
    # ------------------------
    # 4 INIT TRAINER
    # ------------------------
    trainer = pl.Trainer(gpus=4,
                         distributed_backend='dp',
                         logger=logger,
                         early_stop_callback=early_stop_callback,
                         show_progress_bar=False,
                         profiler=True,
                         fast_dev_run=False,
                         max_epochs=100)
    # ------------------------
    # 5 START TRAINING
    # ------------------------
    st_time = datetime.now()
    result = trainer.fit(model)
    eval_result = model.val_results
    df1 = pd.DataFrame(eval_result, [ITERATION])
    print(result)
    eval_time = str(datetime.now() - st_time)
    print(f"Train time: {eval_time}, Results: {eval_result}")

    st_time = datetime.now()
    model.hparams.mcdropout = 'True'
    trainer.test(model)
    eval_time = str(datetime.now() - st_time)
    test_result = model.test_results
    df2 = pd.DataFrame(test_result, [ITERATION])
    print(f"Test time: {eval_time}, Results: {test_result}")
    df1 = pd.concat([df1, pd.DataFrame(vars(hparams), [ITERATION])],
                    axis=1,
                    sort=False)
    df2 = pd.concat([df2, pd.DataFrame(vars(hparams), [ITERATION])],
                    axis=1,
                    sort=False)

    val_results = pd.concat([val_results, df1], axis=0, sort=False)
    test_results = pd.concat([test_results, df2], axis=0, sort=False)
    return eval_result['val_nd_all']
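This objective returns a single scalar (val_nd_all) and relies on the module-level globals it declares, so it can be handed straight to an optimizer. A minimal driver sketch follows, assuming hyperopt's TPE search; the search-space bounds, trial count, and output file names are assumptions, and only the parameter keys mirror what optimize() reads from optimizer_params.

import pandas as pd
from hyperopt import Trials, fmin, hp, tpe

# globals consumed by optimize() above
ITERATION = 0
val_results = pd.DataFrame()
test_results = pd.DataFrame()

# hypothetical search space; only the keys are taken from the snippet above
space = {
    'batch_size': hp.quniform('batch_size', 16, 128, 16),
    'dropout': hp.uniform('dropout', 0.1, 0.5),
    'learning_rate': hp.loguniform('learning_rate', -9, -4),  # roughly 1e-4 .. 2e-2
    'units': hp.choice('units', [32, 64, 128, 256, 512]),
    'local': hp.choice('local', [3, 5, 7]),
    'n_kernels': hp.choice('n_kernels', [16, 32, 64]),
    'window': hp.choice('window', [32, 64, 96, 128, 168]),
}

trials = Trials()
best = fmin(fn=optimize, space=space, algo=tpe.suggest, max_evals=24, trials=trials)

# persist the per-trial validation/test tables accumulated by optimize()
val_results.to_csv('val_results.csv')
test_results.to_csv('test_results.csv')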
def optimize(optimizer_params):
    """
    Main training routine specific for this project
    """
    global out_file, ITERATION
    ITERATION += 1
    # dirs
    root_dir = os.path.dirname(os.path.realpath(__file__))
    demo_log_dir = os.path.join(root_dir, 'dsanet_logs')
    checkpoint_dir = os.path.join(demo_log_dir, 'model_weights')
    test_tube_dir = os.path.join(demo_log_dir, 'test_tube_data')

    # a plain ArgumentParser is enough here; we only use it for argparse-style parsing
    parent_parser = ArgumentParser(add_help=False)

    # gpu args
    parent_parser.add_argument('--test_tube_save_path', type=str, default=test_tube_dir, help='where to save logs')
    parent_parser.add_argument('--model_save_path', type=str, default=checkpoint_dir, help='where to save model')

    # allow model to overwrite or extend args
    parser = DSANet.add_model_specific_args(parent_parser, root_dir)
    hyperparams = parser.parse_args()
    setattr(hyperparams, 'batch_size', int(optimizer_params['batch_size']))
    setattr(hyperparams, 'drop_prob', optimizer_params['dropout'])
    setattr(hyperparams, 'learning_rate', optimizer_params['learning_rate'])
    setattr(hyperparams, 'd_model', int(optimizer_params['units']))
    setattr(hyperparams, 'local', int(optimizer_params['local']))
    setattr(hyperparams, 'n_kernels', int(optimizer_params['n_kernels']))
    setattr(hyperparams, 'window', int(optimizer_params['window']))
    hparams = hyperparams
    print(f"TESTING hparams: mv:{hparams.n_multiv}, bs:{hparams.batch_size}, drop:{hparams.drop_prob}, lr:{hparams.learning_rate}, d_model:{hparams.d_model}, local:{hparams.local}, n_kernels:{hparams.n_kernels}, window:{hparams.window}")
    
    # ------------------------
    # 1 INIT LIGHTNING MODEL
    # ------------------------

    print('loading model...')
    model = DSANet(hparams)
    print('model built')

    # ------------------------
    # 2 INIT TEST TUBE EXP
    # ------------------------

    logger = TestTubeLogger("tb_logs", name="my_dsanet_power_v2")

    # ------------------------
    # 3 DEFINE CALLBACKS
    # ------------------------

    early_stop_callback = EarlyStopping(
        monitor='val_loss',
        patience=25,
        verbose=False,
        mode='min'
    )

    # ------------------------
    # 4 INIT TRAINER
    # ------------------------

    trainer = pl.Trainer(
        gpus=4,
        distributed_backend='dp',
        logger=logger,
        early_stop_callback=early_stop_callback,
        show_progress_bar=False,
        log_save_interval=10,
    )

    # ------------------------
    # 5 START TRAINING
    # ------------------------
    st_time = datetime.now()
    result = trainer.fit(model)
    print(result)
    eval_time = str(datetime.now() - st_time)
    print(eval_time)
    print(f"Iteration {ITERATION}: Getting results...")
    csv_load_path = os.path.join(root_dir, logger.experiment.save_dir)
    csv_load_path = '{}/{}/{}{}'.format(csv_load_path, logger.experiment.name, 'version_', logger.experiment.version)
    df = pd.read_csv('{}/{}'.format(csv_load_path, 'metrics.csv'))  # change to experiment save dir
    min_idx = df['val_nd'].idxmin()

    of_connection = open(out_file, 'a')
    writer = csv.writer(of_connection)
    writer.writerow([optimizer_params, hparams, df['val_loss'].iloc[min_idx], df['val_loss'].iloc[min_idx],
                     df['val_nd'].iloc[min_idx], df['NRMSE'].iloc[min_idx], df['val_rho10'].iloc[min_idx],
                     df['val_rho50'].iloc[min_idx], df['val_rho90'].iloc[min_idx], eval_time, STATUS_OK])
    of_connection.close()
    #torch.cuda.empty_cache()
    return {'loss': df['val_nd'].iloc[min_idx],
            'ND': df['val_nd'].iloc[min_idx],
            'NRMSE': df['NRMSE'].iloc[min_idx],
            'val_loss': df['val_loss'].iloc[min_idx],
            'params': optimizer_params,
            'rho_metric': {'rho10': df['val_rho10'].iloc[min_idx], 'rho50': df['val_rho50'].iloc[min_idx],
                           'rho90': df['val_rho90'].iloc[min_idx]},
            'iteration': ITERATION,
            'eval_time': eval_time,
            'status': STATUS_OK}
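This variant appends one CSV row per trial to the global out_file, so the file needs to exist with a header row before the search starts. A minimal preparation sketch follows, assuming a file name; the column names are inferred from the values passed to writer.writerow() above and are likewise assumptions.

import csv

# globals consumed by optimize() above (file name is hypothetical)
ITERATION = 0
out_file = 'dsanet_hyperopt_trials.csv'

# header order mirrors the values written per trial; names are assumptions
with open(out_file, 'w', newline='') as f:
    csv.writer(f).writerow([
        'params', 'hparams', 'tng_loss', 'val_loss', 'val_nd', 'NRMSE',
        'val_rho10', 'val_rho50', 'val_rho90', 'eval_time', 'status',
    ])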
def optimize(optimizer_params):
    """
    Main training routine specific for this project
    """
    global out_file, ITERATION
    ITERATION += 1
    logging.basicConfig(level=logging.INFO)
    # dirs
    root_dir = os.path.dirname(os.path.realpath(__file__))
    demo_log_dir = os.path.join(root_dir, 'dsanet_logs')
    checkpoint_dir = os.path.join(demo_log_dir, 'model_weights')
    test_tube_dir = os.path.join(demo_log_dir, 'test_tube_data')

    # although we use HyperOptArgumentParser, we are using it only as argparse right now
    parent_parser = HyperOptArgumentParser(strategy='grid_search',
                                           add_help=False)

    # gpu args
    parent_parser.add_argument('--test_tube_save_path',
                               type=str,
                               default=test_tube_dir,
                               help='where to save logs')
    parent_parser.add_argument('--model_save_path',
                               type=str,
                               default=checkpoint_dir,
                               help='where to save model')

    # allow model to overwrite or extend args
    parser = DSANet.add_model_specific_args(parent_parser, root_dir)
    hyperparams = parser.parse_args()
    print(hyperparams)
    setattr(hyperparams, 'batch_size', int(optimizer_params['batch_size']))
    setattr(hyperparams, 'drop_prob', optimizer_params['dropout'])
    setattr(hyperparams, 'learning_rate', optimizer_params['learning_rate'])
    setattr(hyperparams, 'd_model', int(optimizer_params['units']))
    # hyperparams['batch_size'] = optimizer_params['batch_size']
    # hyperparams['drop_prob'] = optimizer_params['dropout']
    # hyperparams['learning_rate'] = optimizer_params['learning_rate']
    # hyperparams['d_model'] = optimizer_params['units']
    print(hyperparams)
    hparams = hyperparams
    # ------------------------
    # 1 INIT LIGHTNING MODEL
    # ------------------------

    print('loading model...')
    model = DSANet(hparams)
    print('model built')

    # ------------------------
    # 2 INIT TEST TUBE EXP
    # ------------------------

    # init experiment
    exp = Experiment(name='dsanet_exp_{}_window={}_horizon={}'.format(
        hparams.data_name, hparams.window, hparams.horizon),
                     save_dir=hparams.test_tube_save_path,
                     autosave=False,
                     description='test demo')

    exp.argparse(hparams)
    exp.save()

    # ------------------------
    # 3 DEFINE CALLBACKS
    # ------------------------
    model_save_path = '{}/{}/{}'.format(hparams.model_save_path, exp.name,
                                        exp.version)

    checkpoint_callback = ModelCheckpoint(filepath=model_save_path,
                                          save_best_only=True,
                                          verbose=True,
                                          monitor='val_loss',
                                          mode='auto')

    early_stop = EarlyStopping(monitor='val_loss',
                               patience=25,
                               verbose=True,
                               mode='min')

    # ------------------------
    # 4 INIT TRAINER
    # ------------------------
    trainer = Trainer(
        gpus="0,1",
        distributed_backend='ddp',
        experiment=exp,
        early_stop_callback=early_stop,
        checkpoint_callback=checkpoint_callback,
    )

    # ------------------------
    # 5 START TRAINING
    # ------------------------
    st_time = datetime.now()
    trainer.fit(model)
    eval_time = str(datetime.now() - st_time)
    print("Iteration %d: Getting results ... " % ITERATION)
    csv_load_path = '{}/{}/{}{}'.format(hparams.test_tube_save_path, exp.name,
                                        'version_', exp.version)
    df = pd.read_csv('{}/{}'.format(
        csv_load_path, 'metrics.csv'))  # change to experiment save dir
    min_idx = df['val_nd'].idxmin()

    of_connection = open(out_file, 'a')
    writer = csv.writer(of_connection)
    writer.writerow([
        optimizer_params, hparams, df['tng_loss'].iloc[min_idx],
        df['val_loss'].iloc[min_idx], df['val_nd'].iloc[min_idx],
        df['NRMSE'].iloc[min_idx], df['val_rho10'].iloc[min_idx],
        df['val_rho50'].iloc[min_idx], df['val_rho90'].iloc[min_idx],
        eval_time, STATUS_OK
    ])
    of_connection.close()

    return {
        'loss': df['val_nd'].iloc[min_idx],
        'ND': df['val_nd'].iloc[min_idx],
        'NRMSE': df['NRMSE'].iloc[min_idx],
        'val_loss': df['val_loss'].iloc[min_idx],
        'params': optimizer_params,
        'rho_metric': {
            'rho10': df['val_rho10'].iloc[min_idx],
            'rho50': df['val_rho50'].iloc[min_idx],
            'rho90': df['val_rho90'].iloc[min_idx]
        },
        'iteration': ITERATION,
        'eval_time': eval_time,
        'status': STATUS_OK
    }
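Because this version returns a hyperopt-style result dict ({'loss': ..., 'status': STATUS_OK, ...}), a Trials object passed to fmin retains every trial's full return value. Below is a short sketch of inspecting those stored results after the search, assuming trials is the Trials instance that was passed to fmin.

import pandas as pd

# dict returned by the best-scoring call to optimize()
best = trials.best_trial['result']
print('best ND:', best['ND'], 'with params:', best['params'])

# flatten every trial's returned dict into one table for later analysis
history = pd.DataFrame(trials.results)
history.to_csv('hyperopt_trial_history.csv', index=False)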