Beispiel #1
0
def launch_tensorboard(port=6006):
    """
    Run the ``tensorboard`` command-line tool on the project logs directory.

    The call blocks until the ``tensorboard`` process exits, so it is meant
    to be run from a separate process or thread.

    Parameters
    ----------
    port: int or str
        Port passed to ``tensorboard --port``.
    """
    command = ['tensorboard']
    command += ['--logdir', str(paths.get_logs_dir())]
    command += ['--port', str(port)]
    # Bind to 0.0.0.0 so the server is not restricted to localhost
    command += ['--host', '0.0.0.0']
    subprocess.call(command)
Beispiel #2
0
def get_callbacks(CONF, use_lr_decay=True):
    """
    Get a callback list to feed fit_generator.
    #TODO Use_remote callback needs proper configuration
    #TODO Add ReduceLROnPlateau callback?

    Parameters
    ----------
    CONF: dict
        Configuration dictionary. The keys read by this function are:
        ``CONF['training']``: ``'epochs'``, ``'lr_step_schedule'``,
        ``'lr_step_decay'``, ``'use_validation'``, ``'use_early_stopping'``
        and ``'ckpt_freq'``;
        ``CONF['monitor']``: ``'use_tensorboard'`` and ``'use_remote'``.
    use_lr_decay: bool
        If True, add an ``LR_scheduler`` callback whose epoch milestones are
        ``lr_step_schedule * epochs`` truncated to integers.

    Returns
    -------
    List of callbacks. It is never empty: ``TerminateOnNaN`` and
    ``LRHistory`` are always included.

    Notes
    -----
    When ``CONF['monitor']['use_tensorboard']`` is set, this function has
    side effects: it runs ``fuser -k <port>/tcp`` to free the port and starts
    TensorBoard in a daemon process. The port is read from the
    ``monitorPORT`` environment variable (default 6006).
    """
    training_conf = CONF['training']
    monitor_conf = CONF['monitor']
    epochs = training_conf['epochs']

    # Mandatory callbacks
    calls = [callbacks.TerminateOnNaN(), LRHistory()]

    # Optional callbacks
    if use_lr_decay:
        milestones = np.array(training_conf['lr_step_schedule']) * epochs
        # Use the builtin ``int``: the ``np.int`` alias was deprecated in
        # NumPy 1.20 and removed in NumPy 1.24.
        milestones = milestones.astype(int)
        calls.append(
            LR_scheduler(lr_decay=training_conf['lr_step_decay'],
                         epoch_milestones=milestones.tolist()))

    if monitor_conf['use_tensorboard']:
        calls.append(
            callbacks.TensorBoard(log_dir=paths.get_logs_dir(),
                                  write_graph=False))

        # Run Tensorboard on a separate process on behalf of the user
        # (instead of asking them to launch it manually from a console).
        raw_port = os.getenv('monitorPORT', '6006')
        try:
            # Values shorter than 4 characters are ignored in favour of the
            # default port.
            port = int(raw_port) if len(raw_port) >= 4 else 6006
        except ValueError:
            # A non-numeric value should not abort training setup
            port = 6006

        # Kill any previous process listening on that port
        subprocess.run(['fuser', '-k', '{}/tcp'.format(port)])
        p = Process(target=launch_tensorboard, args=(port, ), daemon=True)
        p.start()

    if monitor_conf['use_remote']:
        calls.append(callbacks.RemoteMonitor())

    if training_conf['use_validation'] and training_conf['use_early_stopping']:
        # Patience is 10% of the total number of epochs
        calls.append(callbacks.EarlyStopping(patience=int(0.1 * epochs)))

    if training_conf['ckpt_freq'] is not None:
        # ckpt_freq is multiplied by the number of epochs to get the
        # checkpoint period, with a minimum of one epoch.
        # NOTE(review): ``period`` is deprecated in recent Keras releases in
        # favour of ``save_freq`` -- confirm against the Keras version in use.
        ckpt_period = max(1, int(training_conf['ckpt_freq'] * epochs))
        ckpt_path = os.path.join(paths.get_checkpoints_dir(),
                                 'epoch-{epoch:02d}.hdf5')
        calls.append(
            callbacks.ModelCheckpoint(ckpt_path,
                                      verbose=1,
                                      save_best_only=True,
                                      period=ckpt_period))

    return calls