Example #1
def get_callbacks(CONF, use_lr_decay=True):
    """
    Get a callback list to feed fit_generator.
    #TODO Use_remote callback needs proper configuration
    #TODO Add ReduceLROnPlateau callback?

    Parameters
    ----------
    CONF: dict
        Configuration dictionary.
    use_lr_decay: bool
        Whether to append the step-decay learning-rate scheduler.

    Returns
    -------
    List of callbacks
    """

    calls = []

    # Add mandatory callbacks
    calls.append(callbacks.TerminateOnNaN())
    calls.append(LRHistory())

    # Add optional callbacks
    if use_lr_decay:
        milestones = np.array(CONF['training']['lr_step_schedule']) * CONF['training']['epochs']
        milestones = milestones.astype(int)  # np.int is a deprecated alias of the builtin int
        calls.append(LR_scheduler(lr_decay=CONF['training']['lr_step_decay'],
                                  epoch_milestones=milestones.tolist()))

    if CONF['monitor']['use_tensorboard']:
        calls.append(callbacks.TensorBoard(log_dir=paths.get_logs_dir(), write_graph=False))

        # # Let the user launch Tensorboard
        # print('Monitor your training in Tensorboard by executing the following command on your console:')
        # print('    tensorboard --logdir={}'.format(paths.get_logs_dir()))
        # Run Tensorboard on a separate Thread/Process on behalf of the user
        port = os.getenv('monitorPORT', 6006)
        port = int(port) if len(str(port)) >= 4 else 6006
        subprocess.run(['fuser', '-k', '{}/tcp'.format(port)]) # kill any previous process in that port
        p = Process(target=launch_tensorboard, args=(port,), daemon=True)
        p.start()

    if CONF['monitor']['use_remote']:
        calls.append(callbacks.RemoteMonitor())

    if CONF['training']['use_validation'] and CONF['training']['use_early_stopping']:
        calls.append(callbacks.EarlyStopping(patience=int(0.1 * CONF['training']['epochs'])))

    if CONF['training']['ckpt_freq'] is not None:
        calls.append(callbacks.ModelCheckpoint(
            os.path.join(paths.get_checkpoints_dir(), 'epoch-{epoch:02d}.hdf5'),
            verbose=1,
            period=max(1, int(CONF['training']['ckpt_freq'] * CONF['training']['epochs']))))

    if not calls:
        calls = None

    return calls
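
A minimal usage sketch for get_callbacks; the CONF layout below is inferred from the keys the function reads, and all values are illustrative:

# Illustrative CONF: only the keys that get_callbacks actually reads.
CONF = {
    'training': {'epochs': 20,
                 'lr_step_schedule': [0.7, 0.9],  # fractions of the total epochs
                 'lr_step_decay': 0.1,
                 'use_validation': True,
                 'use_early_stopping': False,
                 'ckpt_freq': None},
    'monitor': {'use_tensorboard': False,
                'use_remote': False},
}
calls = get_callbacks(CONF, use_lr_decay=True)  # -> [TerminateOnNaN, LRHistory, LR_scheduler]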
Example #2
def save_default_imagenet_model():
    """
    Create a model in models_dir with default ImageNet training
    """
    CONF = config.get_conf_dict()
    TIMESTAMP = 'default_imagenet'

    # Clear default conf and create custom conf
    for k, v in CONF.items():
        if k in ['general', 'augmentation']:
            continue
        for i, j in v.items():
            CONF[k][i] = None
    CONF['augmentation']['train_mode'] = None

    CONF['model']['modelname'] = 'Xception'
    CONF['model']['image_size'] = 224
    CONF['model']['preprocess_mode'] = model_modes[CONF['model']['modelname']]
    CONF['model']['num_classes'] = 1000
    CONF['dataset']['mean_RGB'] = [123.675, 116.28, 103.53]
    CONF['dataset']['std_RGB'] = [58.395, 57.12, 57.375]

    paths.timestamp = TIMESTAMP
    paths.CONF = CONF

    # Create classes.txt for ImageNet
    fpath = keras.utils.get_file(
        'imagenet_class_index.json',
        'https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json',
        cache_subdir='models',
        file_hash='c2c37ea517e94d9795004a39431a14cb')
    with open(fpath) as f:
        classes = json.load(f)
    classes = np.array(list(classes.values()))[:, 1]

    # Create the model
    architecture = getattr(applications, CONF['model']['modelname'])
    img_width, img_height = CONF['model']['image_size'], CONF['model']['image_size']
    model = architecture(weights='imagenet', include_top=True, input_shape=(img_width, img_height, 3))
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    # Save everything
    utils.create_dir_tree()
    np.savetxt(os.path.join(paths.get_ts_splits_dir(), 'classes.txt'), classes, fmt='%s', delimiter='\n')
    save_conf(CONF)
    model.save(os.path.join(paths.get_checkpoints_dir(), 'final_model.h5'),
               include_optimizer=False)
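
For context, imagenet_class_index.json maps a stringified class index to a [WNID, label] pair, which is why the snippet above keeps only column 1; a tiny self-contained illustration:

import numpy as np

# Two entries in the same shape as imagenet_class_index.json
classes = {"0": ["n01440764", "tench"], "1": ["n01443537", "goldfish"]}
labels = np.array(list(classes.values()))[:, 1]
print(labels)  # ['tench' 'goldfish']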
Example #3
def load_inference_model(timestamp=None, ckpt_name=None):
    """
    Load a model for prediction.

    Parameters
    ----------
    * timestamp: str
        Name of the timestamp to use. The default is the last timestamp in `./models`.
    * ckpt_name: str
        Name of the checkpoint to use. The default is the last checkpoint in `./models/[timestamp]/ckpts`.
    """
    global loaded_ts, loaded_ckpt
    global graph, model, conf, class_names, class_info

    # Set the timestamp
    timestamp_list = next(os.walk(paths.get_models_dir()))[1]
    timestamp_list = sorted(timestamp_list)
    if not timestamp_list:
        raise Exception(
            "You have no models in your `./models` folder to be used for inference. "
            "Therefore the API can only be used for training.")
    elif timestamp is None:
        timestamp = timestamp_list[-1]
    elif timestamp not in timestamp_list:
        raise ValueError(
            "Invalid timestamp name: {}. Available timestamp names are: {}".
            format(timestamp, timestamp_list))
    paths.timestamp = timestamp
    print('Using TIMESTAMP={}'.format(timestamp))

    # Set the checkpoint model to use to make the prediction
    ckpt_list = os.listdir(paths.get_checkpoints_dir())
    ckpt_list = sorted([name for name in ckpt_list if name.endswith('.h5')])
    if not ckpt_list:
        raise Exception(
            "You have no checkpoints in your `./models/{}/ckpts` folder to be used for inference. "
            .format(timestamp) +
            "Therefore the API can only be used for training.")
    elif ckpt_name is None:
        ckpt_name = ckpt_list[-1]
    elif ckpt_name not in ckpt_list:
        raise ValueError(
            "Invalid checkpoint name: {}. Available checkpoint names are: {}".
            format(ckpt_name, ckpt_list))
    print('Using CKPT_NAME={}'.format(ckpt_name))

    # Clear the previous loaded model
    K.clear_session()

    # Load the class names and info
    splits_dir = paths.get_ts_splits_dir()
    class_names = load_class_names(splits_dir=splits_dir)
    class_info = None
    if 'info.txt' in os.listdir(splits_dir):
        class_info = load_class_info(splits_dir=splits_dir)
        if len(class_info) != len(class_names):
            warnings.warn(
                """The 'classes.txt' file has a different length than the 'info.txt' file.
            If a class has no information whatsoever you should leave that classes row empty or put a '-' symbol.
            The API will run with no info until this is solved.""")
            class_info = None
    if class_info is None:
        class_info = ['' for _ in range(len(class_names))]

    # Load training configuration
    conf_path = os.path.join(paths.get_conf_dir(), 'conf.json')
    with open(conf_path) as f:
        conf = json.load(f)
        update_with_saved_conf(conf)

    # Load the model
    model = load_model(os.path.join(paths.get_checkpoints_dir(), ckpt_name),
                       custom_objects=utils.get_custom_objects())
    graph = tf.get_default_graph()

    # Set the model as loaded
    loaded_ts = timestamp
    loaded_ckpt = ckpt_name
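
Hypothetical calls (the timestamp name below is made up); omitting both arguments falls back to the latest timestamp and latest checkpoint:

load_inference_model(timestamp='2019-05-17_134619', ckpt_name='final_model.h5')
load_inference_model()  # latest timestamp, latest checkpoint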
Example #4
def train_fn(TIMESTAMP, CONF):

    paths.timestamp = TIMESTAMP
    paths.CONF = CONF

    utils.create_dir_tree()
    utils.backup_splits()

    # Load the training data
    X_train, y_train = load_data_splits(splits_dir=paths.get_ts_splits_dir(),
                                        im_dir=paths.get_images_dir(),
                                        split_name='train')

    # Load the validation data
    if (CONF['training']['use_validation']) and ('val.txt' in os.listdir(paths.get_ts_splits_dir())):
        X_val, y_val = load_data_splits(splits_dir=paths.get_ts_splits_dir(),
                                        im_dir=paths.get_images_dir(),
                                        split_name='val')
    else:
        print('No validation data.')
        X_val, y_val = None, None
        CONF['training']['use_validation'] = False

    # Load the class names
    class_names = load_class_names(splits_dir=paths.get_ts_splits_dir())

    # Update the configuration
    CONF['model']['preprocess_mode'] = model_utils.model_modes[CONF['model']['modelname']]
    CONF['training']['batch_size'] = min(CONF['training']['batch_size'], len(X_train))

    if CONF['model']['num_classes'] is None:
        CONF['model']['num_classes'] = len(class_names)

    assert CONF['model']['num_classes'] >= np.amax(y_train), "Your train.txt file has more categories than those defined in classes.txt"
    if CONF['training']['use_validation']:
        assert CONF['model']['num_classes'] >= np.amax(y_val), "Your val.txt file has more categories than those defined in classes.txt"

    # Compute the class weights
    if CONF['training']['use_class_weights']:
        class_weights = compute_classweights(y_train,
                                             max_dim=CONF['model']['num_classes'])
    else:
        class_weights = None

    # Compute the mean and std RGB values
    if CONF['dataset']['mean_RGB'] is None:
        CONF['dataset']['mean_RGB'], CONF['dataset']['std_RGB'] = compute_meanRGB(X_train)

    # Create data generators for the train and val sets
    train_gen = data_sequence(X_train, y_train,
                              batch_size=CONF['training']['batch_size'],
                              num_classes=CONF['model']['num_classes'],
                              im_size=CONF['model']['image_size'],
                              mean_RGB=CONF['dataset']['mean_RGB'],
                              std_RGB=CONF['dataset']['std_RGB'],
                              preprocess_mode=CONF['model']['preprocess_mode'],
                              aug_params=CONF['augmentation']['train_mode'])
    train_steps = int(np.ceil(len(X_train)/CONF['training']['batch_size']))

    if CONF['training']['use_validation']:
        val_gen = data_sequence(X_val, y_val,
                                batch_size=CONF['training']['batch_size'],
                                num_classes=CONF['model']['num_classes'],
                                im_size=CONF['model']['image_size'],
                                mean_RGB=CONF['dataset']['mean_RGB'],
                                std_RGB=CONF['dataset']['std_RGB'],
                                preprocess_mode=CONF['model']['preprocess_mode'],
                                aug_params=CONF['augmentation']['val_mode'])
        val_steps = int(np.ceil(len(X_val)/CONF['training']['batch_size']))
    else:
        val_gen = None
        val_steps = None

    # Launch the training
    t0 = time.time()

    # Create the model and compile it
    model, base_model = model_utils.create_model(CONF)

    # Get a list of the top-layer variables that should not have the lr_multiplier applied
    base_vars = [var.name for var in base_model.trainable_variables]
    all_vars = [var.name for var in model.trainable_variables]
    top_vars = set(all_vars) - set(base_vars)
    top_vars = list(top_vars)

    # Set trainable layers
    if CONF['training']['mode'] == 'fast':
        for layer in base_model.layers:
            layer.trainable = False

    model.compile(optimizer=customAdam(lr=CONF['training']['initial_lr'],
                                       amsgrad=True,
                                       lr_mult=0.1,
                                       excluded_vars=top_vars
                                       ),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    history = model.fit_generator(generator=train_gen,
                                  steps_per_epoch=train_steps,
                                  epochs=CONF['training']['epochs'],
                                  class_weight=class_weights,
                                  validation_data=val_gen,
                                  validation_steps=val_steps,
                                  callbacks=utils.get_callbacks(CONF),
                                  verbose=1, max_queue_size=5, workers=4,
                                  use_multiprocessing=True, initial_epoch=0)

    # Saving everything
    print('Saving data to {} folder.'.format(paths.get_timestamped_dir()))
    print('Saving training stats ...')
    stats = {'epoch': history.epoch,
             'training time (s)': round(time.time()-t0, 2),
             'timestamp': TIMESTAMP}
    stats.update(history.history)
    stats = json_friendly(stats)
    stats_dir = paths.get_stats_dir()
    with open(os.path.join(stats_dir, 'stats.json'), 'w') as outfile:
        json.dump(stats, outfile, sort_keys=True, indent=4)

    print('Saving the configuration ...')
    model_utils.save_conf(CONF)

    print('Saving the model to h5...')
    fpath = os.path.join(paths.get_checkpoints_dir(), 'final_model.h5')
    model.save(fpath,
               include_optimizer=False)

    # print('Saving the model to protobuf...')
    # fpath = os.path.join(paths.get_checkpoints_dir(), 'final_model.proto')
    # model_utils.save_to_pb(model, fpath)

    print('Finished')
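
A plausible invocation of train_fn, assuming the config.get_conf_dict() helper used in Example #2; the timestamp naming scheme is illustrative:

import time

CONF = config.get_conf_dict()                 # default configuration, as in Example #2
TIMESTAMP = time.strftime('%Y-%m-%d_%H%M%S')  # illustrative run identifier
train_fn(TIMESTAMP=TIMESTAMP, CONF=CONF)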
Example #5

# Set the timestamp
paths.timestamp = TIMESTAMP

# Load the data
print("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++antes de class names")
class_names = load_class_names(splits_dir=paths.get_ts_splits_dir()) # INCISO: Estas son las clases que había en el modelo
# en el momento en el que estrenaste (dado por el timestamp). No las que tienes en data/dataset_files
print("----------------------------------------------------------------------despues de class names")
# Load training configuration
conf_path = os.path.join(paths.get_conf_dir(), 'conf.json')
with open(conf_path) as f:
    conf = json.load(f)
    
# Load the model
print("--------------------------------------------------------------------------------------------------------------------------------------------------------------------------antes")
model = load_model(os.path.join(paths.get_checkpoints_dir(), MODEL_NAME))
#model = load_model(os.path.join(paths.get_checkpoints_dir(), MODEL_NAME), custom_objects=utils.get_custom_objects())
print("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++después")
# NOTE: The part that follows is based on the 'predicting a datasplit txt file' section that Ignacio
# includes in notebook 3.0. This preparation is needed to compute the confusion matrix.
#
# CAREFUL: the split you feed here to test the model given by the timestamp DOES live in
# data/dataset_files, AND IT IS WHAT YOU ACTUALLY WANT TO TEST THE MODEL WITH.
SPLIT_NAME = input("Enter the name of the split to evaluate with (from data/dataset_files), e.g. val, train ...: ")
# Load the data
X, y = load_data_splits(splits_dir=paths.get_ts_splits_dir(),
                        im_dir=conf['general']['images_directory'],
                        split_name=SPLIT_NAME)
# Predict
# Add the following if you want to skip augmentation during validation (see the sketch below):
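# One plausible continuation (a sketch, not the author's code): reuse the
# data_sequence signature from Example #4 and pass aug_params=None, assuming
# the generator treats None as "no augmentation".
data_gen = data_sequence(X, y,
                         batch_size=conf['training']['batch_size'],
                         num_classes=conf['model']['num_classes'],
                         im_size=conf['model']['image_size'],
                         mean_RGB=conf['dataset']['mean_RGB'],
                         std_RGB=conf['dataset']['std_RGB'],
                         preprocess_mode=conf['model']['preprocess_mode'],
                         aug_params=None)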
Example #6

def load_inference_model():
    """
    Load a model for prediction.

    If several timestamps are available in `./models`, it will load `./models/api`, or the last timestamp if `api` is
    not available.
    If several checkpoints are available in `./models/[timestamp]/ckpts`, it will load
    `./models/[timestamp]/ckpts/final_model.h5`, or the last checkpoint if `final_model.h5` is not available.
    """
    global loaded, graph, model, conf, class_names, class_info

    # Set the timestamp
    timestamps = next(os.walk(paths.get_models_dir()))[1]
    if not timestamps:
        raise BadRequest(
            """You have no models in your `./models` folder to be used for inference.
            Therefore the API can only be used for training.""")
    else:
        if 'api' in timestamps:
            TIMESTAMP = 'api'
        else:
            TIMESTAMP = sorted(timestamps)[-1]
        paths.timestamp = TIMESTAMP
        print('Using TIMESTAMP={}'.format(TIMESTAMP))

        # Set the checkpoint model to use to make the prediction
        ckpts = os.listdir(paths.get_checkpoints_dir())
        if not ckpts:
            raise BadRequest(
                """You have no checkpoints in your `./models/{}/ckpts` folder to be used for inference.
                Therefore the API can only be used for training.""".format(
                    TIMESTAMP))
        else:
            if 'final_model.h5' in ckpts:
                MODEL_NAME = 'final_model.h5'
            else:
                MODEL_NAME = sorted(
                    [name for name in ckpts if name.endswith('.h5')])[-1]
            print('Using MODEL_NAME={}'.format(MODEL_NAME))

            # Clear the previous loaded model
            K.clear_session()

            # Load the class names and info
            splits_dir = paths.get_ts_splits_dir()
            class_names = load_class_names(splits_dir=splits_dir)
            class_info = None
            if 'info.txt' in os.listdir(splits_dir):
                class_info = load_class_info(splits_dir=splits_dir)
                if len(class_info) != len(class_names):
                    warnings.warn(
                        """The 'classes.txt' file has a different length than the 'info.txt' file.
                    If a class has no information whatsoever you should leave that classes row empty or put a '-' symbol.
                    The API will run with no info until this is solved.""")
                    class_info = None
            if class_info is None:
                class_info = ['' for _ in range(len(class_names))]

            # Load training configuration
            conf_path = os.path.join(paths.get_conf_dir(), 'conf.json')
            with open(conf_path) as f:
                conf = json.load(f)

            # Load the model
            model = load_model(os.path.join(paths.get_checkpoints_dir(),
                                            MODEL_NAME),
                               custom_objects=utils.get_custom_objects())
            graph = tf.get_default_graph()

    # Set the model as loaded
    loaded = True
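
The module-level graph handle is the usual TF1-era workaround for running predict from a different thread than the one that loaded the model; a minimal sketch, assuming conf.json carries the image_size key used elsewhere in these examples:

import numpy as np

load_inference_model()
size = conf['model']['image_size']
dummy = np.zeros((1, size, size, 3))  # illustrative input batch
with graph.as_default():              # reuse the graph captured at load time
    preds = model.predict(dummy)
print(preds.shape)                    # (1, num_classes)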