Beispiel #1
0
def backup_splits():
    """
    Copy the dataset splits directory into the timestamped splits
    directory, so that the splits used during training are kept with the run.
    """
    copy_tree(paths.get_splits_dir(), paths.get_ts_splits_dir())
Beispiel #2
0
def save_default_imagenet_model():
    """
    Create a model in models_dir with default ImageNet training.

    Steps performed:
      1. Take the default configuration and reset every value to None,
         except for the 'general' and 'augmentation' sections (only
         'train_mode' is cleared inside 'augmentation').
      2. Fill in the model/dataset fields for an Xception network with
         224px inputs and 1000 classes.
      3. Point the `paths` module at the 'default_imagenet' timestamp.
      4. Download the ImageNet class index and write the class names to
         classes.txt in the timestamped splits directory.
      5. Build the pretrained model, compile it and save the configuration
         and the model (without optimizer state) as 'final_model.h5'.
    """
    CONF = config.get_conf_dict()
    TIMESTAMP = 'default_imagenet'

    # Clear default conf and create custom conf
    for section, params in CONF.items():
        if section in ('general', 'augmentation'):
            continue
        # Only existing keys are overwritten, so mutating while iterating
        # is safe (the dict size never changes).
        for key in params:
            params[key] = None
    CONF['augmentation']['train_mode'] = None

    CONF['model']['modelname'] = 'Xception'
    CONF['model']['image_size'] = 224
    CONF['model']['preprocess_mode'] = model_modes[CONF['model']['modelname']]
    CONF['model']['num_classes'] = 1000
    CONF['dataset']['mean_RGB'] = [123.675, 116.28, 103.53]
    CONF['dataset']['std_RGB'] = [58.395, 57.12, 57.375]

    paths.timestamp = TIMESTAMP
    paths.CONF = CONF

    # Create classes.txt for ImageNet
    fpath = keras.utils.get_file(
        'imagenet_class_index.json',
        'https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json',
        cache_subdir='models',
        file_hash='c2c37ea517e94d9795004a39431a14cb')
    with open(fpath) as f:
        class_index = json.load(f)
    # Keep the second field of every entry of the class index.
    classes = np.array(list(class_index.values()))[:, 1]

    # Create the model
    architecture = getattr(applications, CONF['model']['modelname'])
    side = CONF['model']['image_size']
    model = architecture(weights='imagenet',
                         include_top=True,
                         input_shape=(side, side, 3))
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # Save everything
    utils.create_dir_tree()
    # A 1-D array is written one element per line; the former
    # delimiter='/n' argument was a typo and had no effect, so it is dropped.
    np.savetxt(os.path.join(paths.get_ts_splits_dir(), 'classes.txt'),
               classes,
               fmt='%s')
    save_conf(CONF)
    # The path is passed positionally: Model.save() names this parameter
    # `filepath`, so the previous `fpath=` keyword raised a TypeError.
    model.save(os.path.join(paths.get_checkpoints_dir(), 'final_model.h5'),
               include_optimizer=False)
Beispiel #3
0
def train_fn(TIMESTAMP, CONF):
    """
    Run a full training session and save its artifacts.

    The function points the `paths` module at the given timestamp and
    configuration, loads the train (and optionally validation) splits,
    completes the configuration, builds and fits the model and finally
    writes the training stats, the configuration and the final model.

    Parameters
    ----------
    TIMESTAMP :
        Identifier of the run. It is assigned to `paths.timestamp` and stored
        in the saved stats.
    CONF : dict
        Nested configuration with (at least) the 'training', 'model',
        'dataset' and 'augmentation' sections read below. It is MODIFIED IN
        PLACE: 'use_validation', 'preprocess_mode', 'batch_size',
        'num_classes', 'mean_RGB' and 'std_RGB' may be overwritten.

    Returns
    -------
    None
    """

    # Every paths.get_*() call below depends on these two module attributes.
    paths.timestamp = TIMESTAMP
    paths.CONF = CONF

    utils.create_dir_tree()
    utils.backup_splits()

    # Load the training data
    X_train, y_train = load_data_splits(
        splits_dir=paths.get_ts_splits_dir(),
        im_dir=paths.get_images_dir(),
        use_location=CONF['training']['use_location'],
        split_name='train')

    # Load the validation data (only if requested AND a val.txt split exists;
    # otherwise validation is switched off in the configuration itself)
    if (CONF['training']['use_validation']) and ('val.txt' in os.listdir(
            paths.get_ts_splits_dir())):
        X_val, y_val = load_data_splits(
            splits_dir=paths.get_ts_splits_dir(),
            im_dir=paths.get_images_dir(),
            use_location=CONF['training']['use_location'],
            split_name='val')
    else:
        print('No validation data.')
        X_val, y_val = None, None
        CONF['training']['use_validation'] = False

    # Load the class names
    class_names = load_class_names(splits_dir=paths.get_ts_splits_dir())

    # Update the configuration
    CONF['model']['preprocess_mode'] = model_utils.model_modes[CONF['model']
                                                               ['modelname']]
    # The batch size is capped at the number of training samples.
    CONF['training']['batch_size'] = min(CONF['training']['batch_size'],
                                         len(X_train))

    # Default the number of classes to the length of the class-name list.
    if CONF['model']['num_classes'] is None:
        CONF['model']['num_classes'] = len(class_names)

    # Sanity-check the labels against the number of classes.
    # NOTE(review): if labels are 0-indexed, a label equal to num_classes
    # still passes this `>=` check -- confirm whether `>` is intended.
    # NOTE(review): asserts are stripped when Python runs with -O.
    assert CONF['model']['num_classes'] >= np.amax(
        y_train
    ), "Your train.txt file has more categories than those defined in classes.txt"
    if CONF['training']['use_validation']:
        assert CONF['model']['num_classes'] >= np.amax(
            y_val
        ), "Your val.txt file has more categories than those defined in classes.txt"

    # Compute the class weights
    if CONF['training']['use_class_weights']:
        class_weights = compute_classweights(
            y_train, max_dim=CONF['model']['num_classes'])
    else:
        class_weights = None

    # Compute the mean and std RGB values (only when no mean is configured;
    # in that case both the mean and the std entries are overwritten)
    if CONF['dataset']['mean_RGB'] is None:
        CONF['dataset']['mean_RGB'], CONF['dataset'][
            'std_RGB'] = compute_meanRGB(X_train)

    # Create data generators for the train and val sets
    train_gen = data_sequence(X_train,
                              y_train,
                              batch_size=CONF['training']['batch_size'],
                              num_classes=CONF['model']['num_classes'],
                              im_size=CONF['model']['image_size'],
                              mean_RGB=CONF['dataset']['mean_RGB'],
                              std_RGB=CONF['dataset']['std_RGB'],
                              preprocess_mode=CONF['model']['preprocess_mode'],
                              aug_params=CONF['augmentation']['train_mode'])
    # Number of batches needed to cover the training set once.
    train_steps = int(np.ceil(len(X_train) / CONF['training']['batch_size']))

    if CONF['training']['use_validation']:
        val_gen = data_sequence(
            X_val,
            y_val,
            batch_size=CONF['training']['batch_size'],
            num_classes=CONF['model']['num_classes'],
            im_size=CONF['model']['image_size'],
            mean_RGB=CONF['dataset']['mean_RGB'],
            std_RGB=CONF['dataset']['std_RGB'],
            preprocess_mode=CONF['model']['preprocess_mode'],
            aug_params=CONF['augmentation']['val_mode'])
        val_steps = int(np.ceil(len(X_val) / CONF['training']['batch_size']))
    else:
        val_gen = None
        val_steps = None

    # Launch the training
    # The timer starts here, so the reported training time also includes
    # model creation and compilation.
    t0 = time.time()

    # Create the model and compile it
    model, base_model = model_utils.create_model(CONF)

    # Get a list of the top layer variables that should not be applied a lr_multiplier
    # (variables of the full model that do not belong to the base model).
    # This is collected BEFORE any layer is frozen below; presumably the
    # trainable-variable lists would differ afterwards -- keep this order.
    base_vars = [var.name for var in base_model.trainable_variables]
    all_vars = [var.name for var in model.trainable_variables]
    top_vars = set(all_vars) - set(base_vars)
    top_vars = list(top_vars)

    # Set trainable layers: in 'fast' mode every base-model layer is frozen
    if CONF['training']['mode'] == 'fast':
        for layer in base_model.layers:
            layer.trainable = False

    model.compile(optimizer=customAdam(lr=CONF['training']['initial_lr'],
                                       amsgrad=True,
                                       lr_mult=0.1,
                                       excluded_vars=top_vars),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    history = model.fit_generator(generator=train_gen,
                                  steps_per_epoch=train_steps,
                                  epochs=CONF['training']['epochs'],
                                  class_weight=class_weights,
                                  validation_data=val_gen,
                                  validation_steps=val_steps,
                                  callbacks=utils.get_callbacks(CONF),
                                  verbose=1,
                                  max_queue_size=5,
                                  workers=4,
                                  use_multiprocessing=True,
                                  initial_epoch=0)

    # Saving everything
    print('Saving data to {} folder.'.format(paths.get_timestamped_dir()))
    print('Saving training stats ...')
    stats = {
        'epoch': history.epoch,
        'training time (s)': round(time.time() - t0, 2),
        'timestamp': TIMESTAMP
    }
    # Merge in the per-epoch values recorded by Keras in history.history.
    stats.update(history.history)
    # Converted with the project's json_friendly helper before dumping.
    stats = json_friendly(stats)
    stats_dir = paths.get_stats_dir()
    with open(os.path.join(stats_dir, 'stats.json'), 'w') as outfile:
        json.dump(stats, outfile, sort_keys=True, indent=4)

    print('Saving the configuration ...')
    model_utils.save_conf(CONF)

    print('Saving the model to h5...')
    fpath = os.path.join(paths.get_checkpoints_dir(), 'final_model.h5')
    model.save(fpath, include_optimizer=False)

    # print('Saving the model to protobuf...')
    # fpath = os.path.join(paths.get_checkpoints_dir(), 'final_model.proto')
    # model_utils.save_to_pb(model, fpath)

    print('Finished')
Beispiel #4
0
# Script fragment: ask the user which trained model to use, then load the
# class names and the training configuration stored for that timestamp.
#
# Disabled interactive prompt for the location flag (kept for reference):
#def use_location():
#    location = input("¿Usas location? si o no?")
#    if location == "si":
#        return True
#    if location == "no":
#        return False
#use_lo = use_location()  # Not needed: the model was already trained with location and that is stored in its configuration
TIMESTAMP = input("Indica el timestamp. Sin espacios. Mismo formato que en models: ")                       # timestamp of the model
MODEL_NAME = input("Indica el nombre del modelo que se encuentra en ckpts. Sin espacios: ")                           # model to use to make the prediction
#TOP_K = input("Indica el numero de top K: ")                                               # number of top classes predictions to save
TOP_K = 8  # hard-coded replacement for the disabled prompt above
# Set the timestamp
paths.timestamp = TIMESTAMP

# Load the data
print(paths.get_ts_splits_dir())
class_names = load_class_names(splits_dir=paths.get_ts_splits_dir())  # NOTE: these are the classes the model had
# at the time it was trained (given by the timestamp), not the ones currently in data/dataset_files
# Load training configuration
conf_path = os.path.join(paths.get_conf_dir(), 'conf.json')
with open(conf_path) as f:
    conf = json.load(f)
#print(conf(['training']['use_location']))
print(type(conf))  # debug output
# The location flag is read from the saved training configuration.
use_lo = conf['training']['use_location']
if use_lo:
    print("Estás usando un modelo con localización, necesitas que los splits de validación, train etc que uses tengan dicha etiqueta") 
# Load the model
#print("--------------------------------------------------------------------------------------------------------------------------------------------------------------------------antes")
#model = load_model(os.path.join(paths.get_checkpoints_dir(), MODEL_NAME),custom_objects={'customAdam':customAdam },compile=False)
# NOTE: compile=False is used because apparently the intermediate checkpoints that get saved ... (the original note is cut off here)
Beispiel #5
0
#        return False
# use_lo = use_location()  # Not needed: the model was already trained with location and that is stored in its configuration

# Script fragment: ask the user which trained model to use, then load the
# class names and the training configuration stored for that timestamp.
TIMESTAMP = input(
    "Indica el timestamp. Sin espacios. Mismo formato que en models: "
)  # timestamp of the model
MODEL_NAME = input(
    "Indica el nombre del modelo que se encuentra en ckpts. Sin espacios: "
)  # model to use to make the prediction
# input() always returns a string; convert it so TOP_K really is a number
# (a non-numeric answer now fails here with a ValueError).
TOP_K = int(input(
    "Indica el numero de top K: "))  # number of top classes predictions to save

# Set the timestamp
paths.timestamp = TIMESTAMP

# Load the data
# NOTE: these are the classes the model had at the time it was trained
# (given by the timestamp), not the ones currently in data/dataset_files.
class_names = load_class_names(splits_dir=paths.get_ts_splits_dir())
# Load training configuration
conf_path = os.path.join(paths.get_conf_dir(), 'conf.json')
with open(conf_path) as f:
    conf = json.load(f)
#print(conf(['training']['use_location']))
print(type(conf))  # debug output
# The location flag is read from the saved training configuration.
use_lo = conf['training']['use_location']
if use_lo:
    print(
        "Estás usando un modelo con localización, necesitas que los splits de validación, train etc que uses tengan dicha etiqueta"
    )
# Load the model
#print("--------------------------------------------------------------------------------------------------------------------------------------------------------------------------antes")
#model = load_model(os.path.join(paths.get_checkpoints_dir(), MODEL_NAME),custom_objects={'customAdam':customAdam },compile=False)