def get_callbacks(CONF, use_lr_decay=True):
    """
    Get a callback list to feed fit_generator.
    #TODO Use_remote callback needs proper configuration
    #TODO Add ReduceLROnPlateau callback?

    Parameters
    ----------
    CONF: dict

    Returns
    -------
    List of callbacks
    """
    calls = []

    # Add mandatory callbacks
    calls.append(callbacks.TerminateOnNaN())
    calls.append(LRHistory())

    # Add optional callbacks
    if use_lr_decay:
        milestones = np.array(CONF['training']['lr_step_schedule']) * CONF['training']['epochs']
        milestones = milestones.astype(int)  # `np.int` is deprecated; the builtin works the same here
        calls.append(LR_scheduler(lr_decay=CONF['training']['lr_step_decay'],
                                  epoch_milestones=milestones.tolist()))

    if CONF['monitor']['use_tensorboard']:
        calls.append(callbacks.TensorBoard(log_dir=paths.get_logs_dir(), write_graph=False))

        # # Let the user launch Tensorboard
        # print('Monitor your training in Tensorboard by executing the following command on your console:')
        # print('    tensorboard --logdir={}'.format(paths.get_logs_dir()))

        # Run Tensorboard on a separate Thread/Process on behalf of the user
        port = os.getenv('monitorPORT', 6006)
        port = int(port) if len(str(port)) >= 4 else 6006
        subprocess.run(['fuser', '-k', '{}/tcp'.format(port)])  # kill any previous process on that port
        p = Process(target=launch_tensorboard, args=(port,), daemon=True)
        p.start()

    if CONF['monitor']['use_remote']:
        calls.append(callbacks.RemoteMonitor())

    if CONF['training']['use_validation'] and CONF['training']['use_early_stopping']:
        calls.append(callbacks.EarlyStopping(patience=int(0.1 * CONF['training']['epochs'])))

    if CONF['training']['ckpt_freq'] is not None:
        calls.append(callbacks.ModelCheckpoint(
            os.path.join(paths.get_checkpoints_dir(), 'epoch-{epoch:02d}.hdf5'),
            verbose=1,
            period=max(1, int(CONF['training']['ckpt_freq'] * CONF['training']['epochs']))))

    if not calls:
        calls = None

    return calls
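
# Hedged sketch addressing the "#TODO Add ReduceLROnPlateau callback?" note above: one way
# it could be wired into the optional-callbacks section. The CONF['training']['use_lr_plateau']
# flag is hypothetical (not part of the current conf schema); the ReduceLROnPlateau arguments
# shown are standard Keras ones.
#
#     if CONF['training'].get('use_lr_plateau'):
#         calls.append(callbacks.ReduceLROnPlateau(monitor='val_loss',
#                                                  factor=0.1,   # multiply lr by this on plateau
#                                                  patience=5,   # epochs without improvement
#                                                  min_lr=1e-6))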
def save_default_imagenet_model():
    """
    Create a model in models_dir with default ImageNet training
    """
    CONF = config.get_conf_dict()
    TIMESTAMP = 'default_imagenet'

    # Clear default conf and create custom conf
    for k, v in CONF.items():
        if k in ['general', 'augmentation']:
            continue
        for i, j in v.items():
            CONF[k][i] = None
    CONF['augmentation']['train_mode'] = None

    CONF['model']['modelname'] = 'Xception'
    CONF['model']['image_size'] = 224
    CONF['model']['preprocess_mode'] = model_modes[CONF['model']['modelname']]
    CONF['model']['num_classes'] = 1000
    CONF['dataset']['mean_RGB'] = [123.675, 116.28, 103.53]
    CONF['dataset']['std_RGB'] = [58.395, 57.12, 57.375]

    paths.timestamp = TIMESTAMP
    paths.CONF = CONF

    # Create classes.txt for ImageNet
    fpath = keras.utils.get_file(
        'imagenet_class_index.json',
        'https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json',
        cache_subdir='models',
        file_hash='c2c37ea517e94d9795004a39431a14cb')
    with open(fpath) as f:
        classes = json.load(f)
    classes = np.array(list(classes.values()))[:, 1]

    # Create the model
    architecture = getattr(applications, CONF['model']['modelname'])
    img_width, img_height = CONF['model']['image_size'], CONF['model']['image_size']
    model = architecture(weights='imagenet', include_top=True, input_shape=(img_width, img_height, 3))
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    # Save everything
    utils.create_dir_tree()
    np.savetxt(os.path.join(paths.get_ts_splits_dir(), 'classes.txt'),
               classes, fmt='%s', delimiter='\n')  # '/n' was a typo for the newline delimiter
    save_conf(CONF)
    model.save(os.path.join(paths.get_checkpoints_dir(), 'final_model.h5'),
               include_optimizer=False)  # Model.save takes `filepath`, not `fpath`
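
# Hedged usage sketch: after calling save_default_imagenet_model(), the saved checkpoint
# can be sanity-checked by reloading it from the same path layout used above (compile=False
# because the optimizer was not saved).
#
#     save_default_imagenet_model()
#     m = load_model(os.path.join(paths.get_checkpoints_dir(), 'final_model.h5'),
#                    compile=False)
#     m.summary()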
def load_inference_model(timestamp=None, ckpt_name=None):
    """
    Load a model for prediction.

    Parameters
    ----------
    * timestamp: str
        Name of the timestamp to use. The default is the last timestamp in `./models`.
    * ckpt_name: str
        Name of the checkpoint to use. The default is the last checkpoint in
        `./models/[timestamp]/ckpts`.
    """
    global loaded_ts, loaded_ckpt
    global graph, model, conf, class_names, class_info

    # Set the timestamp
    timestamp_list = next(os.walk(paths.get_models_dir()))[1]
    timestamp_list = sorted(timestamp_list)
    if not timestamp_list:
        raise Exception(
            "You have no models in your `./models` folder to be used for inference. "
            "Therefore the API can only be used for training.")
    elif timestamp is None:
        timestamp = timestamp_list[-1]
    elif timestamp not in timestamp_list:
        raise ValueError(
            "Invalid timestamp name: {}. Available timestamp names are: {}".format(
                timestamp, timestamp_list))
    paths.timestamp = timestamp
    print('Using TIMESTAMP={}'.format(timestamp))

    # Set the checkpoint model to use to make the prediction
    ckpt_list = os.listdir(paths.get_checkpoints_dir())
    ckpt_list = sorted([name for name in ckpt_list if name.endswith('.h5')])
    if not ckpt_list:
        raise Exception(
            "You have no checkpoints in your `./models/{}/ckpts` folder to be used for inference. ".format(timestamp) +
            "Therefore the API can only be used for training.")
    elif ckpt_name is None:
        ckpt_name = ckpt_list[-1]
    elif ckpt_name not in ckpt_list:
        raise ValueError(
            "Invalid checkpoint name: {}. Available checkpoint names are: {}".format(
                ckpt_name, ckpt_list))
    print('Using CKPT_NAME={}'.format(ckpt_name))

    # Clear the previous loaded model
    K.clear_session()

    # Load the class names and info
    splits_dir = paths.get_ts_splits_dir()
    class_names = load_class_names(splits_dir=splits_dir)
    class_info = None
    if 'info.txt' in os.listdir(splits_dir):
        class_info = load_class_info(splits_dir=splits_dir)
        if len(class_info) != len(class_names):
            warnings.warn(
                """The 'classes.txt' file has a different length than the 'info.txt' file.
                If a class has no information whatsoever you should leave that class's row
                empty or put a '-' symbol. The API will run with no info until this is solved.""")
            class_info = None
    if class_info is None:
        class_info = ['' for _ in range(len(class_names))]

    # Load training configuration
    conf_path = os.path.join(paths.get_conf_dir(), 'conf.json')
    with open(conf_path) as f:
        conf = json.load(f)
    update_with_saved_conf(conf)

    # Load the model
    model = load_model(os.path.join(paths.get_checkpoints_dir(), ckpt_name),
                       custom_objects=utils.get_custom_objects())
    graph = tf.get_default_graph()

    # Set the model as loaded
    loaded_ts = timestamp
    loaded_ckpt = ckpt_name
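
# Hedged usage sketch: with TF1-style Keras, predictions issued from another thread
# (e.g. a web worker) must run inside the graph captured above, which is why the function
# stores `graph`. The timestamp value and the `batch` array below are illustrative, not
# part of the project's actual pipeline.
#
#     load_inference_model(ckpt_name='final_model.h5')
#     with graph.as_default():
#         preds = model.predict(batch)          # `batch`: preprocessed numpy array
#     top5 = np.argsort(preds[0])[::-1][:5]
#     print([class_names[i] for i in top5])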
def train_fn(TIMESTAMP, CONF):

    paths.timestamp = TIMESTAMP
    paths.CONF = CONF

    utils.create_dir_tree()
    utils.backup_splits()

    # Load the training data
    X_train, y_train = load_data_splits(splits_dir=paths.get_ts_splits_dir(),
                                        im_dir=paths.get_images_dir(),
                                        split_name='train')

    # Load the validation data
    if (CONF['training']['use_validation']) and ('val.txt' in os.listdir(paths.get_ts_splits_dir())):
        X_val, y_val = load_data_splits(splits_dir=paths.get_ts_splits_dir(),
                                        im_dir=paths.get_images_dir(),
                                        split_name='val')
    else:
        print('No validation data.')
        X_val, y_val = None, None
        CONF['training']['use_validation'] = False

    # Load the class names
    class_names = load_class_names(splits_dir=paths.get_ts_splits_dir())

    # Update the configuration
    CONF['model']['preprocess_mode'] = model_utils.model_modes[CONF['model']['modelname']]
    CONF['training']['batch_size'] = min(CONF['training']['batch_size'], len(X_train))

    if CONF['model']['num_classes'] is None:
        CONF['model']['num_classes'] = len(class_names)

    # Labels are zero-indexed, so num_classes must be strictly greater than the highest label
    assert CONF['model']['num_classes'] > np.amax(y_train), "Your train.txt file has more categories than those defined in classes.txt"
    if CONF['training']['use_validation']:
        assert CONF['model']['num_classes'] > np.amax(y_val), "Your val.txt file has more categories than those defined in classes.txt"

    # Compute the class weights
    if CONF['training']['use_class_weights']:
        class_weights = compute_classweights(y_train, max_dim=CONF['model']['num_classes'])
    else:
        class_weights = None

    # Compute the mean and std RGB values
    if CONF['dataset']['mean_RGB'] is None:
        CONF['dataset']['mean_RGB'], CONF['dataset']['std_RGB'] = compute_meanRGB(X_train)

    # Create data generator for train and val sets
    train_gen = data_sequence(X_train, y_train,
                              batch_size=CONF['training']['batch_size'],
                              num_classes=CONF['model']['num_classes'],
                              im_size=CONF['model']['image_size'],
                              mean_RGB=CONF['dataset']['mean_RGB'],
                              std_RGB=CONF['dataset']['std_RGB'],
                              preprocess_mode=CONF['model']['preprocess_mode'],
                              aug_params=CONF['augmentation']['train_mode'])
    train_steps = int(np.ceil(len(X_train) / CONF['training']['batch_size']))

    if CONF['training']['use_validation']:
        val_gen = data_sequence(X_val, y_val,
                                batch_size=CONF['training']['batch_size'],
                                num_classes=CONF['model']['num_classes'],
                                im_size=CONF['model']['image_size'],
                                mean_RGB=CONF['dataset']['mean_RGB'],
                                std_RGB=CONF['dataset']['std_RGB'],
                                preprocess_mode=CONF['model']['preprocess_mode'],
                                aug_params=CONF['augmentation']['val_mode'])
        val_steps = int(np.ceil(len(X_val) / CONF['training']['batch_size']))
    else:
        val_gen = None
        val_steps = None

    # Launch the training
    t0 = time.time()

    # Create the model and compile it
    model, base_model = model_utils.create_model(CONF)

    # Get a list of the top-layer variables that should not have the lr_multiplier applied
    base_vars = [var.name for var in base_model.trainable_variables]
    all_vars = [var.name for var in model.trainable_variables]
    top_vars = set(all_vars) - set(base_vars)
    top_vars = list(top_vars)

    # Set trainable layers
    if CONF['training']['mode'] == 'fast':
        for layer in base_model.layers:
            layer.trainable = False

    model.compile(optimizer=customAdam(lr=CONF['training']['initial_lr'],
                                       amsgrad=True,
                                       lr_mult=0.1,
                                       excluded_vars=top_vars),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    history = model.fit_generator(generator=train_gen,
                                  steps_per_epoch=train_steps,
                                  epochs=CONF['training']['epochs'],
                                  class_weight=class_weights,
                                  validation_data=val_gen,
                                  validation_steps=val_steps,
                                  callbacks=utils.get_callbacks(CONF),
                                  verbose=1,
                                  max_queue_size=5,
                                  workers=4,
                                  use_multiprocessing=True,
                                  initial_epoch=0)

    # Saving everything
    print('Saving data to {} folder.'.format(paths.get_timestamped_dir()))

    print('Saving training stats ...')
    stats = {'epoch': history.epoch,
             'training time (s)': round(time.time() - t0, 2),
             'timestamp': TIMESTAMP}
    stats.update(history.history)
    stats = json_friendly(stats)
    stats_dir = paths.get_stats_dir()
    with open(os.path.join(stats_dir, 'stats.json'), 'w') as outfile:
        json.dump(stats, outfile, sort_keys=True, indent=4)

    print('Saving the configuration ...')
    model_utils.save_conf(CONF)

    print('Saving the model to h5...')
    fpath = os.path.join(paths.get_checkpoints_dir(), 'final_model.h5')
    model.save(fpath, include_optimizer=False)

    # print('Saving the model to protobuf...')
    # fpath = os.path.join(paths.get_checkpoints_dir(), 'final_model.proto')
    # model_utils.save_to_pb(model, fpath)

    print('Finished')
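
# Hedged usage sketch: launching a training run with the current configuration. The
# timestamp format below is illustrative; the project may format it differently.
#
#     from datetime import datetime
#
#     CONF = config.get_conf_dict()
#     TIMESTAMP = datetime.now().strftime('%Y-%m-%d_%H%M%S')
#     train_fn(TIMESTAMP=TIMESTAMP, CONF=CONF)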
# Set the timestamp
paths.timestamp = TIMESTAMP

# Load the class names
class_names = load_class_names(splits_dir=paths.get_ts_splits_dir())
# NOTE: These are the classes the model had at training time (given by the timestamp),
# not the ones currently in data/dataset_files.

# Load training configuration
conf_path = os.path.join(paths.get_conf_dir(), 'conf.json')
with open(conf_path) as f:
    conf = json.load(f)

# Load the model
model = load_model(os.path.join(paths.get_checkpoints_dir(), MODEL_NAME))
# model = load_model(os.path.join(paths.get_checkpoints_dir(), MODEL_NAME), custom_objects=utils.get_custom_objects())

# NOTE: What follows is based on the "predicting a datasplit txt file" section that Ignacio
# includes in notebook 3.0. This preparation is needed to compute the confusion matrix.
#
# CAUTION: the split you now feed to the model given by the timestamp DOES live in
# data/dataset_files, and it is what you want to test the model with.

SPLIT_NAME = input("Name of the split to evaluate with, from data/dataset_files. Examples: val, train ...: ")

# Load the data
X, y = load_data_splits(splits_dir=paths.get_ts_splits_dir(),
                        im_dir=conf['general']['images_directory'],
                        split_name=SPLIT_NAME)

# Predict
# Add this if you do not want to use augmentation in validation:
#
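
# Hedged sketch of the evaluation step the comments above lead up to: predict over the
# loaded split and compute a confusion matrix. `predict_fn` stands in for whatever batched
# prediction helper the notebook uses (hypothetical name); scikit-learn is assumed to be
# available.
#
#     from sklearn.metrics import confusion_matrix
#
#     pred_probs = predict_fn(model, X, conf)        # shape: (num_samples, num_classes)
#     y_pred = np.argmax(pred_probs, axis=1)
#     cm = confusion_matrix(y_true=y, y_pred=y_pred,
#                           labels=list(range(len(class_names))))
#     print(cm)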
def load_inference_model():
    """
    Load a model for prediction.

    If several timestamps are available in `./models` it will load `.models/api`
    or the last timestamp if `api` is not available.
    If several checkpoints are available in `./models/[timestamp]/ckpts` it will
    load `.models/[timestamp]/ckpts/final_model.h5` or the last checkpoint if
    `final_model.h5` is not available.
    """
    global loaded, graph, model, conf, class_names, class_info

    # Set the timestamp
    timestamps = next(os.walk(paths.get_models_dir()))[1]
    if not timestamps:
        raise BadRequest(
            """You have no models in your `./models` folder to be used for inference.
            Therefore the API can only be used for training.""")
    else:
        if 'api' in timestamps:
            TIMESTAMP = 'api'
        else:
            TIMESTAMP = sorted(timestamps)[-1]
        paths.timestamp = TIMESTAMP
        print('Using TIMESTAMP={}'.format(TIMESTAMP))

        # Set the checkpoint model to use to make the prediction
        ckpts = os.listdir(paths.get_checkpoints_dir())
        if not ckpts:
            raise BadRequest(
                """You have no checkpoints in your `./models/{}/ckpts` folder to be used for inference.
                Therefore the API can only be used for training.""".format(TIMESTAMP))
        else:
            if 'final_model.h5' in ckpts:
                MODEL_NAME = 'final_model.h5'
            else:
                # endswith takes a plain suffix, not a glob pattern like '*.h5'
                MODEL_NAME = sorted(
                    [name for name in ckpts if name.endswith('.h5')])[-1]
            print('Using MODEL_NAME={}'.format(MODEL_NAME))

    # Clear the previous loaded model
    K.clear_session()

    # Load the class names and info
    splits_dir = paths.get_ts_splits_dir()
    class_names = load_class_names(splits_dir=splits_dir)
    class_info = None
    if 'info.txt' in os.listdir(splits_dir):
        class_info = load_class_info(splits_dir=splits_dir)
        if len(class_info) != len(class_names):
            warnings.warn(
                """The 'classes.txt' file has a different length than the 'info.txt' file.
                If a class has no information whatsoever you should leave that class's row
                empty or put a '-' symbol. The API will run with no info until this is solved.""")
            class_info = None
    if class_info is None:
        class_info = ['' for _ in range(len(class_names))]

    # Load training configuration
    conf_path = os.path.join(paths.get_conf_dir(), 'conf.json')
    with open(conf_path) as f:
        conf = json.load(f)

    # Load the model
    model = load_model(os.path.join(paths.get_checkpoints_dir(), MODEL_NAME),
                       custom_objects=utils.get_custom_objects())
    graph = tf.get_default_graph()

    # Set the model as loaded
    loaded = True
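
# Hedged usage sketch: this variant is typically called lazily, guarded by the module-level
# `loaded` flag it sets, so the model is read from disk only once (e.g. on the first
# prediction request). `predict_endpoint` is a hypothetical caller, not part of the module.
#
#     def predict_endpoint(batch):
#         if not loaded:
#             load_inference_model()
#         with graph.as_default():
#             return model.predict(batch)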