def find_last(self, verbose=0):
    '''
    Finds the last checkpoint file of the last trained model in the
    model directory.

    Sub-directories of ``self.model_dir`` whose names start with the
    lower-cased ``self.config.NAME`` are examined from the last to the
    first (sorted by name). The search stops at the first one containing
    at least one file whose name starts with the same key; the last such
    file (sorted by name) is the checkpoint.

    verbose: when truthy, print/log the folders and files examined.

    Returns:
    --------
    log_dir:         The directory where events and weights are saved
    checkpoint_path: the path to the last checkpoint file

    ``(None, None)`` is returned when the model directory cannot be listed
    or has no matching sub-directory. When matching sub-directories exist
    but none holds a checkpoint, the directory examined last is returned
    together with ``None`` as the checkpoint.
    '''
    key = self.config.NAME.lower()
    if verbose:
        print('>>> find_last checkpoint in : ', self.model_dir)

    # os.walk() yields nothing for a missing/unreadable directory; give
    # next() a default so that StopIteration does not escape to the caller.
    top_level = next(os.walk(self.model_dir), None)
    if top_level is None:
        return None, None

    # Each matching directory corresponds to one model run
    dir_names = sorted(d for d in top_level[1] if d.startswith(key))
    if verbose:
        print(' Dir starting with : ', key, ' :', dir_names)
    if not dir_names:
        return None, None

    dir_name, checkpoint = None, None

    ## Loop over folders to find the most recent folder with a valid weights file
    for search_dir in reversed(dir_names):
        dir_name = os.path.join(self.model_dir, search_dir)
        listing = next(os.walk(dir_name), None)
        file_names = listing[2] if listing is not None else []
        checkpoints = sorted(f for f in file_names if f.startswith(key))
        if verbose:
            print(' Folder: ', dir_name)
            print(' Checkpoints: ', checkpoints)
        if checkpoints:
            checkpoint = os.path.join(dir_name, checkpoints[-1])
            break

    if verbose:
        log(" find_last(): dir_name: {}".format(
            'NotFound' if dir_name is None else dir_name))
        log(" find_last(): checkpoint: {}".format(
            'NotFound' if checkpoint is None else checkpoint))
    return dir_name, checkpoint
def compile_only(self, learning_rate, layers):
    '''
    Select the trainable layers and compile the model, without passing any
    loss list to ``self.compile`` and without starting a training run.

    learning_rate:  The learning rate to compile with
    layers:         Selects which layers to train. It can be:
                    - A regular expression to match layer names to train
                    - A key of ``self.layer_regex``, in which case the
                      regular expression stored under that key is used

    The metric names that callbacks would see (the deduplicated metric
    names plus their ``val_`` counterparts) are printed for information.
    '''
    # Translate a predefined layer-group name into its regular expression
    if layers in self.layer_regex:
        layers = self.layer_regex[layers]

    momentum_msg = "Compile with learing rate; {} Learning Moementum: {} "
    log(momentum_msg.format(learning_rate, self.config.LEARNING_MOMENTUM))
    folder_msg = "Checkpoint Folder: {} "
    log(folder_msg.format(self.checkpoint_path))

    self.set_trainable(layers)
    self.compile(learning_rate, self.config.LEARNING_MOMENTUM)

    metric_names = self.get_deduped_metrics_names()
    val_metric_names = ['val_' + name for name in metric_names]
    callback_metrics = metric_names + val_metric_names

    print(
        'Callback_metrics are: ( val + _get_deduped_metrics_names() )\n')
    pp.pprint(callback_metrics)
def set_log_dir(self, model_path=None):
    """Derive the log directory, checkpoint path template and epoch counter.

    model_path: Path of a checkpoint file. When it is empty/None, or does
        not look like ``.../<name><YYYYMMDD>T<HHMM>/mask_rcnn_<name>_<NNNN>.h5``,
        the current time is used for the log directory name and the epoch
        counter starts from 0. Otherwise the timestamp and the epoch number
        are taken from the path.

    Sets ``self.tb_dir``, ``self.epoch``, ``self.log_dir`` and
    ``self.checkpoint_path``.
    """
    self.tb_dir = os.path.join(self.model_dir, 'tensorboard')

    # Defaults describe a brand-new model
    timestamp = datetime.datetime.now()
    resume_epoch = 0

    if model_path:
        # A sample model path might look like:
        # /path/to/logs/coco20171029T2315/mask_rcnn_coco_0001.h5
        model_path = model_path.replace('\\', "/")
        regex = r".*/\w+(\d{4})(\d{2})(\d{2})T(\d{2})(\d{2})/mask\_rcnn\_\w+(\d{4})\.h5"
        found = re.match(regex, model_path)
        if found:
            year, month, day, hour, minute, epoch_in_name = (
                int(part) for part in found.groups())
            timestamp = datetime.datetime(year, month, day, hour, minute)
            # Next epoch to run is the one after the checkpointed epoch
            resume_epoch = epoch_in_name + 1

    # A larger epoch recorded in the config wins over the one in the file
    # name, but only when a checkpoint epoch was actually recovered.
    config_is_ahead = (resume_epoch > 0
                       and self.config.LAST_EPOCH_RAN > resume_epoch)
    self.epoch = self.config.LAST_EPOCH_RAN if config_is_ahead else resume_epoch

    # Directory for training logs
    run_folder = "{}{:%Y%m%dT%H%M}".format(self.config.NAME.lower(), timestamp)
    self.log_dir = os.path.join(self.model_dir, run_folder)

    # Path to save after each epoch. The "{epoch:04d}" placeholder is left
    # in the path to be filled in later (by Keras).
    checkpoint_name = "mask_rcnn_{}_*epoch*.h5".format(self.config.NAME.lower())
    path_template = os.path.join(self.log_dir, checkpoint_name)
    self.checkpoint_path = path_template.replace("*epoch*", "{epoch:04d}")

    log(' set_log_dir: Checkpoint path set to : {}'.format(
        self.checkpoint_path))
    log(' set_log_dir: self.epoch set to {} '.format(self.epoch))
def run_graph(self, images, outputs):
    '''Runs a sub-set of the computation graph that computes the given
    outputs.

    images:  Images to run through the graph; they are prepared with
             ``self.mold_inputs`` before being fed to the model.
    outputs: List of tuples (name, tensor) to compute. The tensors are
             symbolic TensorFlow tensors and the names are for easy tracking.

    Returns an ordered dict of results. Keys are the names received in the
    input and values are Numpy arrays. Every result is also passed to
    ``log``.
    '''
    model = self.keras_model

    # Organize desired outputs into an ordered dict
    outputs = OrderedDict(outputs)
    for tensor in outputs.values():
        assert tensor is not None

    feed_learning_phase = model.uses_learning_phase and not isinstance(
        KB.learning_phase(), int)

    # Build a Keras function to run parts of the computation graph.
    # Work on a copy of the input list: extending `model.inputs` in place
    # would append another learning-phase tensor to the model itself on
    # every call.
    inputs = list(model.inputs)
    if feed_learning_phase:
        inputs.append(KB.learning_phase())
    kf = KB.function(inputs, list(outputs.values()))

    # Run inference (the windows returned by mold_inputs are not needed here)
    molded_images, image_metas, _windows = self.mold_inputs(images)
    # TODO: support training mode?
    model_in = [molded_images, image_metas]
    if feed_learning_phase:
        # 0. selects the test/inference learning phase
        model_in.append(0.)
    outputs_np = kf(model_in)

    # Pack the generated Numpy arrays into a dict and log the results.
    outputs_np = OrderedDict(zip(outputs.keys(), outputs_np))
    for name, value in outputs_np.items():
        log(name, value)
    return outputs_np
def load_weights(self, filepath, by_name=False, exclude=None):
    """
    Modified version of the corresponding Keras function with the addition
    of multi-GPU support and the ability to exclude some layers from loading.

    filepath: path of the HDF5 weights file to read
    by_name:  load weights by layer name instead of by topological order;
              forced to True whenever `exclude` is given
    exclude:  list of layer names to exclude

    After loading, ``self.set_log_dir(filepath)`` is called so the log
    directory and epoch counter follow the loaded file.

    Returns `filepath`.
    """
    import h5py
    from keras.engine import topology

    print('>>> load_weights()')
    if exclude:
        by_name = True

    log(' load_weights: Loading weights from: {}'.format(filepath))

    # In multi-GPU training, we wrap the model. Get layers
    # of the inner model because they have the weights.
    keras_model = self.keras_model
    layers = keras_model.inner_model.layers if hasattr(keras_model, "inner_model")\
        else keras_model.layers

    # Exclude some layers. A real list (not a one-shot filter iterator) is
    # used so the selection can be traversed more than once.
    if exclude:
        layers = [layer for layer in layers if layer.name not in exclude]

    # The context manager closes the file itself even when loading fails and
    # even when the weights live in the nested 'model_weights' group (a
    # group object has no close(), so the old hasattr() check skipped it).
    with h5py.File(filepath, mode='r') as h5_file:
        weights_group = h5_file
        if 'layer_names' not in h5_file.attrs and 'model_weights' in h5_file:
            weights_group = h5_file['model_weights']

        if by_name:
            topology.load_weights_from_hdf5_group_by_name(weights_group, layers)
        else:
            topology.load_weights_from_hdf5_group(weights_group, layers)

    log(' load_weights: Log directory set to : {}'.format(filepath))

    # Update the log directory
    self.set_log_dir(filepath)
    print(' Load weights complete : ', filepath)
    return filepath
def save_model(self, filepath, filename=None, by_name=False, exclude=None):
    '''
    Save the architecture of ``self.keras_model`` as JSON text.

    filepath: directory in which the file is written
    filename: name of the file inside `filepath`; although it defaults to
              None, a name must be supplied because it is joined to
              `filepath`
    by_name:  unused
    exclude:  unused

    The JSON string produced by ``keras_model.to_json()`` is written to
    ``os.path.join(filepath, filename)``. Any error raised by ``to_json()``
    propagates to the caller before the file is touched.

    Returns `filepath`.
    '''
    print('>>> save_model_architecture()')

    model_json = self.keras_model.to_json()
    full_filepath = os.path.join(filepath, filename)
    log(' save model to {}'.format(full_filepath))

    # to_json() already returns a serialized string, so it is written as-is.
    # Previously the file was opened and closed without writing anything,
    # leaving an empty file behind.
    with open(full_filepath, 'w') as f:
        f.write(model_json)
    print('file closed')

    print(' save_weights: save directory is : {}'.format(filepath))
    print(' save model Load weights complete')
    return filepath
def save_model(self, filepath=None, filename=None, by_name=False, exclude=None):
    '''
    Save the weights of ``self.keras_model`` to an HDF5 file.

    filepath: directory to save into; ``self.log_dir`` is used when None
    filename: file name; '.h5' is appended when it does not already end
              with that extension. Although it defaults to None, a name
              must be supplied.
    by_name:  unused
    exclude:  unused

    An existing file of the same name is overwritten.

    Returns the full path of the weights file.
    '''
    print('>>> save_model() -- Weights only')
    if os.path.splitext(filename)[1] != '.h5':
        filename += '.h5'

    # Both branches used to join with self.log_dir, so an explicit
    # `filepath` was silently ignored.
    target_dir = self.log_dir if filepath is None else filepath
    full_filepath = os.path.join(target_dir, filename)
    log(' save model to {}'.format(full_filepath))

    # Only weights are saved: per the original author's notes, saving the
    # full model / its JSON failed because some objects are not JSON
    # serializable.
    self.keras_model.save_weights(full_filepath, overwrite=True)

    print(' save_weights: save directory is : {}'.format(target_dir))
    print(' save model weights complete')
    return full_filepath
def set_trainable(self, layer_regex, keras_model=None, indent=0, verbose=0):
    '''
    Sets model layers as trainable if their names match the given regular
    expression.

    layer_regex: regular expression; a layer is trainable only when its
                 whole name matches (``re.fullmatch``)
    keras_model: model whose layers are processed; ``self.keras_model``
                 when None. Used for the recursive calls on nested models.
    indent:      number of spaces prefixed to the log lines of this level
    verbose:     when > 0, also report skipped and non-trainable layers

    Layers without weights are left untouched. For a ``TimeDistributed``
    layer the flag is set on the wrapped layer.
    '''
    # Print message on the first call (but not on recursive calls)
    if verbose > 0 and keras_model is None:
        log("\nSelecting layers to train")
        log("-------------------------")
        log("{:5} {:20} {}".format('Layer', 'Layer Name', 'Layer Type'))

    keras_model = keras_model or self.keras_model

    # In multi-GPU training, we wrap the model. Get layers
    # of the inner model because they have the weights.
    layers = keras_model.inner_model.layers if hasattr(keras_model, "inner_model")\
        else keras_model.layers

    pad = " " * indent

    # Go through layers one by one; a layer matching the regex becomes trainable
    for ind, layer in enumerate(layers):
        layer_type = layer.__class__.__name__

        # Is the layer a model? Then descend into it.
        if layer_type == 'Model':
            if verbose > 0:
                print("Entering model layer: ", layer.name, '------------------------------')
            # Propagate `verbose` so nested layers are reported like top-level
            # ones. `indent` itself is left unchanged for this level: the
            # deeper level receives indent + 4 through the argument.
            self.set_trainable(layer_regex, keras_model=layer,
                               indent=indent + 4, verbose=verbose)
            if verbose > 0:
                print("Exiting model layer ", layer.name, '--------------------------------')
            continue

        if not layer.weights:
            if verbose > 0:
                log(" {}{:3} {:20} ({:20}) ............................no weights to train ]".
                    format(pad, ind, layer.name, layer_type))
            continue

        # Is it trainable?
        trainable = bool(re.fullmatch(layer_regex, layer.name))

        # Update layer. If layer is a container, update inner layer.
        if layer_type == 'TimeDistributed':
            layer.layer.trainable = trainable
        else:
            layer.trainable = trainable

        # Print trainable layer names
        if trainable:
            log(" {}{:3} {:20} ({:20}) TRAIN ".
                format(pad, ind, layer.name, layer_type))
        elif verbose > 0:
            log(" {}{:3} {:20} ({:20}) ............................not a layer we want to train ]".
                format(pad, ind, layer.name, layer_type))
    return
def set_log_dir(self, model_path=None, new_folder=False):
    '''
    Derive the log directory, checkpoint path template and epoch counter,
    creating the log directory when it does not exist yet.

    model_path: Path of a checkpoint file. When it is empty/None, or does
        not look like ``.../<name><YYYYMMDD>T<HHMM>/fcn<...><NNNN>.h5``,
        the current time names the log directory and the epoch counter is 0.
        Otherwise the timestamp and epoch number are taken from the path.
    new_folder: When truthy (or when ``self.config.NEW_LOG_FOLDER`` is
        truthy) the log directory is named after the current time even if
        a timestamp was recovered from `model_path`; the recovered epoch
        counter is kept.

    Sets ``self.tb_dir``, ``self.epoch``, ``self.log_dir`` and
    ``self.checkpoint_path``.
    '''
    self.tb_dir = os.path.join(self.model_dir, 'tensorboard')
    self.epoch = 0

    # Defaults describe a brand-new model
    resume_epoch = 0
    timestamp = datetime.datetime.now()

    if model_path:
        # A sample model path might look like:
        # /path/to/logs/coco20171029T2315/mask_rcnn_coco_0001.h5
        model_path = model_path.replace('\\', "/")
        regex = r".*/\w+(\d{4})(\d{2})(\d{2})T(\d{2})(\d{2})/fcn\w+(\d{4})\.h5"
        found = re.match(regex, model_path)
        if found:
            year, month, day, hour, minute, epoch_in_name = (
                int(part) for part in found.groups())
            timestamp = datetime.datetime(year, month, day, hour, minute)
            # Next epoch to run is the one after the checkpointed epoch
            resume_epoch = epoch_in_name + 1

    # A larger epoch recorded in the config wins over the one in the file
    # name, but only when a checkpoint epoch was actually recovered.
    config_is_ahead = (resume_epoch > 0
                       and self.config.LAST_EPOCH_RAN > resume_epoch)
    self.epoch = self.config.LAST_EPOCH_RAN if config_is_ahead else resume_epoch

    # A fresh folder was requested: forget the timestamp from the file name
    if new_folder or self.config.NEW_LOG_FOLDER:
        timestamp = datetime.datetime.now()

    run_folder = "{}{:%Y%m%dT%H%M}".format(self.config.NAME.lower(), timestamp)
    self.log_dir = os.path.join(self.model_dir, run_folder)

    ##--------------------------------------------------------------------------------
    ## Create checkpoint folder if it doesn't exist
    ##--------------------------------------------------------------------------------
    from tensorflow.python.platform import gfile
    print(' set_log_dir(): self.log_dir : {} '.format(self.log_dir),
          file=sys.__stdout__)
    if gfile.IsDirectory(self.log_dir):
        print(' Checkpoint folder already exists: {}'.format(
            self.log_dir), file=sys.__stdout__)
    else:
        print(' Creating checkpoint folder : {}'.format(self.log_dir),
              file=sys.__stdout__)
        gfile.MakeDirs(self.log_dir)

    # Path to save after each epoch. The "{epoch:04d}" placeholder is left
    # in the path to be filled in later (by Keras).
    checkpoint_name = "{}_*epoch*.h5".format(self.config.NAME.lower())
    path_template = os.path.join(self.log_dir, checkpoint_name)
    self.checkpoint_path = path_template.replace("*epoch*", "{epoch:04d}")

    log(' set_log_dir(): self.Checkpoint_path: {} '.format(
        self.checkpoint_path))
    log(' set_log_dir(): self.log_dir : {} '.format(self.log_dir))
    log(' set_log_dir(): Last completed epoch (self.epoch): {} '.format(
        self.epoch))
    return
def load_weights(self, filepath, by_name=False, exclude=None, new_folder=False):
    '''
    Modified version of the corresponding Keras function with the addition
    of multi-GPU support and the ability to exclude some layers from loading.

    filepath:   path of the HDF5 weights file to read
    by_name:    load weights by layer name instead of by topological order;
                forced to True whenever `exclude` is given
    exclude:    list of layer names to exclude
    new_folder: forwarded to ``self.set_log_dir`` (only called when
                ``self.mode == 'training'``)

    Returns `filepath`.
    '''
    import h5py
    from keras.engine import topology

    log(' >>> load_weights() from : {}'.format(filepath))
    if exclude:
        by_name = True

    # The context manager closes the file itself even when loading fails and
    # even when the weights live in the nested 'model_weights' group (a
    # group object has no close(), so the old hasattr() check skipped it).
    with h5py.File(filepath, mode='r') as h5_file:
        pp.pprint(h5_file.__dict__)
        weights_group = h5_file
        if 'layer_names' not in h5_file.attrs and 'model_weights' in h5_file:
            print('im here')
            weights_group = h5_file['model_weights']
        else:
            print('im not here')

        # In multi-GPU training, we wrap the model. Get layers
        # of the inner model because they have the weights.
        keras_model = self.keras_model
        layers = keras_model.inner_model.layers if hasattr(keras_model, "inner_model")\
            else keras_model.layers

        print('\n\n')
        print('--------------------------------')
        print(' List of all Layers in Model ')
        print('--------------------------------')
        print('\n\n')
        for idx, layer in enumerate(layers):
            print('>layer {} : name : {:40s} type: {}'.format(
                idx, layer.name, layer))

        # Exclude some layers. This must be a real list: the listing below
        # walks it once, and a one-shot filter() iterator would then reach
        # the loader already exhausted, so no weights would be loaded.
        if exclude:
            layers = [layer for layer in layers if layer.name not in exclude]
            print(' --------------------------------------')
            print(' layers to load (not in exclude list) ')
            print(' --------------------------------------')
            for idx, layer in enumerate(layers):
                print(' >layer {} : name : {:40s} type: {}'.format(
                    idx, layer.name, layer))
            print('\n\n')

        if by_name:
            topology.load_weights_from_hdf5_group_by_name(weights_group, layers)
        else:
            topology.load_weights_from_hdf5_group(weights_group, layers)

    print(' Weights file loaded: {} '.format(filepath))
    print(' Weights file loaded: {} '.format(filepath), file=sys.__stdout__)

    # Update the log directory
    if self.mode == 'training':
        self.set_log_dir(filepath, new_folder)
    print(" MODEL Load weight file COMPLETE ")
    return filepath
def build_heatmap_files(mrcnn_model, dataset, iterations=5, start_from=0, dest_path=None):
    '''
    Run the model over batches of `dataset` and save the first four
    prediction outputs of every image to a compressed ``.npz`` file.

    mrcnn_model: model wrapper; its ``mode`` must be "trainfcn"
    dataset:     Dataset object; ``dataset.image_info[i]`` must provide the
                 'id' and 'path' entries used to name the output
    iterations:  number of batches to process
    start_from:  passed to the data generator as ``image_index``
    dest_path:   directory receiving the ``hm_<id>.npz`` files; although it
                 defaults to None, a directory must be supplied

    Each file holds ``input_image_meta``, ``pr_hm_norm``, ``pr_hm_scores``,
    ``gt_hm_norm``, ``gt_hm_scores`` and ``coco_info``.
    '''
    assert mrcnn_model.mode == "trainfcn", "Create model in training mode."

    # `batch_size` was referenced without ever being defined in this
    # function; use the value that is handed to the generator below.
    batch_size = mrcnn_model.config.BATCH_SIZE

    log("Starting for {} iterations - batch size of each iteration: {}".
        format(iterations, batch_size))
    log(" Output destination: {}".format(dest_path))

    tr_generator = data_generator(dataset, mrcnn_model.config,
                                  shuffle=False, augment=False,
                                  batch_size=batch_size,
                                  image_index=start_from)

    ## Start main loop
    epoch_idx = 0  # keeps the final report valid when iterations == 0
    for epoch_idx in range(iterations):
        tm_start = time.time()

        train_batch_x, train_batch_y = next(tr_generator)
        # train_batch_x[1] holds one metadata row per image; column 0 of a
        # row is used below as the index into dataset.image_info.
        print(
            ' ==> mrcnn_model: step {} of {} iterations, image_id: {} '.format(
                epoch_idx, iterations, train_batch_x[1][:, 0]))

        results = mrcnn_model.keras_model.predict(train_batch_x)

        for i in range(batch_size):
            image_id = train_batch_x[1][i, 0]
            coco_image_id = dataset.image_info[image_id]['id']
            coco_filename = os.path.basename(
                dataset.image_info[image_id]['path'])

            ## To name the files by dataset index (0,1,2,...) instead, use:
            ##   filename = 'hm_{:012d}.npz'.format(image_id)
            ## The id stored in image_info is used as the file name here:
            filename = 'hm_{:012d}.npz'.format(coco_image_id)
            print(
                ' output: {} image_id: {} coco_image_id: {} coco_filename: {} output file: {}'
                .format(i, image_id, coco_image_id, coco_filename, filename))

            np.savez_compressed(os.path.join(dest_path, filename),
                                input_image_meta=train_batch_x[1][i],
                                pr_hm_norm=results[0][i],
                                pr_hm_scores=results[1][i],
                                gt_hm_norm=results[2][i],
                                gt_hm_scores=results[3][i],
                                coco_info=np.array(
                                    [coco_image_id, coco_filename]))

        tm_stop = time.time()
        print(' ==> Elapsed time {:.4f}s # of items in results: {} '.
              format(tm_stop - tm_start, len(train_batch_x)))

    print('Final : mrcnn_model epoch_idx{} iterations {}'.format(
        epoch_idx, iterations))
    return
def train_in_batches(self, train_dataset, val_dataset, learning_rate, layers, losses=None, epochs_to_run=1, batch_size=0, steps_per_epoch=0):
    '''
    Train the model with an explicit ``train_on_batch`` loop.

    train_dataset, val_dataset: Training and validation Dataset objects.
    learning_rate:   The learning rate to train with
    layers:          Iterable of keys of ``self.layer_regex``; the regular
                     expressions stored under those keys are joined with
                     '|' to select the layers to train.
    losses:          passed to ``self.compile``
    epochs_to_run:   Number of epochs to run in this call; training goes
                     from ``self.epoch`` up to ``self.epoch + epochs_to_run``.
    batch_size:      0 means ``self.config.BATCH_SIZE``
    steps_per_epoch: 0 means ``self.config.STEPS_PER_EPOCH``

    At the end of every epoch the model is evaluated on validation batches
    (``self.config.VALIDATION_STEPS`` of them, at least one) and the
    averaged ``val_*`` values are handed to the progress bar and to the
    checkpoint callback, which monitors ``val_loss``.
    '''
    assert self.mode == "training", "Create model in training mode."

    # Build one regular expression out of the predefined layer groups
    print(layers)
    train_regex_list = [self.layer_regex[x] for x in layers]
    print(train_regex_list)
    layers = '|'.join(train_regex_list)
    print('layers regex :', layers)

    if batch_size == 0:
        batch_size = self.config.BATCH_SIZE
    if steps_per_epoch == 0:
        steps_per_epoch = self.config.STEPS_PER_EPOCH

    # Data generators
    train_generator = data_generator(train_dataset, self.config,
                                     shuffle=True, batch_size=batch_size)
    val_generator = data_generator(val_dataset, self.config, shuffle=True,
                                   batch_size=batch_size, augment=False)

    log(" Last epoch completed : {} ".format(self.epoch))
    log(" Starting from epoch : {} for {} epochs".format(
        self.epoch, epochs_to_run))
    log(" Learning Rate : {} ".format(learning_rate))
    log(" Steps per epoch : {} ".format(steps_per_epoch))
    log(" Batchsize : {} ".format(batch_size))
    log(" Checkpoint Folder : {} ".format(self.checkpoint_path))
    epochs = self.epoch + epochs_to_run

    from tensorflow.python.platform import gfile
    if not gfile.IsDirectory(self.log_dir):
        log('Creating checkpoint folder')
        gfile.MakeDirs(self.log_dir)
    else:
        log('Checkpoint folder already exists')

    self.set_trainable(layers)
    self.compile(learning_rate, self.config.LEARNING_MOMENTUM, losses)

    ## get metrics from keras_model.metrics_names
    out_labels = self.get_deduped_metrics_names()
    print(' ====> out_labels : ', out_labels)

    ## setup Progress Bar callback
    callback_metrics = out_labels + ['val_' + n for n in out_labels]
    print(' Callback metrics monitored by progbar')
    pp.pprint(callback_metrics)

    progbar = keras.callbacks.ProgbarLogger(count_mode='steps')
    progbar.set_model(self.keras_model)
    progbar.set_params({
        'epochs': epochs,
        'steps': steps_per_epoch,
        'verbose': 1,
        'do_validation': False,
        'metrics': callback_metrics,
    })

    ## setup Checkpoint callback
    chkpoint = keras.callbacks.ModelCheckpoint(self.checkpoint_path,
                                               monitor='val_loss',
                                               verbose=1,
                                               save_best_only=True,
                                               save_weights_only=True)
    chkpoint.set_model(self.keras_model)

    progbar.on_train_begin()
    epoch_idx = self.epoch

    if epoch_idx >= epochs:
        print(
            'Final epoch {} has already completed - Training will not proceed'
            .format(epochs))
    else:
        val_steps = max(1, int(getattr(self.config, 'VALIDATION_STEPS', 1)))

        while epoch_idx < epochs:
            progbar.on_epoch_begin(epoch_idx)
            batch_logs = {}

            for steps_index in range(steps_per_epoch):
                batch_logs = {'batch': steps_index, 'size': batch_size}
                progbar.on_batch_begin(steps_index, batch_logs)

                train_batch_x, train_batch_y = next(train_generator)
                outs = self.keras_model.train_on_batch(
                    train_batch_x, train_batch_y)
                if not isinstance(outs, list):
                    outs = [outs]
                for label, value in zip(out_labels, outs):
                    batch_logs[label] = value

                progbar.on_batch_end(steps_index, batch_logs)

            # The checkpoint callback saves only when its monitored value
            # ('val_loss') is present in the logs. Without a validation pass
            # that key never existed, so no checkpoint was ever written.
            val_sums = {}
            for _ in range(val_steps):
                val_batch_x, val_batch_y = next(val_generator)
                val_outs = self.keras_model.test_on_batch(
                    val_batch_x, val_batch_y)
                if not isinstance(val_outs, list):
                    val_outs = [val_outs]
                for label, value in zip(out_labels, val_outs):
                    val_sums[label] = val_sums.get(label, 0.0) + value
            val_logs = {'val_' + label: total / val_steps
                        for label, total in val_sums.items()}

            progbar.on_epoch_end(epoch_idx, val_logs)

            # Last-batch training values plus the validation averages
            epoch_logs = dict(batch_logs)
            epoch_logs.update(val_logs)
            chkpoint.on_epoch_end(epoch_idx, epoch_logs)
            epoch_idx += 1

    self.epoch = max(epoch_idx - 1, epochs)
    print('Final : self.epoch {} epochs {}'.format(
        self.epoch, epochs))
def train(self, train_dataset, val_dataset, learning_rate, layers=None, losses=None, epochs=0, epochs_to_run=0, batch_size=0, steps_per_epoch=0):
    '''
    Train the model with ``fit_generator``.

    train_dataset, val_dataset: Training and validation Dataset objects.
    learning_rate:   The learning rate to train with
    layers:          Iterable of keys of ``self.layer_regex``; the regular
                     expressions stored under those keys are joined with
                     '|' to select the layers to train. Despite the None
                     default, a value must be supplied.
    losses:          List of losses to monitor; passed to ``self.compile``.
    epochs:          Number of training epochs. Note that previous training
                     epochs are considered to be done already, so this
                     actually determines the epochs to train in total rather
                     than in this particular call.
    epochs_to_run:   Number of epochs to run; when > 0 it overrides `epochs`
                     with ``self.epoch + epochs_to_run``.
    batch_size:      0 means ``self.config.BATCH_SIZE``
    steps_per_epoch: 0 means ``self.config.STEPS_PER_EPOCH``

    Validation draws ``self.config.VALIDATION_STEPS`` batches from the
    validation generator at the end of every epoch.
    '''
    assert self.mode == "training", "Create model in training mode."

    if batch_size == 0:
        batch_size = self.config.BATCH_SIZE
    if epochs_to_run > 0:
        epochs = self.epoch + epochs_to_run
    if steps_per_epoch == 0:
        steps_per_epoch = self.config.STEPS_PER_EPOCH

    # Build one regular expression out of the predefined layer groups
    print(layers)
    train_regex_list = [self.layer_regex[x] for x in layers]
    print(train_regex_list)
    layers = '|'.join(train_regex_list)
    print('layers regex :', layers)

    # Data generators
    train_generator = data_generator(train_dataset, self.config,
                                     shuffle=True, batch_size=batch_size)
    val_generator = data_generator(val_dataset, self.config, shuffle=True,
                                   batch_size=batch_size, augment=False)

    # Callbacks
    ## The model checkpoint callback was originally (?) monitoring loss;
    ## changed to val_loss (which is the default) 2-5-18
    callbacks = [
        keras.callbacks.TensorBoard(log_dir=self.log_dir,
                                    histogram_freq=0,
                                    batch_size=32,
                                    write_graph=True,
                                    write_grads=False,
                                    write_images=True,
                                    embeddings_freq=0,
                                    embeddings_layer_names=None,
                                    embeddings_metadata=None),
        keras.callbacks.ModelCheckpoint(self.checkpoint_path,
                                        mode='auto',
                                        period=1,
                                        monitor='val_loss',
                                        verbose=1,
                                        save_best_only=True,
                                        save_weights_only=True),
        keras.callbacks.ReduceLROnPlateau(monitor='val_loss',
                                          mode='auto',
                                          factor=0.3,
                                          cooldown=30,
                                          patience=50,
                                          min_lr=0.00001,
                                          verbose=1),
        keras.callbacks.EarlyStopping(monitor='val_loss',
                                      mode='auto',
                                      min_delta=1e-5,
                                      patience=200,
                                      verbose=1),
    ]

    # Train
    self.set_trainable(layers)
    self.compile(learning_rate, self.config.LEARNING_MOMENTUM, losses)

    log("Starting at epoch {} of {} epochs. LR={}\n".format(
        self.epoch, epochs, learning_rate))
    log("Steps per epochs {} ".format(steps_per_epoch))
    log("Batch size {} ".format(batch_size))
    log("Checkpoint Path: {} ".format(self.checkpoint_path))

    self.keras_model.fit_generator(
        train_generator,
        initial_epoch=self.epoch,
        epochs=epochs,
        steps_per_epoch=steps_per_epoch,
        callbacks=callbacks,
        # Hand over the generator itself: `next(val_generator)` passed a
        # single fixed batch, in which case `validation_steps` has no effect.
        validation_data=val_generator,
        validation_steps=self.config.VALIDATION_STEPS,
        max_queue_size=100,
        workers=1,  # max(self.config.BATCH_SIZE // 2, 2),
        use_multiprocessing=False)
    self.epoch = max(self.epoch, epochs)

    print('Final : self.epoch {} epochs {}'.format(self.epoch, epochs))