def load_from_path(cls, path, flags=None):
  """Instantiate a Model for eval using flags and weights from a saved model.

  Currently only supports models trained by the experiment runner, since
  Model itself doesn't save flags (so we rely on the runner's
  experiment.json).

  Args:
    path: Path to model directory (which contains stage folders).
    flags: Additional flags for loading the model.

  Raises:
    ValueError: If the folder at path contains no stage folders.

  Returns:
    model: Instantiated model with saved weights.
  """
  # Read the flags from the experiment.json file that the runner saved in the
  # model directory (one level above the stage folders).
  # Remove trailing '/' if present.
  path = path.rstrip('/')
  if not path.startswith('gs://'):
    path = util.expand_path(path)
  if flags is None:
    flags = lib_flags.Flags()
  flags['train_root_dir'] = path
  experiment_json_path = os.path.join(path, 'experiment.json')
  try:
    # Read json to dict.
    with tf.gfile.GFile(experiment_json_path, 'r') as f:
      experiment_json = json.load(f)
    # Load dict as a Flags() object.
    flags.load(experiment_json)
  except Exception as e:  # pylint: disable=broad-except
    print("Warning! Couldn't load model flags from experiment.json")
    print(e)
  # Fill in defaults for any flags that are still unset.
  set_flags(flags)
  flags.print_values()
  # List the stage folders under the model directory.
  train_sub_dirs = sorted([
      sub_dir for sub_dir in tf.gfile.ListDirectory(path)
      if sub_dir.startswith('stage_')
  ])
  if not train_sub_dirs:
    raise ValueError(
        'No stage folders found, is %s the correct model path?' % path)
  # Get the latest checkpoint of the last stage. ListDirectory may return
  # names with a trailing '/', hence the strip before parsing the stage id.
  last_stage_dir = train_sub_dirs[-1]
  stage_id = int(last_stage_dir.split('_')[-1].strip('/'))
  weights_dir = os.path.join(path, last_stage_dir)
  ckpt = tf.train.latest_checkpoint(weights_dir)
  print('Load model from {}'.format(ckpt))
  # Build the model; use eval_batch_size if present.
  batch_size = flags.get('eval_batch_size',
                         train_util.get_batch_size(stage_id, **flags))
  model = cls(stage_id, batch_size, flags)
  model.saver.restore(model.sess, ckpt)
  return model
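# Usage sketch (illustrative, not from the original source): the directory
# below is hypothetical, and load_from_path is assumed to be bound as a
# classmethod on Model, as its `cls` argument suggests. If an
# 'eval_batch_size' flag ends up set, it overrides the stage's training batch
# size (see the flags.get('eval_batch_size', ...) call above).
#
#   model = Model.load_from_path('/tmp/gansynth/train_root')
#
#   eval_flags = lib_flags.Flags()
#   eval_flags['eval_batch_size'] = 8
#   model = Model.load_from_path('/tmp/gansynth/train_root', flags=eval_flags)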
def run(config):
  """Entry point to run training."""
  init_data_normalizer(config)
  stage_ids = train_util.get_stage_ids(**config)
  if not config['train_progressive']:
    stage_ids = stage_ids[-1:]
  # Train one stage at a time.
  for stage_id in stage_ids:
    batch_size = train_util.get_batch_size(stage_id, **config)
    tf.reset_default_graph()
    with tf.device(tf.train.replica_device_setter(config['ps_tasks'])):
      model = lib_model.Model(stage_id, batch_size, config)
      model.add_summaries()
      print('Variables:')
      for v in tf.global_variables():
        print('\t', v.name, v.get_shape().as_list())
      logging.info('Calling train.train')
      train_util.train(model, **config)
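# Usage sketch (illustrative only; the exact config keys are defined by the
# experiment runner's flags, so everything beyond the keys read directly in
# run() above is an assumption):
#
#   config = {
#       'train_progressive': True,  # train every stage, not just the last
#       'ps_tasks': 0,              # single machine: no parameter servers
#       'train_root_dir': '/tmp/gansynth/train_root',  # hypothetical
#       # ...plus whatever train_util.get_stage_ids, get_batch_size, and
#       # train() consume via **config.
#   }
#   run(config)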