def begin(self):
  """Caches the global step tensor and ensures the log directory exists.

  Raises:
    RuntimeError: if `tf.train.get_global_step()` returns None.
  """
  self._global_step_tensor = tf.train.get_global_step()
  if self._global_step_tensor is None:
    raise RuntimeError('Global step should be created to use PlottingHook.')

  # Nothing more to do when the log directory is already there.
  logdir = self._logdir
  if gfile.exists(logdir):
    return
  gfile.makedirs(logdir)
def main(_):
  """Runs training or the continuous pair-evaluation loop, per `config.mode`.

  The checkpoint directory is created before the mode is dispatched. In
  "evaluate_pair" mode the function never returns: it repeatedly asks
  `utils.maybe_pick_models_to_evaluate` for a checkpoint, evaluates it when
  one is returned, and otherwise sleeps for `EVALUATOR_SLEEP_PERIOD` seconds.

  Args:
    _: unused positional argument (the caller's leftover argv).

  Raises:
    ValueError: if `config.mode` is neither "train" nor "evaluate_pair".
  """
  config = flags.FLAGS
  gfile.makedirs(config.checkpoint_dir)

  if config.mode == "train":
    train(config)
  elif config.mode == "evaluate_pair":
    while True:
      checkpoint_path = utils.maybe_pick_models_to_evaluate(
          checkpoint_dir=config.checkpoint_dir)
      if checkpoint_path:
        evaluate_pair(
            config=config,
            batch_size=config.batch_size,
            checkpoint_path=checkpoint_path,
            data_dir=config.data_dir,
            dataset=config.dataset,
            num_examples_for_eval=config.num_examples_for_eval)
      else:
        logging.info(
            "No models to evaluate found, sleeping for %d seconds",
            EVALUATOR_SLEEP_PERIOD)
        time.sleep(EVALUATOR_SLEEP_PERIOD)
  else:
    # ValueError (a subclass of Exception) pinpoints the bad flag value while
    # remaining catchable by any caller that already catches Exception.
    raise ValueError(
        "Unexpected mode %s, supported modes are \"train\" or \"evaluate_pair\""
        % (config.mode))
def __init__(self, log_dir):
  """Set up a SummaryWriter that records its events under `log_dir`.

  Args:
    log_dir: path to record tfevents files in.
  """
  # Create log_dir (and any missing parent directories) when it is absent.
  log_dir_present = gfile.isdir(log_dir)
  if not log_dir_present:
    gfile.makedirs(log_dir)

  # NOTE(review): 10 / 120 / None presumably map to the writer's queue size,
  # flush interval and filename suffix -- confirm against EventFileWriter.
  writer_args = (log_dir, 10, 120, None)
  self._event_writer = EventFileWriter(*writer_args)
  self._closed = False
def main(_):
  """Configures GPUs, then runs training or the pair-evaluation loop.

  Device-placement logging is switched on, memory growth is requested for
  every physical GPU, and the rest of the work runs under
  `tf.device('/GPU:0')`. In "evaluate_pair" mode the function never returns:
  it repeatedly asks `utils.maybe_pick_models_to_evaluate` for a checkpoint,
  evaluates it when one is returned, and otherwise sleeps for
  `EVALUATOR_SLEEP_PERIOD` seconds.

  Args:
    _: unused positional argument (the caller's leftover argv).

  Raises:
    ValueError: if `config.mode` is neither "train" nor "evaluate_pair".
  """
  tf.debugging.set_log_device_placement(True)
  _enable_gpu_memory_growth()

  # Place tensors on the GPU
  with tf.device('/GPU:0'):
    config = flags.FLAGS
    gfile.makedirs(config.checkpoint_dir)

    if config.mode == "train":
      train(config)
    elif config.mode == "evaluate_pair":
      while True:
        checkpoint_path = utils.maybe_pick_models_to_evaluate(
            checkpoint_dir=config.checkpoint_dir)
        if checkpoint_path:
          evaluate_pair(
              config=config,
              batch_size=config.batch_size,
              checkpoint_path=checkpoint_path,
              data_dir=config.data_dir,
              dataset=config.dataset,
              num_examples_for_eval=config.num_examples_for_eval)
        else:
          logging.info("No models to evaluate found, sleeping for %d seconds",
                       EVALUATOR_SLEEP_PERIOD)
          time.sleep(EVALUATOR_SLEEP_PERIOD)
    else:
      # ValueError (a subclass of Exception) pinpoints the bad flag value
      # while remaining catchable by callers that already catch Exception.
      raise ValueError(
          "Unexpected mode %s, supported modes are \"train\" or \"evaluate_pair\""
          % (config.mode))


def _enable_gpu_memory_growth():
  """Requests memory growth on every physical GPU and prints the outcome."""
  gpus = tf.config.experimental.list_physical_devices('GPU')
  if not gpus:
    return
  try:
    # Currently, memory growth needs to be the same across GPUs
    for gpu in gpus:
      tf.config.experimental.set_memory_growth(gpu, True)
    logical_gpus = tf.config.experimental.list_logical_devices('GPU')
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    print('GPU memory is set to grow')
  except RuntimeError as e:
    print('Failed to set memory growth!!!!!!!!')
    # Memory growth must be set before GPUs have been initialized
    print(e)
def __init__(
    self,
    output_dir,
    env_name='PongNoFrameskip-v4',
    env_kwargs=None,
    train_batch_size=16,
    eval_batch_size=16,
    trainer_class=ppo_trainer.PPO,
    action_multipliers=None,
    observation_metrics=(
        ('eval', 'eval/raw_reward_mean/temperature_1.0'),
        ('eval', 'eval/raw_reward_std/temperature_1.0'),
    ),
    include_controls_in_observation=False,
    reward_metric=('eval', 'eval/raw_reward_mean/temperature_1.0'),
    train_epochs=100,
    env_steps=100,
    # A tuple rather than a dict, because the controls are ordered in the
    # action space.
    control_configs=(
        # (name, start, (low, high), flip)
        ('learning_rate', 1e-3, (1e-9, 10.0), False),
    ),
    observation_range=(0.0, 10.0),
    # Checkpoints are off by default, as they tend to use a lot of space.
    should_save_checkpoints=False,
    # Same for summaries.
    should_write_summaries=False,
):
  """Builds the train/eval environments and the gym action/observation spaces.

  The trainer itself is not constructed here; only a partial that can build
  it later is stored, and `self._trainer` starts out as None.

  Args:
    output_dir: directory created (if needed) and remembered by the env.
    env_name: environment name handed to `env_problem_utils.make_env`.
    env_kwargs: extra keyword arguments for `make_env`; None means none.
    train_batch_size: batch size of the training environment.
    eval_batch_size: batch size of the evaluation environment.
    trainer_class: callable used later to construct the trainer.
    action_multipliers: sequence indexed by actions; None selects
      `self.DEFAULT_ACTION_MULTIPLIERS`.
    observation_metrics: metrics whose values make up the observation.
    include_controls_in_observation: whether the observation also has one
      entry per control.
    reward_metric: metric stored as the reward source.
    train_epochs: stored as `self._train_epochs`.
    env_steps: stored as `self._env_steps`.
    control_configs: ordered (name, start, (low, high), flip) tuples.
    observation_range: (low, high) bounds of the observation space.
    should_save_checkpoints: forwarded to the trainer.
    should_write_summaries: forwarded to the trainer.
  """
  if action_multipliers is None:
    action_multipliers = self.DEFAULT_ACTION_MULTIPLIERS
  if env_kwargs is None:
    env_kwargs = {}

  def build_env(batch_size):
    return env_problem_utils.make_env(
        env_problem_name=env_name, batch_size=batch_size, **env_kwargs)

  train_env = build_env(train_batch_size)
  eval_env = build_env(eval_batch_size)

  def controller(history):
    # The trainer receives a schedule that always reports whatever controls
    # this env currently holds.
    del history
    return lambda step: self._current_controls

  # The Trainer is created lazily to prevent a long startup in the async
  # setup, where the environments only serve as containers for trajectories.
  self._trainer_fn = functools.partial(
      trainer_class,
      train_env=train_env,
      eval_env=eval_env,
      controller=controller,
      should_save_checkpoints=should_save_checkpoints,
      should_write_summaries=should_write_summaries,
  )
  self._trainer = None

  self._action_multipliers = action_multipliers
  self._observation_metrics = observation_metrics
  self._include_controls_in_observation = include_controls_in_observation
  self._reward_metric = reward_metric
  self._train_epochs = train_epochs
  self._env_steps = env_steps
  self._control_configs = control_configs
  self._observation_range = observation_range

  self._output_dir = output_dir
  gfile.makedirs(self._output_dir)

  # One discrete action per control; each action is an index into
  # self._action_multipliers.
  num_controls = len(self._control_configs)
  num_multipliers = len(self._action_multipliers)
  self.action_space = gym.spaces.MultiDiscrete(
      [num_multipliers] * num_controls)

  # The observation holds one value per metric in observation_metrics, plus
  # optionally the current controls.
  num_metrics = len(self._observation_metrics)
  observation_dim = (
      num_metrics + int(self._include_controls_in_observation) * num_controls)

  obs_low, obs_high = observation_range
  # Observations are clipped to [obs_low, obs_high].
  self.observation_space = gym.spaces.Box(
      low=obs_low,
      high=obs_high,
      shape=(observation_dim,),
  )
def main(argv):
  """Reconstructs a mesh from an input point-cloud PLY and writes it out.

  Reads points and normals from `--input_ply`, samples near-surface points,
  runs the latent grid optimization, optionally post-processes the mesh, and
  exports it to `--output_ply`. When `--output_ply` is not given, it defaults
  to the input path with its `.ply` extension replaced by `.reconstruct.ply`.

  Args:
    argv: command-line arguments left after flag parsing; only the program
      name is allowed.

  Raises:
    app.UsageError: if extra positional arguments are given.
    IOError: if `--input_ply` is not specified.
  """
  if len(argv) > 1:
    raise app.UsageError('Too many command-line arguments.')

  if not FLAGS.input_ply:
    raise IOError('--input_ply must be specified.')
  if not FLAGS.output_ply:
    FLAGS.output_ply = _default_output_ply(FLAGS.input_ply)

  # load point cloud from ply file
  v, n = pu.read_point_ply(FLAGS.input_ply)

  # check if part size is too large
  min_bb = np.min(np.max(v, axis=0) - np.min(v, axis=0))
  if FLAGS.part_size > 0.25 * min_bb:
    warnings.warn(
        'WARNING: part_size seems too large. Recommend using a part_size < '
        '{:.2f} for this shape.'.format(0.25 * min_bb), UserWarning)

  surface_points = np.concatenate([v, n], axis=1)
  near_surface_samples = rec.get_in_out_from_ray(
      surface_points, sample_factor=10, std=0.01)

  xmin = np.min(surface_points[:, :3], 0)
  xmax = np.max(surface_points[:, :3], 0)

  # add some extra slack to xmin and xmax
  xmin -= FLAGS.part_size
  xmax += FLAGS.part_size

  # A res_per_part flag of 0 means "derive the resolution from part_size".
  if FLAGS.res_per_part == 0:
    res_per_part = int(64 * FLAGS.part_size)
  else:
    res_per_part = FLAGS.res_per_part
  npts = min(near_surface_samples.shape[0], FLAGS.npoints) - 1

  print('Performing latent grid optimization...')
  v, f, _, _ = rec.encode_decoder_one_scene(
      near_surface_samples,
      FLAGS.ckpt_dir,
      FLAGS.part_size,
      overlap=True,
      indep_pt_loss=True,
      init_std=FLAGS.init_std,
      xmin=xmin,
      xmax=xmax,
      res_per_part=res_per_part,
      npts=npts,
      steps=FLAGS.steps)

  # dirname is empty for a bare file name; there is nothing to create then.
  out_dir = os.path.dirname(FLAGS.output_ply)
  if out_dir and not gfile.exists(out_dir):
    gfile.makedirs(out_dir)
  mesh = trimesh.Trimesh(v, f)

  if FLAGS.postprocess:
    print('Postprocessing generated mesh...')
    mesh = postprocess.remove_backface(mesh, surface_points)

  print('Writing reconstructed mesh to {}'.format(FLAGS.output_ply))
  with gfile.GFile(FLAGS.output_ply, 'wb') as fh:
    mesh.export(fh, 'ply')


def _default_output_ply(input_ply):
  """Returns `input_ply` with a trailing `.ply` swapped for `.reconstruct.ply`.

  Only the final extension is touched, so `.ply` appearing in a directory
  name is left alone. When the input has no `.ply` extension (matched
  case-insensitively), the suffix is appended, so the result never equals the
  input path and the input file cannot be overwritten.
  """
  stem, ext = os.path.splitext(input_ply)
  if ext.lower() != '.ply':
    stem = input_ply
  return stem + '.reconstruct.ply'
def continuously_collect_trajectories(output_dir,
                                      train_env,
                                      eval_env,
                                      trajectory_dump_dir=None,
                                      env_id=None,
                                      max_trajectories_to_collect=None,
                                      try_abort=True):
  """Instantiates a PPO trainer and collects trajectories in a loop.

  Each iteration collects one training trajectory with the current policy,
  dumps it under `<output_dir>/trajectories/train`, and then checks whether a
  newer policy file is available; if so, the trainer's optimization state is
  reloaded from `output_dir`. The loop ends when a `__done__` file appears in
  `output_dir` or when `max_trajectories_to_collect` trajectories have been
  collected.

  Args:
    output_dir: directory handed to the PPO trainer; also where policy files
      and the `__done__` marker are looked up and where the `trajectories/`
      sub-directories are created.
    train_env: training environment; its `batch_size` must be 1.
    eval_env: evaluation environment; its `batch_size` must be 1.
    trajectory_dump_dir: forwarded unchanged to the PPO trainer.
    env_id: identifier passed to `dump_trajectory`; must not be None.
    max_trajectories_to_collect: upper bound on collected trajectories
      (converted with `int`), or None to keep collecting until `__done__`.
    try_abort: if true, trajectory collection is given an abort function that
      fires when a newer policy or the `__done__` file shows up.
  """

  # Make the PPO trainer.
  ppo_trainer = rl_trainers.PPO(
      output_dir=output_dir,
      train_env=train_env,
      eval_env=eval_env,
      trajectory_dump_dir=trajectory_dump_dir,
  )

  # TODO(afrozm): Update base_trainer interface to support SimPLe as well.
  assert isinstance(ppo_trainer, rl_trainers.PPO)

  assert env_id is not None

  # Get an initial policy, waiting forever for it if needed.
  policy_and_epoch = get_newer_policy_model_file(output_dir, wait_forever=True)
  assert policy_and_epoch
  policy_file, epoch = policy_and_epoch
  logging.info('Read initial policy for epoch [%s] -> [%s]', epoch, policy_file)

  # Looks for a policy file newer than `epoch_`; callers below treat a None
  # result as "nothing newer".
  # NOTE(review): how long this waits depends on get_newer_policy_model_file
  # and `sleep_time_secs_` -- confirm against that helper.
  def is_newer_policy_file_available(epoch_, sleep_time_secs_=0.1):
    return get_newer_policy_model_file(
        output_dir, min_epoch=epoch_, sleep_time_secs=sleep_time_secs_)

  # Does a __done__ file exist?
  def done_file_exists():
    return gfile.exists(os.path.join(output_dir, '__done__'))

  assert 1 == train_env.batch_size
  assert 1 == eval_env.batch_size

  temperature = 1.0

  trajectories_collected = 0

  train_env_trajectory_dump_dir = os.path.join(output_dir, 'trajectories/train')
  eval_env_trajectory_dump_dir = os.path.join(output_dir, 'trajectories/eval')

  # Both directories are created, but only the train one is written to in
  # this function.
  gfile.makedirs(train_env_trajectory_dump_dir)
  gfile.makedirs(eval_env_trajectory_dump_dir)

  while max_trajectories_to_collect is None or trajectories_collected < int(
      max_trajectories_to_collect):
    logging.info('Collecting a trajectory, trajectories_collected = %s',
                 trajectories_collected)

    # Abort function -- if something newer is available, then abort the
    # current computation and reload.

    # Useful if env.step is long.
    def long_abort_fn():
      # We want this to be as quick as possible, hence the 0 sleep time.
      return (is_newer_policy_file_available(epoch, 0) is not None) or (
          done_file_exists())

    abort_fn = long_abort_fn if try_abort else None

    # Collect a training trajectory.
    trajs, n_done, unused_timing_info, unused_model_state = (
        ppo_trainer.collect_trajectories(train=True,
                                         temperature=temperature,
                                         abort_fn=abort_fn,
                                         raw_trajectory=True))

    if done_file_exists():
      logging.info('__done__ file found in %s, we are done here.', output_dir)
      break

    if trajs and n_done > 0:
      assert 1 == n_done
      trajectories_collected += n_done

      # Write the trajectory down; a random integer string is passed along,
      # presumably to keep dumps from the same epoch/env distinct.
      logging.info(
          'Dumping the collected trajectory, trajectories_collected = %s',
          trajectories_collected)
      dump_trajectory(train_env_trajectory_dump_dir, epoch, env_id, temperature,
                      str(random.randint(0, 2**31 - 1)), trajs)
    else:
      logging.info('Computation was aborted, a new policy is available.')

    # This may be useless, since `abort_fn` will take care of it. We might
    # want to have this here if abort_fn is always None.
    # Do we have a newer policy?
    policy_file_and_epoch = is_newer_policy_file_available(epoch)
    if policy_file_and_epoch is None:
      # Continue churning out trajectories with the current policy.
      logging.info("We don't have a newer policy, continuing with the old one.")
      continue

    # We have a newer policy, read it and update the parameters.
    policy_file, epoch = policy_file_and_epoch
    logging.info(
        'We have a newer policy epoch [%s], file [%s], updating parameters.',
        epoch, policy_file)
    ppo_trainer.update_optimization_state(output_dir)
    logging.info('Parameters of PPOTrainer updated.')

    # Check that the epochs match.
    assert epoch == ppo_trainer.epoch
def export(self, path, session, overwrite=False):
  """Build the TF-Hub spec and module, sync the weights, and export to `path`.

  Args:
    path: directory the hub module is exported to; it is created before the
      export.
    session: session used to read the current values of the wrapped object's
      variables.
    overwrite: if true and `path` already exists, it is removed first.
  """
  method_specs = {}

  def module_fn():
    """A module_fn for use with hub.create_module_spec()."""
    # The graph is built from a fresh copy of the original object.
    fresh_object = self._object_factory()
    for method_name, method_info in self._captured_calls.items():
      captured_inputs, captured_specs = method_info
      tensor_inputs = nest.map_structure(_to_placeholder, captured_inputs)
      tensor_outputs = getattr(fresh_object, method_name)(**tensor_inputs)

      # Signatures take flat dicts, keyed by position in the flattened nest.
      flat_tensor_inputs = {
          str(index): tensor
          for index, tensor in enumerate(nest.flatten(tensor_inputs))
      }
      flat_tensor_outputs = {
          str(index): tensor
          for index, tensor in enumerate(nest.flatten(tensor_outputs))
      }

      # Only the nest structure of inputs/outputs is recorded (leaves are
      # replaced by None).
      method_specs[method_name] = {
          "specs": captured_specs,
          "inputs": nest.map_structure(lambda _: None, tensor_inputs),
          "outputs": nest.map_structure(lambda _: None, tensor_outputs),
      }

      if method_name == "__call__":
        signature_name = "default"
      else:
        signature_name = method_name
      hub.add_signature(signature_name, flat_tensor_inputs,
                        flat_tensor_outputs)

    methods_blob = pickle.dumps(method_specs)
    hub.attach_message("methods", tf.train.BytesList(value=[methods_blob]))
    properties_blob = pickle.dumps(self._captured_attrs)
    hub.attach_message("properties",
                       tf.train.BytesList(value=[properties_blob]))

  # Create the spec that will be later used in export.
  hub_spec = hub.create_module_spec(module_fn, drop_collections=["sonnet"])

  # Read the current variable values, one session.run per variable.
  module_weights = [
      session.run(variable)
      for variable in self._wrapped_object.get_all_variables()
  ]

  # Build the sync ops in a separate graph.
  with tf.Graph().as_default():
    hub_module = hub.Module(hub_spec, trainable=True, name="hub")

    assign_ops = []
    assign_phs = []
    # NOTE(review): the weights are matched to hub variables by position in
    # name-sorted order -- confirm get_all_variables() yields the same order.
    for _, variable in sorted(hub_module.variable_map.items()):
      placeholder = tf.placeholder(shape=variable.shape, dtype=variable.dtype)
      assign_phs.append(placeholder)
      assign_ops.append(tf.assign(variable, placeholder))

    with tf.Session() as module_session:
      module_session.run(tf.local_variables_initializer())
      module_session.run(tf.global_variables_initializer())
      weight_feed = dict(zip(assign_phs, module_weights))
      module_session.run(assign_ops, feed_dict=weight_feed)

      if overwrite and gfile.exists(path):
        gfile.rmtree(path)
      gfile.makedirs(path)
      hub_module.export(path, module_session)