def get_dataset_params(env_spec, horizon, batch_size): all_tfrecord_folders = [ os.path.join( FileManager.data_dir, 'tfrecords_collision/{0}-2019_horizon_{1}'.format(f, horizon)) for f in [ '08-02', '08-06', '08-08', '08-13', '08-15', '08-18', '08-20', '08-27', '08-29', '09-09', '09-12', '09-17', '09-19', '10-20', '10-24', '10-31' ] ] train_tfrecord_folders = [ fname for fname in all_tfrecord_folders if '09-12' not in fname ], holdout_tfrecord_folders = [ fname for fname in all_tfrecord_folders if '09-12' in fname ], kwargs_train = d( rebalance_key='outputs/collision/close', env_spec=env_spec, tfrecord_folders=train_tfrecord_folders, horizon=horizon, batch_size=batch_size, num_parallel_calls=6, shuffle_buffer_size=1000, prefetch_buffer_size_multiplier=10, ) kwargs_holdout = kwargs_train.copy() kwargs_holdout.tfrecord_folders = holdout_tfrecord_folders return d(cls=TfrecordRebalanceDataset, kwargs_train=kwargs_train, kwargs_holdout=kwargs_holdout)
def get_env_params(env_spec): return d( cls=JackalEnv, env_spec=env_spec, params=d( speed=0.8, debug=True, debug_normalize_cost_colors=False, debug_color_cost_key='collision', ) )
def get_model_params(): kwargs_train = d( config_fnames=( os.path.join(FileManager.configs_dir, 'bumpy.py'), os.path.join(FileManager.configs_dir, 'collision_position.py'), ), ckptnums=( 4, 8, ) ) kwargs_eval = kwargs_train.copy() return d( cls=MergeModel, kwargs_train=kwargs_train, kwargs_eval=kwargs_eval )
def get_model_params(env_spec, horizon): kwargs_train = d( use_both_images=False, # RNN horizon=horizon, rnn_dim=64, # inputs/outputs env_spec=env_spec, output_observations=[d(name='bumpy', is_relative=False)], is_output_gps=False, ) kwargs_eval = kwargs_train.copy() return d(cls=JackalModel, kwargs_train=kwargs_train, kwargs_eval=kwargs_eval)
def get_params(): env_spec = JackalPositionCollisionEnvSpec(left_image_only=True) model_params = get_model_params() return d( exp_name='bumpy_collision_position', model=model_params, planner=get_planner_params(env_spec), env=get_env_params(env_spec), )
def cost_fn(inputs, model_outputs, goals, actions): ### collision model_collision = tf.nn.sigmoid(model_outputs.collision.close[..., 0]) clip_value = 0.02 model_collision = tf.clip_by_value(model_collision, clip_value, 1. - clip_value) model_collision = (model_collision - clip_value) / (1. - 2 * clip_value) cost_collision = model_collision ### bumpy model_bumpy = tf.nn.sigmoid(model_outputs.bumpy[..., 0]) clip_value = 0.1 model_bumpy = tf.clip_by_value(model_bumpy, clip_value, 1. - clip_value) model_bumpy = (model_bumpy - clip_value) / (1. - 2 * clip_value) cost_bumpy = model_bumpy ### goal position cost a = model_outputs.jackal.position[..., :2] b = tf.cast(goals.position, tf.float32)[..., :2][tf.newaxis, tf.newaxis] dot_product = tf.reduce_sum(a * b, axis=2) a_norm = tf.linalg.norm(a, axis=2) b_norm = tf.linalg.norm(b, axis=2) cos_theta = dot_product / tf.maximum(a_norm * b_norm, 1e-4) theta = tf.acos(tf.clip_by_value(cos_theta, -1+1e-4, 1-1e-4)) cost_position = (1. / np.pi) * tf.abs(theta) cost_position = tf.nn.sigmoid(goals.cost_weights.position_sigmoid_scale * (cost_position - goals.cost_weights.position_sigmoid_center)) # more expensive when outside XX degrees difference ### for all costs, you only get them if you don't collied # cost_position = (1. - model_collision) * cost_position + model_collision * 1. # cost_bumpy = (1. - model_collision) * cost_bumpy + model_collision * 1. ### magnitude action cost steer = actions.commands.angular_velocity[..., 0] batch_size = tf.shape(steer)[0] cost_action_magnitude = tf.square(steer) ### smooth action cost cost_action_smooth = tf.concat([tf.square(steer[:, 1:] - steer[:, :-1]), tf.zeros([batch_size, 1])], axis=1) total_cost = goals.cost_weights.collision * cost_collision + \ goals.cost_weights.bumpy * cost_bumpy + \ goals.cost_weights.position * cost_position + \ goals.cost_weights.action_magnitude * cost_action_magnitude + \ goals.cost_weights.action_smooth * cost_action_smooth return d( total=total_cost, collision=cost_collision, bumpy=cost_bumpy, position=cost_position, action_magnitude=cost_action_magnitude, action_smooth=cost_action_smooth, ) # [batch, horizon]
def cost_fn(model_outputs, outputs): batch_size = tf.shape(outputs.done)[0] batch_size_float = tf.cast(batch_size, tf.float32) done = tf.concat( [tf.zeros([batch_size, 1], dtype=tf.bool), outputs.done[:, :-1]], axis=1) mask = tf.cast(tf.logical_not(done), tf.float32) tf.debugging.assert_positive(tf.reduce_sum(mask, axis=1)) mask = batch_size_float * (mask / tf.reduce_sum(mask)) ### bumpy model_output_bumpy = model_outputs.bumpy[..., 0] bumpy = tf.cast(outputs.bumpy, tf.bool)[..., 0] cost_bumpy = tf.reduce_sum(mask * tf.nn.weighted_cross_entropy_with_logits( targets=tf.cast(bumpy, tf.float32), logits=model_output_bumpy, pos_weight=0.2), axis=1) bumpy_accuracy = tf.reduce_mean(tf.cast( tf.equal(model_output_bumpy > 0, tf.cast(bumpy, tf.bool)), tf.float32), axis=1) bumpy_accuracy_random = tf.reduce_mean(1. - tf.cast(bumpy, tf.float32), axis=1) ### regularization cost_l2_reg = 1e-1 * \ tf.reduce_mean([0.5 * tf.reduce_mean(kernel * kernel) for kernel in model_outputs.kernels]) * \ tf.ones(batch_size) ### filter out nans costs_is_finite = tf.is_finite(cost_bumpy) cost_bumpy = tf.boolean_mask(cost_bumpy, costs_is_finite) cost_l2_reg = tf.boolean_mask(cost_l2_reg, costs_is_finite) # assert cost_l2_reg.shape[0].value > 0.5 * batch_size ### total cost = cost_bumpy + cost_l2_reg return d(total=cost, bumpy=cost_bumpy, bumpy_accuracy=bumpy_accuracy, bumpy_accuracy_random=bumpy_accuracy_random, l2_reg=cost_l2_reg)
def get_model_params(env_spec, horizon): kwargs_train = d( # jackal mode use_both_images=False, # RNN horizon=horizon, rnn_dim=64, # inputs/outputs env_spec=env_spec, output_observations=[ d(name='jackal/position', is_relative=True), d(name='collision/close', is_relative=False) ], is_output_gps=False, ) kwargs_eval = kwargs_train.copy() kwargs_eval.is_output_gps = True return d(cls=JackalPositionModel, kwargs_train=kwargs_train, kwargs_eval=kwargs_eval)
def get_params(): horizon = 8 env_spec = JackalPositionCollisionEnvSpec(left_image_only=True) model_params = get_model_params(env_spec, horizon) trainer_params = get_trainer_params() dataset_params = get_dataset_params(env_spec, horizon, trainer_params.batch_size) return d( exp_name='collision_position', dataset=dataset_params, model=model_params, trainer=trainer_params, )
def get_params(): horizon = 8 env_spec = JackalBumpyEnvSpec(left_image_only=True) model_params = get_model_params(env_spec, horizon) trainer_params = get_trainer_params() dataset_params = get_dataset_params(env_spec, horizon, trainer_params.batch_size) return d( exp_name='bumpy', dataset=dataset_params, model=model_params, trainer=trainer_params, )
def cost_fn(model_outputs, outputs): batch_size = tf.shape(outputs.done)[0] batch_size_float = tf.cast(batch_size, tf.float32) done = tf.concat( [tf.zeros([batch_size, 1], dtype=tf.bool), outputs.done[:, :-1]], axis=1) mask = tf.cast(tf.logical_not(done), tf.float32) tf.debugging.assert_positive(tf.reduce_sum(mask, axis=1)) mask = batch_size_float * (mask / tf.reduce_sum(mask)) mask = mask[..., tf.newaxis] ### position cost_position = tf.reduce_sum( mask * 0.5 * tf.square(model_outputs.jackal.position - outputs.jackal.position), axis=(1, 2)) ### collision model_output_collision = model_outputs.collision.close[..., 0] collision = tf.cast(outputs.collision.close, tf.bool)[..., 0] collision = tf.logical_and( collision, tf.logical_not(done)) # don't count collisions after done! collision = tf.cumsum(tf.cast(collision, tf.float32), axis=-1) > 0.5 # collision mask should be same as normal mask, but turned on for dones with collision = true mask_collision = tf.cast( tf.logical_or(tf.logical_not(done), collision), tf.float32) mask_collision = batch_size_float * (mask_collision / tf.reduce_sum(mask_collision)) cost_collision = 2.0 * tf.reduce_sum( mask_collision * tf.nn.sigmoid_cross_entropy_with_logits( labels=tf.cast(collision, tf.float32), logits=model_output_collision), axis=1) collision_accuracy = tf.reduce_mean(tf.cast( tf.equal(model_output_collision > 0, tf.cast(collision, tf.bool)), tf.float32), axis=1) collision_accuracy_random = tf.reduce_mean( 1. - tf.cast(collision, tf.float32), axis=1) ### regularization cost_l2_reg = 1e-2 * \ tf.reduce_mean([0.5 * tf.reduce_mean(kernel * kernel) for kernel in model_outputs.kernels]) * \ tf.ones(batch_size) ### filter out nans costs_is_finite = tf.logical_and(tf.is_finite(cost_position), tf.is_finite(cost_collision)) cost_position = tf.boolean_mask(cost_position, costs_is_finite) cost_collision = tf.boolean_mask(cost_collision, costs_is_finite) cost_l2_reg = tf.boolean_mask(cost_l2_reg, costs_is_finite) # assert cost_l2_reg.shape[0].value > 0.5 * batch_size ### total cost = cost_position + cost_collision + cost_l2_reg return d(total=cost, position=cost_position, collision=cost_collision, collision_accuracy=collision_accuracy, collision_accuracy_random=collision_accuracy_random, l2_reg=cost_l2_reg)