Example #1
0
def build_training_graph(num_agents):
    """Assemble the training graph: networks, weighted losses and accuracies.

    Args:
        num_agents: Number of agents; sets the leading dimension of both
            input placeholders.

    Returns:
        A tuple ``(s, g, a, loss_list, loss, acc_list)`` where ``s`` is the
        ``[num_agents, 4]`` state placeholder, ``g`` the ``[num_agents, 2]``
        goal placeholder, ``a`` the output of ``core.network_action``,
        ``loss_list`` the individually weighted loss terms, ``loss`` ten
        times the sum of those terms and the L2 weight-decay terms, and
        ``acc_list`` the accuracy tensors reported by the loss helpers.
    """
    states = tf.placeholder(tf.float32, [num_agents, 4])
    goals = tf.placeholder(tf.float32, [num_agents, 2])

    # Pairwise state differences: entry [i, j] is states[i] - states[j].
    pairwise = tf.expand_dims(states, 1) - tf.expand_dims(states, 0)
    barrier, _, neighbors = core.network_cbf(
        x=pairwise, r=config.DIST_MIN_THRES, indices=None)
    actions = core.network_action(
        s=states, g=goals, obs_radius=config.OBS_RADIUS, indices=neighbors)

    barrier_out = core.loss_barrier(
        h=barrier,
        s=states,
        r=config.DIST_MIN_THRES,
        ttc=config.TIME_TO_COLLISION,
        indices=neighbors,
    )
    loss_dang, loss_safe, acc_dang, acc_safe = barrier_out

    deriv_out = core.loss_derivatives(
        s=states,
        a=actions,
        h=barrier,
        x=pairwise,
        r=config.DIST_MIN_THRES,
        indices=neighbors,
        ttc=config.TIME_TO_COLLISION,
        alpha=config.ALPHA_CBF,
    )
    loss_dang_deriv, loss_safe_deriv, acc_dang_deriv, acc_safe_deriv = deriv_out

    loss_action = core.loss_actions(
        s=states,
        g=goals,
        a=actions,
        r=config.DIST_MIN_THRES,
        ttc=config.TIME_TO_COLLISION,
    )

    # Per-term weights are hard-coded; the "dangerous" terms count double and
    # the action term is scaled down.
    loss_list = [
        2 * loss_dang,
        loss_safe,
        2 * loss_dang_deriv,
        loss_safe_deriv,
        0.01 * loss_action,
    ]
    acc_list = [acc_dang, acc_safe, acc_dang_deriv, acc_safe_deriv]

    # One L2 penalty per trainable variable currently registered.
    decay_terms = []
    for variable in tf.trainable_variables():
        decay_terms.append(config.WEIGHT_DECAY * tf.nn.l2_loss(variable))
    loss = 10 * tf.math.add_n(loss_list + decay_terms)

    return states, goals, actions, loss_list, loss, acc_list
Example #2
0
def build_training_graph(num_agents):
    """Build the training graph for the 8-dimensional-state agents.

    Args:
        num_agents: Number of agents; sets the leading dimension of both
            input placeholders.

    Returns:
        A tuple ``(s, s_ref, u, loss_list, loss, acc_list)``: the
        ``[num_agents, 8]`` state placeholder, the ``[num_agents, 8]``
        reference-state placeholder, the output of ``core.network_action``,
        the weighted loss terms, the total loss (ten times the sum of the
        weighted terms and the L2 weight-decay terms), and the accuracy
        tensors returned by the loss helpers.
    """
    # Inputs: current agent states and their reference (goal) states.
    agent_states = tf.placeholder(tf.float32, [num_agents, 8])
    ref_states = tf.placeholder(tf.float32, [num_agents, 8])

    # rel_states[i, j] is the state of agent i minus the state of agent j.
    rel_states = (tf.expand_dims(agent_states, 1)
                  - tf.expand_dims(agent_states, 0))

    # Barrier network; it also returns a mask (unused here) and the
    # neighbour indices that the later calls reuse.
    cbf, _, nbr_indices = core.network_cbf(
        x=rel_states, r=config.DIST_MIN_THRES, indices=None)

    # Controller network.
    control = core.network_action(
        s=agent_states,
        s_ref=ref_states,
        obs_radius=config.OBS_RADIUS,
        indices=nbr_indices,
    )

    # Barrier-value losses/accuracies for the dangerous and safe sets.
    loss_dang, loss_safe, acc_dang, acc_safe = core.loss_barrier(
        h=cbf, s=agent_states, indices=nbr_indices)

    # Barrier-derivative losses/accuracies for the dangerous, safe and
    # in-between ("medium") sets.
    derivative_terms = core.loss_derivatives(
        s=agent_states, u=control, h=cbf, x=rel_states, indices=nbr_indices)
    (loss_dang_deriv, loss_safe_deriv, loss_medium_deriv,
     acc_dang_deriv, acc_safe_deriv, acc_medium_deriv) = derivative_terms

    # Penalty on the controller output as computed by core.loss_actions.
    loss_action = core.loss_actions(
        s=agent_states, u=control, s_ref=ref_states, indices=nbr_indices)

    # The relative weights below are tuning knobs.
    loss_list = [
        loss_dang,
        loss_safe,
        3 * loss_dang_deriv,
        loss_safe_deriv,
        2 * loss_medium_deriv,
        0.5 * loss_action,
    ]
    acc_list = [
        acc_dang,
        acc_safe,
        acc_dang_deriv,
        acc_safe_deriv,
        acc_medium_deriv,
    ]

    # L2 regularisation over every trainable variable.
    l2_terms = []
    for var in tf.trainable_variables():
        l2_terms.append(config.WEIGHT_DECAY * tf.nn.l2_loss(var))
    loss = 10 * tf.math.add_n(loss_list + l2_terms)

    return agent_states, ref_states, control, loss_list, loss, acc_list
Example #3
0
 def opt_body(u_res, loop_count, is_safe):
     """Take one gradient step on the action residual ``u_res``.

     Reads ``s``, ``u``, ``h``, ``mask`` and ``indices`` from the enclosing
     scope. The three-value signature matches a loop body whose state is
     ``(u_res, loop_count, is_safe)``.

     Returns:
         The updated residual, ``loop_count + 1`` and ``is_safe`` unchanged.
     """
     step = config.TIME_STEP_EVAL
     # Roll the state forward one step under the corrected action.
     state_rate = core.quadrotor_dynamics_tf(s, u + u_res)
     state_after = s + state_rate * step
     diff_after = (tf.expand_dims(state_after, 1)
                   - tf.expand_dims(state_after, 0))
     h_after, mask_after, _ = core.network_cbf(
         x=diff_after, r=config.DIST_MIN_THRES, indices=indices)
     # Discrete-time condition h_next - h + dt * alpha * h, weighted by the
     # current and next-step masks.
     condition = h_after - h + step * config.ALPHA_CBF * h
     condition = condition * mask * mask_after
     # Only negative values of the condition contribute to the penalty.
     violation = tf.reduce_sum(tf.math.maximum(-condition, 0), axis=1)
     residual_grad = tf.gradients(violation, u_res)[0]
     return (u_res - config.REFINE_LEARNING_RATE * residual_grad,
             loop_count + 1,
             is_safe)
Example #4
0
 def opt_body(a_res, loop_count):
     """Take one gradient step on the action residual ``a_res``.

     Reads ``s``, ``a``, ``h``, ``mask`` and ``indices`` from the enclosing
     scope.

     Returns:
         The updated residual and ``loop_count + 1``.
     """
     dt = config.TIME_STEP
     # State reached after one step with the corrected action a + a_res.
     predicted = s + core.dynamics(s, a + a_res) * dt
     predicted_diff = (tf.expand_dims(predicted, 1)
                       - tf.expand_dims(predicted, 0))
     h_pred, mask_pred, _ = core.network_cbf(
         x=predicted_diff, r=config.DIST_MIN_THRES, indices=indices)
     # h_next - h + dt * alpha * h, weighted by both masks.
     margin = h_pred - h + dt * config.ALPHA_CBF * h
     margin = margin * mask * mask_pred
     # Penalise only the negative part of the margin.
     penalty = tf.reduce_sum(tf.math.maximum(-margin, 0), axis=1)
     grad = tf.gradients(penalty, a_res)[0]
     return a_res - config.REFINE_LEARNING_RATE * grad, loop_count + 1
Example #5
0
 def opt_body(a_res, loop_count):
     """Refine ``a_res`` by one gradient-descent step.

     Uses ``s``, ``a``, ``h``, ``mask`` and ``indices`` from the enclosing
     scope.

     Returns:
         ``(a_res, loop_count)`` after the update and the counter increment.
     """
     # Simulate one step forward with the corrected action.
     rate = core.dynamics(s, a + a_res)
     stepped = s + rate * config.TIME_STEP
     stepped_diff = tf.expand_dims(stepped, 1) - tf.expand_dims(stepped, 0)
     h_stepped, mask_stepped, _ = core.network_cbf(x=stepped_diff,
                                                   r=config.DIST_MIN_THRES,
                                                   indices=indices)
     # This quantity is meant to be non-negative; its negative part is the
     # error that drives the update below.
     slack = h_stepped - h + config.TIME_STEP * config.ALPHA_CBF * h
     slack = slack * mask * mask_stepped
     shortfall = tf.reduce_sum(tf.math.maximum(-slack, 0), axis=1)
     # Descend along the gradient of the error with respect to a_res.
     descent_dir = tf.gradients(shortfall, a_res)[0]
     refined = a_res - config.REFINE_LEARNING_RATE * descent_dir
     next_count = loop_count + 1
     return refined, next_count
Example #6
0
def build_training_graph(num_agents):
    """Create placeholders, networks and the combined training loss.

    Args:
        num_agents: Number of agents; used as the first dimension of the
            two ``[num_agents, 8]`` placeholders.

    Returns:
        ``(s, s_ref, u, loss_list, loss, acc_list)``: the state and
        reference-state placeholders, the ``core.network_action`` output,
        the weighted loss terms, the scalar-combined loss (weighted terms
        plus L2 weight decay, multiplied by 10) and the accuracy tensors.
    """
    state_in = tf.placeholder(tf.float32, [num_agents, 8])
    reference_in = tf.placeholder(tf.float32, [num_agents, 8])

    # Difference between every pair of agent states.
    state_gap = tf.expand_dims(state_in, 1) - tf.expand_dims(state_in, 0)
    h_val, _, knn = core.network_cbf(
        x=state_gap, r=config.DIST_MIN_THRES, indices=None)
    u_out = core.network_action(
        s=state_in, s_ref=reference_in, obs_radius=config.OBS_RADIUS,
        indices=knn)

    barrier_losses = core.loss_barrier(h=h_val, s=state_in, indices=knn)
    loss_dang, loss_safe, acc_dang, acc_safe = barrier_losses

    deriv_losses = core.loss_derivatives(
        s=state_in, u=u_out, h=h_val, x=state_gap, indices=knn)
    (loss_dang_deriv, loss_safe_deriv, loss_medium_deriv,
     acc_dang_deriv, acc_safe_deriv, acc_medium_deriv) = deriv_losses

    loss_action = core.loss_actions(
        s=state_in, u=u_out, s_ref=reference_in, indices=knn)

    # Hand-tuned relative weights of the individual loss terms.
    loss_list = [
        loss_dang,
        loss_safe,
        3 * loss_dang_deriv,
        loss_safe_deriv,
        2 * loss_medium_deriv,
        0.5 * loss_action,
    ]
    acc_list = [
        acc_dang,
        acc_safe,
        acc_dang_deriv,
        acc_safe_deriv,
        acc_medium_deriv,
    ]

    # Weight decay: one L2 term per trainable variable.
    regularizers = []
    for weight in tf.trainable_variables():
        regularizers.append(config.WEIGHT_DECAY * tf.nn.l2_loss(weight))
    loss = 10 * tf.math.add_n(loss_list + regularizers)

    return state_in, reference_in, u_out, loss_list, loss, acc_list
Example #7
0
def build_evaluation_graph(num_agents):
    """Build the evaluation graph, including the in-graph action refinement.

    The controller output ``u`` is corrected by a residual ``u_res`` that is
    reset to zero and then updated inside a ``tf.while_loop``. The loop runs
    while the iteration count is below ``config.REFINE_LOOPS`` and the
    ``is_safe`` flag is false; the flag is computed once before the loop and
    passed through unchanged.

    Args:
        num_agents: Number of agents; used as the first dimension of the
            two ``[num_agents, 8]`` placeholders.

    Returns:
        ``(s, s_ref, u_opt, loss_list, acc_list)``: the two placeholders, the
        refined action ``u + u_res``, the unweighted loss terms and the
        accuracy tensors.
    """
    states = tf.placeholder(tf.float32, [num_agents, 8])
    ref_states = tf.placeholder(tf.float32, [num_agents, 8])

    pairwise = tf.expand_dims(states, 1) - tf.expand_dims(states, 0)
    h_now, mask_now, neighbors = core.network_cbf(
        x=pairwise, r=config.DIST_MIN_THRES, indices=None)
    u_net = core.network_action(
        s=states, s_ref=ref_states, obs_radius=config.OBS_RADIUS,
        indices=neighbors)

    # The flag is true only when every entry of the safe mask is set.
    safe_mask = core.compute_safe_mask(
        states, r=config.DIST_SAFE, indices=neighbors)
    safe_fraction = tf.reduce_mean(tf.cast(safe_mask, tf.float32))
    all_safe = tf.equal(safe_fraction, 1)

    residual_var = tf.Variable(tf.zeros_like(u_net), name='u_res')
    counter_var = tf.Variable(0, name='loop_count')

    def refine_step(u_res, loop_count, is_safe):
        """Loop body: one gradient step on the residual."""
        state_rate = core.quadrotor_dynamics_tf(states, u_net + u_res)
        state_after = states + state_rate * config.TIME_STEP_EVAL
        diff_after = (tf.expand_dims(state_after, 1)
                      - tf.expand_dims(state_after, 0))
        h_after, mask_after, _ = core.network_cbf(
            x=diff_after, r=config.DIST_MIN_THRES, indices=neighbors)
        # h_next - h + dt * alpha * h, weighted by both masks.
        condition = (h_after - h_now
                     + config.TIME_STEP_EVAL * config.ALPHA_CBF * h_now)
        condition = condition * mask_now * mask_after
        violation = tf.reduce_sum(tf.math.maximum(-condition, 0), axis=1)
        residual_grad = tf.gradients(violation, u_res)[0]
        return (u_res - config.REFINE_LEARNING_RATE * residual_grad,
                loop_count + 1,
                is_safe)

    def keep_refining(u_res, loop_count, is_safe):
        """Loop condition: budget left and the state not flagged safe."""
        below_limit = tf.less(loop_count, config.REFINE_LOOPS)
        return tf.logical_and(below_limit, tf.logical_not(is_safe))

    # Reset the loop state before the loop runs.
    reset_ops = [
        residual_var.assign(tf.zeros_like(u_net)),
        counter_var.assign(0),
    ]
    with tf.control_dependencies(reset_ops):
        residual, _, _ = tf.while_loop(
            keep_refining, refine_step, [residual_var, counter_var, all_safe])
        u_opt = u_net + residual

    loss_dang, loss_safe, acc_dang, acc_safe = core.loss_barrier(
        h=h_now, s=states, indices=neighbors)

    deriv_terms = core.loss_derivatives(
        s=states, u=u_opt, h=h_now, x=pairwise, indices=neighbors)
    (loss_dang_deriv, loss_safe_deriv, loss_medium_deriv,
     acc_dang_deriv, acc_safe_deriv, acc_medium_deriv) = deriv_terms

    loss_action = core.loss_actions(
        s=states, u=u_opt, s_ref=ref_states, indices=neighbors)

    loss_list = [
        loss_dang,
        loss_safe,
        loss_dang_deriv,
        loss_safe_deriv,
        loss_medium_deriv,
        loss_action,
    ]
    acc_list = [
        acc_dang,
        acc_safe,
        acc_dang_deriv,
        acc_safe_deriv,
        acc_medium_deriv,
    ]

    return states, ref_states, u_opt, loss_list, acc_list
Example #8
0
def build_evaluation_graph(num_agents):
    """Build the evaluation graph with a fixed-length action refinement loop.

    The controller output ``a`` is corrected by a residual ``a_res`` that is
    reset to zero and then updated for ``config.REFINE_LOOPS`` iterations
    inside a ``tf.while_loop``. Losses and accuracies are evaluated on the
    state predicted one step ahead under the refined action.

    Args:
        num_agents: Number of agents; first dimension of the state
            (``[num_agents, 4]``) and goal (``[num_agents, 2]``)
            placeholders.

    Returns:
        ``(s, g, a_opt, loss_list, acc_list)``: the two placeholders, the
        refined action ``a + a_res``, the unweighted loss terms and the
        accuracy tensors.
    """
    # Inputs: agent states and goal states.
    states = tf.placeholder(tf.float32, [num_agents, 4])
    goals = tf.placeholder(tf.float32, [num_agents, 2])

    # Difference between the state of each agent and every other agent.
    pairwise = tf.expand_dims(states, 1) - tf.expand_dims(states, 0)
    h_now, mask_now, neighbors = core.network_cbf(
        x=pairwise, r=config.DIST_MIN_THRES)
    a_net = core.network_action(
        s=states, g=goals, obs_radius=config.OBS_RADIUS, indices=neighbors)

    # Residual added to a_net, and the iteration counter, both kept as
    # variables so they can be reset before the loop runs.
    residual_var = tf.Variable(tf.zeros_like(a_net), name='a_res')
    counter_var = tf.Variable(0, name='loop_count')

    def refine_step(a_res, loop_count):
        """Loop body: one gradient step on the residual."""
        # State reached after one step under a_net + a_res.
        rate = core.dynamics(states, a_net + a_res)
        stepped = states + rate * config.TIME_STEP
        stepped_diff = tf.expand_dims(stepped, 1) - tf.expand_dims(stepped, 0)
        h_stepped, mask_stepped, _ = core.network_cbf(
            x=stepped_diff, r=config.DIST_MIN_THRES, indices=neighbors)
        # This quantity is meant to be non-negative; its negative part is
        # the error minimised by gradient descent.
        slack = h_stepped - h_now + config.TIME_STEP * config.ALPHA_CBF * h_now
        slack = slack * mask_now * mask_stepped
        shortfall = tf.reduce_sum(tf.math.maximum(-slack, 0), axis=1)
        descent_dir = tf.gradients(shortfall, a_res)[0]
        return (a_res - config.REFINE_LEARNING_RATE * descent_dir,
                loop_count + 1)

    def keep_refining(a_res, loop_count):
        """Loop condition: stop after REFINE_LOOPS iterations."""
        return tf.less(loop_count, config.REFINE_LOOPS)

    reset_ops = [
        residual_var.assign(tf.zeros_like(a_net)),
        counter_var.assign(0),
    ]
    with tf.control_dependencies(reset_ops):
        residual, _ = tf.while_loop(
            keep_refining, refine_step, [residual_var, counter_var])
        a_opt = a_net + residual

    # One-step prediction under the refined action.
    rate_opt = core.dynamics(states, a_opt)
    states_next = states + rate_opt * config.TIME_STEP
    pairwise_next = (tf.expand_dims(states_next, 1)
                     - tf.expand_dims(states_next, 0))
    h_next, _, _ = core.network_cbf(
        x=pairwise_next, r=config.DIST_MIN_THRES, indices=neighbors)

    # Barrier-value losses/accuracies for the dangerous and safe sets,
    # evaluated on the predicted state.
    barrier_terms = core.loss_barrier(
        h=h_next,
        s=states_next,
        r=config.DIST_MIN_THRES,
        ttc=config.TIME_TO_COLLISION,
        eps=[0, 0],
    )
    loss_dang, loss_safe, acc_dang, acc_safe = barrier_terms

    # Barrier-derivative losses/accuracies for the dangerous and safe sets.
    deriv_terms = core.loss_derivatives(
        s=states_next,
        a=a_opt,
        h=h_next,
        x=pairwise_next,
        r=config.DIST_MIN_THRES,
        ttc=config.TIME_TO_COLLISION,
        alpha=config.ALPHA_CBF,
        indices=neighbors,
    )
    loss_dang_deriv, loss_safe_deriv, acc_dang_deriv, acc_safe_deriv = deriv_terms

    # NOTE(review): the action loss is computed from the unrefined network
    # output and the current state, not from a_opt -- confirm this is
    # intended.
    loss_action = core.loss_actions(
        states, goals, a_net,
        r=config.DIST_MIN_THRES, ttc=config.TIME_TO_COLLISION)

    loss_list = [
        loss_dang,
        loss_safe,
        loss_dang_deriv,
        loss_safe_deriv,
        loss_action,
    ]
    acc_list = [acc_dang, acc_safe, acc_dang_deriv, acc_safe_deriv]

    return states, goals, a_opt, loss_list, acc_list
Example #9
0
def build_evaluation_graph(num_agents):
    """Construct the evaluation graph with iterative action refinement.

    A residual ``a_res`` is reset to zero and refined for
    ``config.REFINE_LOOPS`` iterations in a ``tf.while_loop``; the losses and
    accuracies are then computed on the one-step-ahead state obtained with
    the refined action.

    Args:
        num_agents: Number of agents; first dimension of the
            ``[num_agents, 4]`` state and ``[num_agents, 2]`` goal
            placeholders.

    Returns:
        ``(s, g, a_opt, loss_list, acc_list)``: both placeholders, the
        refined action, the unweighted loss terms and the accuracy tensors.
    """
    state_in = tf.placeholder(tf.float32, [num_agents, 4])
    goal_in = tf.placeholder(tf.float32, [num_agents, 2])

    state_gap = tf.expand_dims(state_in, 1) - tf.expand_dims(state_in, 0)
    h_cur, mask_cur, knn = core.network_cbf(x=state_gap,
                                            r=config.DIST_MIN_THRES)
    a_raw = core.network_action(
        s=state_in, g=goal_in, obs_radius=config.OBS_RADIUS, indices=knn)

    res_var = tf.Variable(tf.zeros_like(a_raw), name='a_res')
    iter_var = tf.Variable(0, name='loop_count')

    def body(a_res, loop_count):
        """One gradient step on the residual."""
        dt = config.TIME_STEP
        predicted = state_in + core.dynamics(state_in, a_raw + a_res) * dt
        predicted_gap = (tf.expand_dims(predicted, 1)
                         - tf.expand_dims(predicted, 0))
        h_pred, mask_pred, _ = core.network_cbf(
            x=predicted_gap, r=config.DIST_MIN_THRES, indices=knn)
        # h_next - h + dt * alpha * h, weighted by both masks; only its
        # negative part is penalised.
        margin = h_pred - h_cur + dt * config.ALPHA_CBF * h_cur
        margin = margin * mask_cur * mask_pred
        penalty = tf.reduce_sum(tf.math.maximum(-margin, 0), axis=1)
        grad = tf.gradients(penalty, a_res)[0]
        return a_res - config.REFINE_LEARNING_RATE * grad, loop_count + 1

    def cond(a_res, loop_count):
        """Continue until the iteration budget is used up."""
        return tf.less(loop_count, config.REFINE_LOOPS)

    # Zero the residual and the counter before entering the loop.
    resets = [res_var.assign(tf.zeros_like(a_raw)), iter_var.assign(0)]
    with tf.control_dependencies(resets):
        res_final, _ = tf.while_loop(cond, body, [res_var, iter_var])
        a_opt = a_raw + res_final

    # Predicted next state under the refined action.
    rate_final = core.dynamics(state_in, a_opt)
    state_pred = state_in + rate_final * config.TIME_STEP
    gap_pred = tf.expand_dims(state_pred, 1) - tf.expand_dims(state_pred, 0)
    h_final, _, _ = core.network_cbf(
        x=gap_pred, r=config.DIST_MIN_THRES, indices=knn)

    loss_dang, loss_safe, acc_dang, acc_safe = core.loss_barrier(
        h=h_final,
        s=state_pred,
        r=config.DIST_MIN_THRES,
        ttc=config.TIME_TO_COLLISION,
        eps=[0, 0],
    )

    (loss_dang_deriv, loss_safe_deriv,
     acc_dang_deriv, acc_safe_deriv) = core.loss_derivatives(
        s=state_pred,
        a=a_opt,
        h=h_final,
        x=gap_pred,
        r=config.DIST_MIN_THRES,
        ttc=config.TIME_TO_COLLISION,
        alpha=config.ALPHA_CBF,
        indices=knn,
    )

    # NOTE(review): uses the unrefined action and the current state rather
    # than a_opt -- confirm this is intended.
    loss_action = core.loss_actions(
        state_in, goal_in, a_raw,
        r=config.DIST_MIN_THRES, ttc=config.TIME_TO_COLLISION)

    loss_list = [
        loss_dang,
        loss_safe,
        loss_dang_deriv,
        loss_safe_deriv,
        loss_action,
    ]
    acc_list = [acc_dang, acc_safe, acc_dang_deriv, acc_safe_deriv]

    return state_in, goal_in, a_opt, loss_list, acc_list
Example #10
0
def build_training_graph(num_agents):
    """Build the training graph for the 4-dimensional-state agents.

    Args:
        num_agents: Number of agents; first dimension of the
            ``[num_agents, 4]`` state and ``[num_agents, 2]`` goal
            placeholders.

    Returns:
        ``(s, g, a, loss_list, loss, acc_list)``: both placeholders, the
        ``core.network_action`` output, the weighted loss terms, the total
        loss (ten times the sum of the weighted terms and the L2
        weight-decay terms) and the accuracy tensors.
    """
    # Inputs: agent states and goal states.
    agent_states = tf.placeholder(tf.float32, [num_agents, 4])
    goal_states = tf.placeholder(tf.float32, [num_agents, 2])

    # rel_states[i, j] is the state of agent i minus the state of agent j.
    rel_states = (tf.expand_dims(agent_states, 1)
                  - tf.expand_dims(agent_states, 0))

    # Barrier network; the returned mask is not needed for training.
    cbf, _, nbr_indices = core.network_cbf(
        x=rel_states, r=config.DIST_MIN_THRES, indices=None)

    # Controller network.
    action = core.network_action(
        s=agent_states,
        g=goal_states,
        obs_radius=config.OBS_RADIUS,
        indices=nbr_indices,
    )

    # Barrier-value losses/accuracies for the dangerous and safe sets.
    barrier_terms = core.loss_barrier(
        h=cbf,
        s=agent_states,
        r=config.DIST_MIN_THRES,
        ttc=config.TIME_TO_COLLISION,
        indices=nbr_indices,
    )
    loss_dang, loss_safe, acc_dang, acc_safe = barrier_terms

    # Barrier-derivative losses/accuracies for the dangerous and safe sets.
    derivative_terms = core.loss_derivatives(
        s=agent_states,
        a=action,
        h=cbf,
        x=rel_states,
        r=config.DIST_MIN_THRES,
        indices=nbr_indices,
        ttc=config.TIME_TO_COLLISION,
        alpha=config.ALPHA_CBF,
    )
    (loss_dang_deriv, loss_safe_deriv,
     acc_dang_deriv, acc_safe_deriv) = derivative_terms

    # Penalty on the controller output as computed by core.loss_actions.
    loss_action = core.loss_actions(
        s=agent_states,
        g=goal_states,
        a=action,
        r=config.DIST_MIN_THRES,
        ttc=config.TIME_TO_COLLISION,
    )

    # Fixed per-term weights.
    loss_list = [
        2 * loss_dang,
        loss_safe,
        2 * loss_dang_deriv,
        loss_safe_deriv,
        0.01 * loss_action,
    ]
    acc_list = [acc_dang, acc_safe, acc_dang_deriv, acc_safe_deriv]

    # L2 regularisation over every trainable variable.
    l2_terms = []
    for var in tf.trainable_variables():
        l2_terms.append(config.WEIGHT_DECAY * tf.nn.l2_loss(var))
    loss = 10 * tf.math.add_n(loss_list + l2_terms)

    return agent_states, goal_states, action, loss_list, loss, acc_list