def iteration(mean_and_stddev, _):
  mean, stddev = mean_and_stddev
  # Sample action proposals from the belief for each env in the batch,
  # each candidate, and each horizon step.
  normal = tf.random_normal((original_batch, amount, horizon) + action_shape)
  # Action shape: (envs batch size, candidates amount, horizon) + action_shape.
  action = normal * stddev[:, None] + mean[:, None]
  # Reshape to the extended_batch layout: (original_batch * amount, horizon) + action_shape.
  action = tf.reshape(action, (extended_batch, horizon) + action_shape)
  if discrete_action:
    # Normalize action scores and apply the greedy policy.
    action = tf.nn.l2_normalize(action, axis=-1)
    postproc_action = greedy(action, action_shape[0])
  else:
    # Clip actions to the valid range and keep them continuous.
    action = tf.clip_by_value(action, min_action, max_action)
    postproc_action = action
  # Evaluate proposal actions.
  (_, state), _ = tf.nn.dynamic_rnn(
      cell, (0 * obs, postproc_action, use_obs), initial_state=initial_state)
  reward = objective_fn(state)
  return_ = discounted_return.discounted_return(reward, length, discount)[:, 0]
  # Reshape back to the (envs batch size, candidates amount) layout.
  return_ = tf.reshape(return_, (original_batch, amount))
  # Indices have shape (envs batch size, topk) and index the candidates
  # for each env in the batch.
  _, indices = tf.nn.top_k(return_, topk, sorted=False)
  # Offset each index so it matches `action`, whose first dim is extended_batch.
  indices += tf.range(original_batch)[:, None] * amount
  # best_actions has shape indices.shape + action.shape[1:], which is
  # (envs batch size, topk, horizon) + action_shape.
  best_actions = tf.gather(action, indices)
  # Calculate the new belief from the best actions,
  # shape: (envs batch size, horizon) + action_shape.
  mean, variance = tf.nn.moments(best_actions, 1)
  stddev = tf.sqrt(variance + 1e-6)
  return mean, stddev
def iteration(mean_and_stddev, _):
  mean, stddev = mean_and_stddev
  # Sample action proposals from belief.
  normal = tf.random_normal((original_batch, amount, horizon) + action_shape)
  action = normal * stddev[:, None] + mean[:, None]
  action = tf.clip_by_value(action, min_action, max_action)
  # Evaluate proposal actions.
  action = tf.reshape(action, (extended_batch, horizon) + action_shape)
  (_, state), _ = tf.nn.dynamic_rnn(
      cell, (0 * obs, action, use_obs), initial_state=initial_state)
  reward = objective_fn(state)
  return_ = discounted_return.discounted_return(reward, length, discount)[:, 0]
  return_ = tf.reshape(return_, (original_batch, amount))
  # Re-fit belief to the best ones.
  _, indices = tf.nn.top_k(return_, topk, sorted=False)
  indices += tf.range(original_batch)[:, None] * amount
  best_actions = tf.gather(action, indices)
  mean, variance = tf.nn.moments(best_actions, 1)
  stddev = tf.sqrt(variance + 1e-6)
  return mean, stddev
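For reference, a minimal sketch of how an iteration function like this is usually driven, assuming it sits inside a planner that already defines original_batch, horizon, action_shape, and an iteration count (the name iterations is an illustrative assumption):

# Sketch only: assumes `iteration`, `original_batch`, `horizon`, `action_shape`,
# and `iterations` are available from the enclosing planner function.
mean = tf.zeros((original_batch, horizon) + action_shape)
stddev = tf.ones((original_batch, horizon) + action_shape)
# tf.scan threads (mean, stddev) through `iterations` refinement steps and
# stacks the per-step results along a new leading axis.
mean, stddev = tf.scan(
    iteration, tf.range(iterations), (mean, stddev), back_prop=False)
# Keep only the belief from the final CEM iteration.
mean, stddev = mean[-1], stddev[-1]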
def iteration(mean_and_stddev, _):
  mean, stddev = mean_and_stddev
  # Sample action proposals from belief.
  normal = tf.random_normal((original_batch, amount, horizon) + action_shape)
  action = normal * stddev[:, None] + mean[:, None]
  action = tf.clip_by_value(action, min_action, max_action)
  # Evaluate proposal actions.
  action = tf.reshape(action, (extended_batch, horizon) + action_shape)
  (_, state), _ = tf.nn.dynamic_rnn(
      cell, (0 * obs, action, use_obs), initial_state=initial_state)
  # Predicted objectives, e.g. {'reward': (1000, 12), 'angular_speed_degree': (1000, 12), ...}.
  objectives = objective_fn(state)
  reward = objectives['reward']
  angular_speed = objectives['angular_speed_degree']
  forward_speed = objectives['forward_speed'] / 10.0
  collided = objectives['collided']
  intersection_offroad = objectives['intersection_offroad']
  intersection_otherlane = objectives['intersection_otherlane']

  # 1. Reward for planning (learned reward only):
  # return_ = discounted_return.discounted_return(reward, length, discount)[:, 0]
  # total_return = tf.reshape(return_, (original_batch, amount))

  if not PLANNING:
    # 2. Reward for planning: learned reward plus heading loss.
    return_ = discounted_return.discounted_return(
        reward, length, discount)[:, 0]  # shape: (1000,)
    return_ = tf.reshape(return_, (original_batch, amount))  # shape: (1, 1000)
    threshold_degree = tf.where(dist_to_intersection < 9,
                                9 * (9 - dist_to_intersection), 0)
    angular_turn_ = discounted_return.discounted_return(
        angular_speed, length, 1.0)[:, 0]  # shape: (1000,)
    # Penalize heading error relative to the goal heading when a turn command is active.
    heading_loss = -tf.abs(delta_degree(
        goal_heading_degree - (current_heading_degree + angular_turn_))) * tf.case(
            {
                tf.equal(cmd_id, 3): costn1,
                tf.equal(cmd_id, 2): costn1,
                tf.equal(cmd_id, 1): costn1
            },
            default=costn0)
    heading_loss_weighted = heading_loss * tf.where(
        heading_loss > threshold_degree - 90,
        tf.ones((amount,)) * 0.3,
        tf.ones((amount,)) * 1000.0)
    return_heading = tf.reshape(heading_loss_weighted, (original_batch, amount))
    total_return = return_ + return_heading

  if PLANNING:
    # 3. Reward for planning: hand-crafted driving reward plus heading loss.
    rewards = (forward_speed
               - 300.0 * tf.where(collided > 0.3, collided, tf.zeros_like(collided))
               - 20.0 * intersection_offroad
               - 10.0 * intersection_otherlane)
    return_ = discounted_return.discounted_return(
        rewards, length, discount)[:, 0]  # shape: (1000,)
    return_ = tf.reshape(return_, (original_batch, amount))  # shape: (1, 1000)
    threshold_degree = tf.where(dist_to_intersection < 9,
                                9 * (9 - dist_to_intersection), 0)
    angular_turn_ = discounted_return.discounted_return(
        angular_speed, length, 1.0)[:, 0]  # shape: (1000,)
    heading_loss = -tf.abs(delta_degree(
        goal_heading_degree - (current_heading_degree + angular_turn_))) * tf.case(
            {
                tf.equal(cmd_id, 3): costn1,
                tf.equal(cmd_id, 2): costn1,
                tf.equal(cmd_id, 1): costn1
            },
            default=costn0)
    heading_loss_weighted = heading_loss * tf.where(
        heading_loss > threshold_degree - 90,
        tf.ones((amount,)) * 0.3,
        tf.ones((amount,)) * 1000.0)
    return_heading = tf.reshape(heading_loss_weighted, (original_batch, amount))
    total_return = return_ + return_heading

  # Re-fit belief to the best ones.
  _, indices = tf.nn.top_k(total_return, topk, sorted=False)
  indices += tf.range(original_batch)[:, None] * amount
  best_actions = tf.gather(action, indices)
  mean, variance = tf.nn.moments(best_actions, 1)
  stddev = tf.sqrt(variance + 1e-6)
  return mean, stddev
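The heading loss depends on a delta_degree helper whose body is not shown here; a plausible sketch (an assumption, not the original implementation) that wraps an angle difference to the shortest signed rotation in degrees:

# Hypothetical helper (assumption): wrap a difference in degrees into
# [-180, 180) so the heading loss measures the shortest rotation to the goal.
def delta_degree(diff_degree):
  return tf.floormod(diff_degree + 180.0, 360.0) - 180.0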
def iteration(mean_and_stddev, _):
  mean, stddev, command = mean_and_stddev
  # mean:   (1, 12, 2)
  # stddev: (1, 12, 2)
  # Sample action proposals from belief.
  normal = tf.random_normal((original_batch, amount, horizon) + action_shape)
  action = normal * stddev[:, None] + mean[:, None]
  action = tf.clip_by_value(action, min_action, max_action)
  # Evaluate proposal actions.
  action = tf.reshape(action, (extended_batch, horizon) + action_shape)
  (_, state), _ = tf.nn.dynamic_rnn(
      cell, (0 * obs, action, use_obs), initial_state=initial_state)
  # action: Tensor("graph/collection/should_collect_carla/simulate-1/
  #   train-carla-cem-12/scan/while/simulate/scan/while/Reshape:0",
  #   shape=(1000, 12, 2), dtype=float32)
  reward = objective_fn(state)
  # Command-conditioned candidate bonuses ("bonds"): action component 1 is
  # summed as the turning term, component 0 as the forward/lane-keep term.
  bond_turn = tf.reshape(tf.reduce_sum(action[:, :, 1], axis=1), [1, 1000])
  bond_turn = tf.clip_by_value(bond_turn, -10, 10)
  bond_keep = tf.reshape(tf.reduce_sum(action[:, :, 0], axis=1), [1, 1000])
  bond_straight = tf.reshape(tf.reduce_sum(action[:, :, 0], axis=1), [1, 1000]) - \
      0.2 * tf.reshape(tf.reduce_sum(tf.abs(action[:, :, 1]), axis=1), [1, 1000])
  bond_straight = tf.clip_by_value(bond_straight, -8, 8)
  bond_keep = tf.clip_by_value(bond_keep, -8, 8)

  def f1():
    return bond_straight  # go-straight bond

  def f2():
    return bond_turn + 0.2 * bond_keep  # right-turn bond

  def f3():
    return -bond_turn + 0.2 * bond_keep  # left-turn bond

  def f4():
    return bond_keep  # lane-keep bond

  bond = tf.case(
      {
          tf.reduce_all(tf.equal(command, 2)): f2,
          tf.reduce_all(tf.equal(command, 3)): f3,
          tf.reduce_all(tf.equal(command, 4)): f4
      },
      default=f1,
      exclusive=True)
  return_ = discounted_return.discounted_return(reward, length, discount)[:, 0]
  return_ = tf.reshape(return_, (original_batch, amount))
  if PLAN_BOND:
    return_ += bond * 0.2
  # Re-fit belief to the best ones.
  _, indices = tf.nn.top_k(return_, topk, sorted=False)
  indices += tf.range(original_batch)[:, None] * amount
  best_actions = tf.gather(action, indices)
  mean, variance = tf.nn.moments(best_actions, 1)
  stddev = tf.sqrt(variance + 1e-6)
  return mean, stddev, command
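Because this variant threads the navigation command through the loop-carried state, the tf.scan initializer has to match the three-element structure the function returns; a sketch under the same assumptions as the earlier one:

# Sketch only: `command` is assumed to be the high-level navigation command
# tensor provided by the caller; other names as in the sketch above.
mean = tf.zeros((original_batch, horizon) + action_shape)
stddev = tf.ones((original_batch, horizon) + action_shape)
mean, stddev, command = tf.scan(
    iteration, tf.range(iterations), (mean, stddev, command), back_prop=False)
mean, stddev = mean[-1], stddev[-1]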