Example #1
    def reset_sim(self,
                  init_state=(
                      -0.8, -3.5, 7.8, 0, 0, 1.27, 0, 0.0, 0.0, 0.0, 0.0, 0.0
                  )):
        # copy the default so resets never share (or modify) the same mutable object
        self.state = list(init_state)

        #initialize flag variables
        self.step = 0
        self.last_rew = None
        self.last_feat = None
        self.failed_steps = 0

        #set initial state
        msg = self.state2msg(self.state)
        self.reset_pub.publish(msg)

        #publish reset_flag
        flag = Int8()
        flag.data = 1
        self.pause_pub.publish(flag)

        ret_state = np.array(self.get_retstate(self.state))

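        # grab the current camera frame and extract the initial visual feature via get_reward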
        img = copy.deepcopy(self.IG.cv_image)
        u = self.state[6]
        _, feat = get_reward(img, u)

        return ret_state, self.IG.cv_image, feat
Example #2
    def reset_sim(self,
                  init_state=(
                      -1.12, -3.6, 7.4, 0, 0, 1.27, 0, 0.0, 0.0, 0.0, 0.0, 0.0
                  )):
        # copy the default so the in-place heading edits below never modify the shared default
        self.state = list(init_state)
        #self.state[5] = -np.arctan(1.8/0.56)
        self.state[5] = -0.3 - np.pi / 2
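        # randomize the initial heading by up to ±30 degrees (pi/6 rad) around the base angle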
        rand_delta_ang = (random.random() * 2 - 1) * np.pi / 6
        self.state[5] += rand_delta_ang
        print("start at direction angle %f deg" %
              (self.state[5] / np.pi * 180))
        #self.state[:2] = random.choice(self.init_pos)

        #initialize flag variables
        self.step = 0
        self.last_rew = None
        self.last_feat = None
        self.failed_steps = 0

        #set initial state
        msg = self.state2msg(self.state)
        self.reset_pub.publish(msg)

        #publish reset_flag
        flag = Int8()
        flag.data = 1
        self.pause_pub.publish(flag)

        ret_state = np.array(self.get_retstate(self.state))

        img = copy.deepcopy(self.IG.cv_image)
        u = self.state[6]
        _, feat = get_reward(img, u)

        return ret_state, self.IG.cv_image, feat
Example #3
    def step(self, action_idx):
        action = self.actions[action_idx]
        is_done = False
        row = self.df.iloc[self.idx]
        env_flag = self.env_judge(row)
        reward = rw.get_reward(env_flag, action, row)  # reward earned by this action
        self.env_react(action, row)

        if row['ReqNo'] == self.REQ_NUM - 1:
            is_done = True
            state = self.observation(row['ReqNo'])
            return state, float(reward), is_done

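        # advance through the log, handling 'leave' rows until the next incoming request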
        while True:
            self.idx += 1
            print(self.idx)
            if self.df.iloc[self.idx]['status'] == 'leave':
                row = self.df.iloc[self.idx]
                self.env_rd_change(row)
            else:
                self.state_add_req(self.df.iloc[self.idx])
                break

        row = self.df.iloc[self.idx]
        state = self.observation(row['ReqNo'])
        return state, float(reward), is_done
Example #4
    def frame_step(self, action):
        #publish action
        a_msg = self.action2msg(action)
        self.Thruster_pub.publish(a_msg)

        #run to next frame
        rospy.sleep(0.1)

        #subscribe new state
        self.state = np.append(self.State_p.p, self.State_v.v)

        # get reward
        img = copy.deepcopy(self.IG.cv_image)
        u = self.state[6]
        rew, feat = get_reward(img, u)

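        # end the episode when no reward/feature can be extracted from the frame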
        done = False
        if rew is None or feat is None:
            rew = 0
            done = True

        # process None feat
        #if feat is None:
        #    if self.last_feat is not None:
        #        feat = self.last_feat
        #else:
        #    self.last_feat = feat

        # process the None reward
        #if rew is None:
        #    rew = 0
        #    self.failed_steps += 1
        #else:
        #    self.failed_steps = 0

        # judge end condition: fail_step > 5
        #done = False
        #if self.failed_steps > 5:   # if rew cannot be continuously detected by 5 times, end the episode.
        #    done = True

        ret_state = self.get_retstate(self.state)

        self.step += 1
        return ret_state, img, rew, done, feat
Example #5
 def __init__(self,
              launch_file,
              config,
              port_ros='11311',
              port_gazebo='11345',
              reward_str='HitReward,CoinReward',
              logger=None,
              randomized_target=True,
              action_space=((.3, .0), (.05, .3), (.05, -.3))):
     print('port gazebo: {}, port ros: {}'.format(port_gazebo, port_ros))
     gazebo_env.GazeboEnv.__init__(self,
                                   launch_file,
                                   port=port_ros,
                                   port_gazebo=port_gazebo)
     print('ACTION SPACE', action_space)
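     # ROS publisher for velocity commands and Gazebo service proxies for pausing, resetting, and querying model state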
     self.vel_pub = rospy.Publisher('/mobile_base/commands/velocity',
                                    Twist,
                                    queue_size=5)
     self.unpause = rospy.ServiceProxy('/gazebo/unpause_physics', Empty)
     self.pause = rospy.ServiceProxy('/gazebo/pause_physics', Empty)
     self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_simulation',
                                           Empty)
     self.action_space = spaces.Discrete(len(action_space))
     self.action_tuple = action_space
     self.reward_range = (-np.inf, np.inf)
     self.model_coordinates = rospy.ServiceProxy('/gazebo/get_model_state',
                                                 GetModelState)
     self.model = GetModelStateRequest()
     self.model.model_name = 'mobile_base'
     self.path_planner = path_planners.PathPlanner(config,
                                                   randomized_target)
     self.rewards = get_reward(reward_str,
                               config,
                               path_planner=self.path_planner)
     self.min_range = 0.2
     self.transform_observation = state_transforms.ObservationTransform(
         config, self.path_planner)
     self.logger = logger
     self._seed()
Example #6
from ultrasonic import get_distance
from rc_car import move_left, move_right, move_forward
from camera import capture
from reward import get_reward
from server import init_params, get_action
import time

r_init = init_params([30 * 30], 3)

for i in range(0):
    st = time.time()
    
    # prime the reading, then poll the sensor until it leaves the 400-3000 band
    distance = 401
    while 400 <= distance <= 3000:
        distance = get_distance()
    reward = get_reward(distance, 60)
    print(distance, reward)
    terminate = False
    if distance < 60:
        terminate = True

    state = capture()
    print(state.shape)
    action = get_action(state.tolist(), reward, terminate)
    print(action)
    
    duration = 0.5
    
    if distance > 60:
        if action == 0:
            move_left(duration)
Example #7
import RNN as rnn
import agent
import reward as r
import utils

LEARNING_RATE = 0.000025
GAMMA = 0.99
num_iterations = 500
char_to_ix = rnn.char_to_ind()
# TODO: Task should be passed from here

# # initialize rnn
# parameters = rnn.init_parameters()

# initialize lstm
lstm = agent.lstm()

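# sample a code string from the LSTM, score it with the reward module, and apply gradient ascent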
for j in range(num_iterations):
    code_string, grads = lstm.sample(j)
    reward = r.get_reward(code_string)
    gradient_ascent = utils.calc_gradient_ascent(grads, reward.episode_rewards,
                                                 GAMMA, LEARNING_RATE)
    lstm.update_params(gradient_ascent)

    if j % 100 == 0:
        print('Iteration: %d' % (j) + '\nCorrect Output: ' +
              str(reward.correct_output) + " Code Output: " +
              str(reward.code_output) + " Reason: " + str(reward.reason))
        print(lstm.sample(0))
        print('\n')