def reset_sim(self, init_state=[-0.8, -3.5, 7.8, 0, 0, 1.27, 0, 0.0, 0.0, 0.0, 0.0, 0.0]):
    self.state = init_state
    # initialize flag variables
    self.step = 0
    self.last_rew = None
    self.last_feat = None
    self.failed_steps = 0
    # set initial state
    msg = self.state2msg(self.state)
    self.reset_pub.publish(msg)
    # publish reset flag
    flag = Int8()
    flag.data = 1
    self.pause_pub.publish(flag)
    ret_state = np.array(self.get_retstate(self.state))
    img = copy.deepcopy(self.IG.cv_image)
    u = self.state[6]
    _, feat = get_reward(img, u)
    return ret_state, self.IG.cv_image, feat
def reset_sim(self, init_state=[-1.12, -3.6, 7.4, 0, 0, 1.27, 0, 0.0, 0.0, 0.0, 0.0, 0.0]):
    # copy the initial state so the mutable default argument is not modified in place across resets
    self.state = list(init_state)
    #self.state[5] = -np.arctan(1.8/0.56)
    self.state[5] = -0.3 - np.pi / 2
    # randomize the start heading within +/- 30 degrees
    rand_delta_ang = (random.random() * 2 - 1) * np.pi / 6
    self.state[5] += rand_delta_ang
    print("start at direction angle %f deg" % (self.state[5] / np.pi * 180))
    #self.state[:2] = random.choice(self.init_pos)
    # initialize flag variables
    self.step = 0
    self.last_rew = None
    self.last_feat = None
    self.failed_steps = 0
    # set initial state
    msg = self.state2msg(self.state)
    self.reset_pub.publish(msg)
    # publish reset flag
    flag = Int8()
    flag.data = 1
    self.pause_pub.publish(flag)
    ret_state = np.array(self.get_retstate(self.state))
    img = copy.deepcopy(self.IG.cv_image)
    u = self.state[6]
    _, feat = get_reward(img, u)
    return ret_state, self.IG.cv_image, feat
def step(self, action_idx):
    action = self.actions[action_idx]
    is_done = False
    row = self.df.iloc[self.idx]
    env_flag = self.env_judge(row)
    reward = rw.get_reward(env_flag, action, row)  # reward obtained for this action
    self.env_react(action, row)
    # last request: end the episode immediately
    if row['ReqNo'] == self.REQ_NUM - 1:
        is_done = True
        state = self.observation(row['ReqNo'])
        return state, float(reward), is_done
    # advance to the next incoming request, applying any 'leave' events along the way
    while True:
        self.idx += 1
        print(self.idx)
        if self.df.iloc[self.idx]['status'] == 'leave':
            row = self.df.iloc[self.idx]
            self.env_rd_change(row)
        else:
            self.state_add_req(self.df.iloc[self.idx])
            break
    row = self.df.iloc[self.idx]
    state = self.observation(row['ReqNo'])
    return state, float(reward), is_done
def frame_step(self, action):
    # publish action
    a_msg = self.action2msg(action)
    self.Thruster_pub.publish(a_msg)
    # run to next frame
    rospy.sleep(0.1)
    # subscribe new state
    self.state = np.append(self.State_p.p, self.State_v.v)
    # get reward
    img = copy.deepcopy(self.IG.cv_image)
    u = self.state[6]
    rew, feat = get_reward(img, u)
    done = False
    if rew is None or feat is None:
        rew = 0
        done = True
    # process None feat
    #if feat is None:
    #    if self.last_feat is not None:
    #        feat = self.last_feat
    #else:
    #    self.last_feat = feat
    # process the None reward
    #if rew is None:
    #    rew = 0
    #    self.failed_steps += 1
    #else:
    #    self.failed_steps = 0
    # end condition: failed_steps > 5
    #done = False
    #if self.failed_steps > 5:
    #    # if the reward cannot be detected for 5 consecutive steps, end the episode
    #    done = True
    ret_state = self.get_retstate(self.state)
    self.step += 1
    return ret_state, img, rew, done, feat
def __init__(self, launch_file, config, port_ros='11311', port_gazebo='11345',
             reward_str='HitReward,CoinReward', logger=None, randomized_target=True,
             action_space=((.3, .0), (.05, .3), (.05, -.3))):
    print('port gazebo: {}, port ros: {}'.format(port_gazebo, port_ros))
    gazebo_env.GazeboEnv.__init__(self, launch_file, port=port_ros, port_gazebo=port_gazebo)
    print('ACTION SPACE', action_space)
    self.vel_pub = rospy.Publisher('/mobile_base/commands/velocity', Twist, queue_size=5)
    self.unpause = rospy.ServiceProxy('/gazebo/unpause_physics', Empty)
    self.pause = rospy.ServiceProxy('/gazebo/pause_physics', Empty)
    self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_simulation', Empty)
    self.action_space = spaces.Discrete(len(action_space))
    self.action_tuple = action_space
    self.reward_range = (-np.inf, np.inf)
    self.model_coordinates = rospy.ServiceProxy('/gazebo/get_model_state', GetModelState)
    self.model = GetModelStateRequest()
    self.model.model_name = 'mobile_base'
    self.path_planner = path_planners.PathPlanner(config, randomized_target)
    self.rewards = get_reward(reward_str, config, path_planner=self.path_planner)
    self.min_range = 0.2
    self.transform_observation = state_transforms.ObservationTransform(config, self.path_planner)
    self.logger = logger
    self._seed()
from ultrasonic import get_distance
from rc_car import move_left, move_right, move_forward
from camera import capture
from reward import get_reward
from server import init_params, get_action
import time

r_init = init_params([30 * 30], 3)

# NOTE: range(0) runs zero episodes; replace 0 with the desired episode count
for i in range(0):
    st = time.time()
    # poll the ultrasonic sensor until the reading leaves the [400, 3000] range
    distance = 401
    while distance >= 400 and distance <= 3000:
        distance = get_distance()
    reward = get_reward(distance, 60)
    print(distance, reward)
    terminate = False
    if distance < 60:
        terminate = True
    state = capture()
    print(state.shape)
    action = get_action(state.tolist(), reward, terminate)
    print(action)
    duration = 0.5
    if distance > 60:
        if action == 0:
            move_left(duration)
import RNN as rnn
import agent
import reward as r
import utils

LEARNING_RATE = 0.000025
GAMMA = 0.99
num_iterations = 500

char_to_ix = rnn.char_to_ind()

# TODO: Task should be passed from here

# # initialize rnn
# parameters = rnn.init_parameters()

# initialize lstm
lstm = agent.lstm()

for j in range(num_iterations):
    code_string, grads = lstm.sample(j)
    reward = r.get_reward(code_string)
    gradient_ascent = utils.calc_gradient_ascent(grads, reward.episode_rewards, GAMMA, LEARNING_RATE)
    lstm.update_params(gradient_ascent)
    if j % 100 == 0:
        print('Iteration: %d' % (j) + '\nCorrect Output: ' + str(reward.correct_output) +
              " Code Output: " + str(reward.code_output) + " Reason: " + str(reward.reason))
        print(lstm.sample(0))
        print('\n')