def __init__(self, stateDim, inputDim, dt): self.namespace = "" # 0 1 2 3 4 5 6 # x y yaw roll vx vy yawr self.dt = dt self.img_name = '' self.img = None self.length = 15 self.width = 8 self.x0 = 0 #250 self.y0 = 0 #150 self.yaw0 = 0 self.roll0 = 0 self.vx0 = 1 self.vy0 = 0 self.yawd0 = 0 self.states_init = [ self.x0, self.y0, self.yaw0, self.roll0, self.vx0, self.vy0, self.yawd0 ] self.stateDim = stateDim self.inputDim = inputDim self.dynamicsDim = 4 self.states = np.zeros(self.stateDim) self.states_der = np.zeros(self.stateDim) self.state_hist = np.zeros([1, (self.stateDim)]) self.state_der_hist = np.zeros([1, (self.stateDim)]) self.data = np.zeros([1, 2 * self.dynamicsDim + self.inputDim]) self.inputs = np.zeros(self.inputDim) self.inputs_der = np.zeros(self.inputDim) self.dynamics = dynamics.Dynamics(stateDim, inputDim, dt) self.toggle = False self.pose_pub = rospy.Publisher('simulation/pose', PoseStamped, queue_size=1) self.bodyOdom_pub = rospy.Publisher('simulation/bodyOdom', Odometry, queue_size=1) self.poseOdom_pub = rospy.Publisher('pose_estimate', Odometry, queue_size=1) self.input_pub = rospy.Publisher('simulation/inputs', Joy, queue_size=1) self.accel_pub = rospy.Publisher('acceleration/all', Accel, queue_size=1) self.br = tf2_ros.TransformBroadcaster() # pygame.init() # self.screen = pygame.display.set_mode((480, 320), DOUBLEBUF) self.RED = (255, 0, 0) self.BLACK = (255, 255, 255) self.WHITE = (0, 0, 0)
def read_world(self, file_world): maxx = 0 maxy = 0 if file_world != '': f = open(file_world, "r") f1 = f.readlines() for i in range(0, len(f1)): if i == 0: self.xlim = np.abs(int(int(f1[i]))) maxx = np.abs(int(int(f1[i]))) elif i == 1: self.ylim = np.abs(int(int(f1[i]))) maxy = np.abs(int(int(f1[i]))) else: for j in range(0, len(f1[i])): if f1[i][j] == '@': self.curr_x = self.wpt_x = j self.curr_y = self.wpt_y = (maxy - i + 1) if f1[i][j] == '#': self.static_obs.append((j, (maxy - i + 1))) self.boat_dynamics = dynamics.Dynamics(cw4.cw4, self.curr_x, self.curr_y)
def __init__(self): mass = rospy.get_param("mass", 1.0) damping = rospy.get_param("damping", 0.0) self.lock = threading.Lock() self.enabled = False self.dynamics = dynamics.Dynamics() self.dynamics.mass = mass self.dynamics.damping = damping self.dynamics.reset() # Setup services self.node_enable_srv = rospy.Service( 'dynamics_enable', NodeEnable, self.handle_node_enable ) self.set_dynam_params_srv = rospy.Service( 'set_dynam_params', SetDynamParams, self.handle_set_dynam_params ) self.get_dynam_params_srv = rospy.Service( 'get_dynam_params', GetDynamParams, self.handle_get_dynam_params ) # Setup subscribers self.force_sub = rospy.Subscriber('force', ForceMsg, self.force_callback) # Setup setpt topic #self.setptMsg = None self.setptMsg = SetptMsg() self.setptPub = rospy.Publisher('setpt_rel', SetptMsg)
def __init__(self, start_x=0.0, start_y=0.0, start_heading=0.0, start_speed=0.0, nobs=4, xlim=1000.0, ylim=1000.0, plot_bool=False, file_world='', goal_location='', environment=None, geotiff=None, boat_model=cw4.cw4, dynamic_obs=''): self.debug = False self.throttle = 0.0 self.rudder = 0.0 # Create an instance of the asv_sim model # Set parameters for the model. self.environment = environment self.boat_model = boat_model self.boat_dynamics = dynamics.Dynamics(self.boat_model, start_x, start_y, start_heading, start_speed, self.environment) # Set the starting state self.start_lat = 43.0 self.start_long = -70.1 self.start_heading = start_heading self.start_speed = start_speed # Set target waypoint self.desired_lat = 42.997797 # Does it need to be updated from somewhere else? self.desired_long = -70.102602 # Does it need to be updated from somewhere else? self.wpt_x = start_x # 0.0 self.wpt_y = start_y # 0.0 # deg to rad self.desired_lat = np.radians(self.desired_lat) self.desired_long = np.radians(self.desired_long) # set target speed self.desired_speed = self.start_speed self.desired_heading = self.start_heading # current state: self.curr_lat = self.start_lat self.curr_long = self.start_long self.curr_x = start_x self.curr_y = start_y self.curr_heading = self.start_heading # Loop at some rate self.period = 0.05 self.decreaserate = 0.0 self.last_update_time = None self.xx = [] self.yy = [] # Define the socket for input data... self.soc = socket.socket(family=socket.AF_INET, type=socket.SOCK_DGRAM) # Define the socket for output data - ASV current state self.soc_asv_out = socket.socket(family=socket.AF_INET, type=socket.SOCK_DGRAM) # Define the socket for output data - Obstacles current state self.soc_obs_out = socket.socket(family=socket.AF_INET, type=socket.SOCK_DGRAM) # Limits of the area self.xlim = np.abs(xlim) self.ylim = np.abs(ylim) # read the grid and update the boat model, limit, curr_x, curr_y, wpt_x, wpt_y self.static_obs = None self.static_draw = [] self.goal_location = [] self.factor = 1 self.read_world(file_world, geotiff, goal_location) if dynamic_obs: self.read_dynamic(dynamic_obs) else: # Initializing obstacles: self.nobs = nobs # number of obstacles self.xobs = np.random.uniform(-self.xlim, self.xlim, self.nobs) self.yobs = np.random.uniform(-self.ylim, self.ylim, self.nobs) # speed of obstacles [m/sec] self.vobs = np.random.uniform(2.57, 7.72, self.nobs) # heading of obstacles [rad] self.hobs = np.random.uniform(0, 2 * np.pi, self.nobs) self.xx_obs = [] self.yy_obs = [] self.obs_id = np.arange(self.nobs) #self.cmap = plt.cm.summer #rcParams['axes.prop_cycle'] = cycler(color=self.cmap(np.linspace(0, 1, self.nobs))) # self.obstacles = np.zeros(self.nobs, dtype=[('position',float,2), # ('id', float, 1), # ('color',float, 4)]) #futrue path self.future_heading = [] self.future_x = [] self.future_y = [] self.estimateStart = (0, 0, 0) # Plot self.plot_boolean = plot_bool # Verifying self.plot_boolean is of type Boolean and not str: if self.plot_boolean == 'True': self.plot_boolean = True if self.plot_boolean == 'False': self.plot_boolean = False if self.debug: print("Initial plot_boolean = ", self.plot_boolean) # plt.ion() # # Set up the figure # fig = plt.figure() # plt.xlabel("x [m]") # plt.ylabel("y [m]") self.plotaxes = None self.plot_draw = test.PLOT(self.static_obs, self.static_draw, self.xlim, self.ylim, self.factor) self.plotaxes_obs = list(range(self.nobs))
def read_world(self, file_world, geotiff, goal_location): maxx = 0 maxy = 0 if geotiff: geotiff = depth_map.BathyGrid(geotiff) x1, x2, y1, y2 = geotiff.getBound() maxx = np.abs(x2 - x1) maxy = np.abs(y2 - y1) geodata = geotiff.getGrid() by, bx = np.shape(geodata) previous = None self.xlim = max(bx, by) self.ylim = self.xlim self.factor = maxx / bx self.static_obs = [bitarray(by) for i in xrange(bx)] for j in range(by): for i in range(bx): if geodata[j, i] < 2: self.static_obs[i][by - 1 - j] = True if previous == None: previous = ((i, (by - 1 - j))) else: self.static_obs[i][by - 1 - j] = False if previous != None: x, y = previous self.static_draw.append((previous, (i, y + 1))) previous = None if previous != None: x, y = previous self.static_draw.append((previous, (bx, y + 1))) previous = None self.boat_dynamics = dynamics.Dynamics(self.boat_model, self.curr_x, self.curr_y, self.curr_heading, self.start_speed, self.environment) elif file_world != '': f = open(file_world, "r") f1 = f.readlines() previous = None for i in range(0, len(f1)): if i == 1: self.xlim = np.abs(int(int(f1[i]))) maxx = np.abs(int(int(f1[i]))) elif i == 2: self.ylim = np.abs(int(int(f1[i]))) maxy = np.abs(int(int(f1[i]))) self.static_obs = [bitarray(maxy) for i in xrange(maxx)] print maxx, maxy elif i == 0: self.factor = np.abs(int(int(f1[i]))) else: string = f1[i].strip() for j in range(0, len(string)): if f1[i][j] == '#': self.static_obs[j][maxy - i + 2] = True if previous == None: previous = ((j, maxy - i + 2)) else: self.static_obs[j][maxy - i + 2] = False if previous != None: x, y = previous self.static_draw.append((previous, (j, y + 1))) previous = None if previous != None: x, y = previous self.static_draw.append((previous, (maxx, y + 1))) previous = None self.boat_dynamics = dynamics.Dynamics(self.boat_model, self.curr_x, self.curr_y, self.curr_heading, self.start_speed, self.environment) if goal_location != '': f = open(goal_location, "r") f1 = f.readlines() numOfGoal = int(f1[0]) for i in range(1, len(f1)): s = f1[i].split(' ') self.goal_location.append((float(s[0]), float(s[1])))
def run(self): xlist = np.array( [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]) ylist = np.array([5.0, 6.0, 7.0, 8.0, 9.0,\ 10.0, 11.0, 12.0, 13.0, 14.0,\ 15.0, 16.0, 17.0, 18.0, 19.0, \ 20.0, 21.0, 22.0, 23.0, 24.0, \ 25.0, 26.0, 27.0, 28.0, 29.0, \ 30.0, 31.0, 32.0, 33.0, 34.0, \ 35.0, 36.0, 37.0, 38.0, 39.0, \ 40.0, 41.0, 42.0, 43.0, 44.0, \ 45.0, 46.0, 47.0, 48.0, 49.0, 50.0]) vallist = np.zeros((45, 10)) for a_iteration in range(10): a_param = 0.05 + a_iteration * 0.1 for b_iteration in range(45): b_param = 5.5 + b_iteration * 1.0 max_reward = -10000.0 # (a,b)'s score rewards = 0 theta = np.pi omega = 0.0 state = np.array([[theta, omega]]) for test_step in range(1000): current_obs = torch.Tensor([[ np.sin(state[0, 0]), np.cos(state[0, 0]), state[0, 1] ]]) #state: numpy(1,2) -> torch.Tensor(1,3) action = self.agent.get_action(current_obs, None) #Not input ounoise action = action.detach().numpy()[ 0] #action: torch.Tensor(1,1) -> numpy(scaler) next_state, reward, done = dynamics.Dynamics( state, action, a_param, b_param) #next_state: numpy(1,2) rewards += reward state = next_state print("(a,b)=(" + str(a_param) + "," + str(b_param) + ") : reward " + str(rewards)) vallist[b_iteration, a_iteration] = rewards plt.rcParams['font.family'] = 'Times New Roman' plt.rcParams["mathtext.fontset"] = 'cm' plt.rcParams['mathtext.default'] = 'it' fig, ax = plt.subplots() cs = ax.pcolormesh(xlist, ylist, vallist, cmap="jet", vmin=-4000.0, vmax=-50.0) #seismic,hot fig.colorbar(cs) ax.set_xlim(0.0, 1.0) ax.set_ylim(5.0, 50.0) ax.set_xticks([0.0, 0.2, 0.4, 0.6, 0.8, 1.0]) ax.set_yticks( [5.0, 10.0, 15.0, 20.0, 25.0, 30.0, 35.0, 40.0, 45.0, 50.0]) ax.set_xlabel(r'$\xi_{1}$', fontsize=18) ax.set_ylabel(r'$\xi_{2}$', fontsize=18) fig.savefig('Score_of_mu_1.eps', pad_inches=0.05) fig.savefig('Score_of_mu_1.png', pad_inches=0.05)
def __init__(self, stateDim, inputDim, dt): self.namespace = "" ''' states : [x, y, yaw, vx] ''' ''' Initial conditions ''' self.x0 = 0. self.y0 = 0. self.yaw0 = 0. self.vx0 = 0. ''' Parameters (RC platform) ''' self.dt = dt self.max_steer_anlge = 17.75 * np.pi / 180 # rad self.max_vx = 30.0 / 3.6 # m/s self.length = 0.257 ''' Variable initialization ''' self.stateDim = stateDim self.inputDim = inputDim self.dynamicsDim = 4 self.states_init = [self.x0, self.y0, self.yaw0, self.vx0] self.states = self.states_init self.states_dot = np.zeros(self.stateDim) self.state_hist = np.zeros([1, (self.stateDim)]) self.state_dot_hist = np.zeros([1, (self.stateDim)]) self.data = np.zeros([1, 2 * self.dynamicsDim + self.inputDim]) self.inputs = np.zeros(self.inputDim) self.toggle_switch = False ''' Vehicle dynamics object ''' self.dynamics = dynamics.Dynamics(stateDim, inputDim, dt) # self.dynamics.modelType = 1 self.dynamics.max_steer = self.max_steer_anlge ''' ROS publishers ''' self.pose_pub = rospy.Publisher('simulation/pose', PoseStamped, queue_size=1) self.bodyOdom_pub = rospy.Publisher( 'simulation/bodyOdom', Odometry, queue_size=1) # velocities in the body frame. self.poseOdom_pub = rospy.Publisher( 'simulation/poseOdom', Odometry, queue_size=1) # velocities in the map frame. self.input_pub = rospy.Publisher('simulation/inputs', Joy, queue_size=1) self.accel_pub = rospy.Publisher('simulation/acceleration', Accel, queue_size=1) self.br = tf2_ros.TransformBroadcaster() self.ego_model_marker_pub = rospy.Publisher('simulation/car_marker', Marker, queue_size=1) ''' Variable definitions for autonomous driving ''' self.auto_mode = True self.control_sub = rospy.Subscriber('control', AckermannDriveStamped, self.controlSubCallback) self.steering = 0 self.throttle = 0 self.steer_angle_to_norm = 1 / self.max_steer_anlge #1/0.25 #30/180*np.pi self.throttle_to_norm = 1
def run(self): #Initialize Replay Memory (class) memory = MEMORY(self.args.replay_buffer_size) plt.rcParams['font.family'] = 'Times New Roman' plt.rcParams["mathtext.fontset"] = 'cm' plt.rcParams['mathtext.default'] = 'it' params = {'legend.fontsize': 12, 'legend.handlelength': 3} plt.rcParams.update(params) fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(9, 6)) plt.subplots_adjust(hspace=0.5) sum_of_rewards = 0 a_param = self.args.a_param b_param = self.args.b_param #Learning Phase state = dynamics.Initialize() # Get the initial state s_0 (numpy(1,2)) print("Initial State is " + str(state)) print('This episode mass:' + str(a_param)) print('This episode length:' + str(b_param)) MAX_STEP = 1000 time_list = [] a_list = [] x_1_list = [] x_2_list = [] for learning_step in range(MAX_STEP): #gradually change if learning_step < 200: b_param += 45.0 / 200 x_1_list.append(state[0, 0]) x_2_list.append(state[0, 1]) time_list.append(learning_step) current_obs = torch.Tensor( [[np.sin(state[0, 0]), np.cos(state[0, 0]), state[0, 1]]]) #state: numpy(1,2) -> torch.Tensor(1,3) action = self.agent.get_action( current_obs, None) #exploration action by agent (torch.Tensor(1,1)) action = action.detach().numpy()[ 0] #action: torch.Tensor(1,1) -> numpy(1,) #exploration noise############################################### if np.sqrt(state[0, 0]**(2) + state[0, 1]**(2)) >= 0.05: noise = 0.1 * np.random.normal() action = action + noise a_list.append(action) next_state, reward, done = dynamics.Dynamics( state, action, a_param, b_param) #next_state: numpy(1,2) sum_of_rewards += reward #Make Exploration action = torch.Tensor([action ]) #action: numpy(1,) -> torch.Tensor(1,1) mask = torch.Tensor( [not done]) #mask: bool(False) -> torch.Tensor(1)(True) next_obs = torch.Tensor([[ np.sin(next_state[0, 0]), np.cos(next_state[0, 0]), next_state[0, 1] ]]) #next_state: numpy(1,2) -> torch.Tensor(1,3) reward = torch.Tensor( [reward]) #reward: numpy(scalar) -> torch.Tensor(1) if abs( action[0] ) <= 1.0: #If we do not want to store the experience that has the big scale action. memory.push(current_obs, action, mask, next_obs, reward) # all torch.Tensor state = next_state #Update main DNN and target DNN if len(memory) > self.args.batch_size: transitions = memory.sample( self.args.batch_size) #Make exploration_batch batch = Transition(*zip(*transitions)) self.agent.update_DNNs(batch) #Update DNN self.agent.update_target_DNNs() #Update Target DNN #if done: #break print("Sum of rewards is " + str(sum_of_rewards)) axes[0].plot([0, MAX_STEP], [0, 0], "red", linestyle='dashed') axes[0].plot(time_list, a_list, linewidth=2) axes[0].set_xlim(0.0, MAX_STEP) axes[0].set_ylim(-1, 1) axes[0].set_xlabel('$k$', fontsize=16) axes[0].set_ylabel('$a[k]$', fontsize=16) axes[0].grid(True) axes[1].plot([0, MAX_STEP], [0, 0], "red", linestyle='dashed') axes[1].plot(time_list, x_1_list, linewidth=2) axes[1].set_xlim(0.0, MAX_STEP) axes[1].set_ylim(-np.pi, np.pi) axes[1].set_xlabel('$k$', fontsize=16) axes[1].set_ylabel('$x_1[k]$', fontsize=16) axes[1].grid(True) axes[2].plot([0, MAX_STEP], [0, 0], "red", linestyle='dashed') axes[2].plot(time_list, x_2_list, linewidth=2) axes[2].set_xlim(0.0, MAX_STEP) axes[2].set_ylim(-7, 7) axes[2].set_xlabel('$k$', fontsize=16) axes[2].set_ylabel('$x_2[k]$', fontsize=16) axes[2].grid(True) fig.savefig('standard_from5_to_50.eps', bbox_inches="tight", pad_inches=0.05) fig.savefig('standard_from5_to_50.png', bbox_inches="tight", pad_inches=0.05)
def run(self): #episode_final = False plt.rcParams['font.family'] = 'Times New Roman' plt.rcParams["mathtext.fontset"] = 'cm' plt.rcParams['mathtext.default'] = 'it' params = {'legend.fontsize': 12, 'legend.handlelength': 3} plt.rcParams.update(params) fig, axes = plt.subplots(nrows=5, ncols=1, figsize=(9, 10)) plt.subplots_adjust(hspace=0.8) #reward list sum_reward_list = [] #======================Hyper Parameter===================== weight = np.array([[1 / 4, 1 / 4, 1 / 4, 1 / 4]]) learn_alpha = 5e-5 gamma = 0.99 MAX_STEP = 1000 #========================================================== max_reward = -10000.0 a_param = self.args.a_param b_param = self.args.b_param weight_1_list = [] weight_2_list = [] weight_3_list = [] weight_4_list = [] time_list = [] a_list = [] x_1_list = [] x_2_list = [] td_error_list = [] Discrete_time = 0 state = np.array([[np.pi, 0.0]]) for test_step in range(MAX_STEP): weight_1_list.append(weight[0, 0]) #store the initial parameter weight_2_list.append(weight[0, 1]) weight_3_list.append(weight[0, 2]) weight_4_list.append(weight[0, 3]) time_list.append(test_step) x_1_list.append(state[0, 0]) x_2_list.append(state[0, 1]) current_obs = torch.Tensor([[state[0, 0], state[0, 1]]]) action = self.agent.get_action(current_obs, weight) #Not input ounoise action = action.detach().numpy()[ 0] #action: torch.Tensor(1,1) -> numpy(scaler) #exploration noise############################################### noise = max( (400 - Discrete_time), 0.0) / 400 * 0.1 * np.random.normal() action = action + noise a_list.append(action) action = torch.Tensor([action]) Q_vec = self.agent.get_Q_value( current_obs, action) # Q(x[k],a[k]) as characteristic functions action = action.detach().numpy()[ 0] #action: torch.Tensor(1,1) -> numpy(1,) next_state, reward, done = dynamics.Dynamics( state, action, a_param, b_param) #next_state: numpy(1,2) next_obs = torch.Tensor([[next_state[0, 0], next_state[0, 1]]]) #update of the parameters max_Q_next_vec = self.agent.get_next_value(next_obs, weight) param = np.array( [[weight[0, 0], weight[0, 1], weight[0, 2], weight[0, 3]]]) #w=[w_{1},...,w_{N}] td_error = param @ Q_vec.T - (reward + gamma * (param @ max_Q_next_vec.T)) td_error_list.append(abs(td_error[0, 0])) chara_vec = np.array( [[Q_vec[0, 0], Q_vec[0, 1], Q_vec[0, 2], Q_vec[0, 3]]]) update_vec = td_error * chara_vec #Barrier eta = 1e-7 epsilon_w = 1e-9 barrier_vec = eta*np.array([[-1/(weight[0,0]+epsilon_w),\ -1/(weight[0,1]+epsilon_w),\ -1/(weight[0,2]+epsilon_w),\ -1/(weight[0,3]+epsilon_w)]]) update_vec = update_vec + barrier_vec pre_weight = weight #memorize pre_weight weight = weight - learn_alpha * (update_vec ) #weight is next weight if (weight[0, 0] < 0.0) or (weight[0, 1] < 0.0) or (weight[0, 2] < 0.0) or ( weight[0, 3] < 0.0): #If some weights are negative update_error_count = 1 while (True): weight = pre_weight weight = weight - (2**( -update_error_count)) * learn_alpha * (update_vec) update_error_count += 1 if (weight[0, 0] >= 0.0) and (weight[0, 1] >= 0.0) and ( weight[0, 2] >= 0.0) and (weight[0, 3] >= 0.0): break weight_sum = weight[0, 0] + weight[0, 1] + weight[0, 2] + weight[0, 3] weight = weight / weight_sum state = next_state Discrete_time += 1 axes[0].plot([0, MAX_STEP], [0, 0], "red", linestyle='dashed') axes[0].plot(time_list, a_list, linewidth=2) axes[0].set_xlim(0.0, MAX_STEP) axes[0].set_ylim(-1, 1) axes[0].set_xlabel('$k$', fontsize=16) axes[0].set_ylabel('$a[k]$', fontsize=16) axes[0].grid(True) axes[1].plot([0, MAX_STEP], [0, 0], "red", linestyle='dashed') axes[1].plot(time_list, x_1_list, linewidth=2) axes[1].set_xlim(0.0, MAX_STEP) axes[1].set_ylim(-np.pi, np.pi) axes[1].set_xlabel('$k$', fontsize=16) axes[1].set_ylabel('$x_1[k]$', fontsize=16) axes[1].grid(True) axes[2].plot([0, MAX_STEP], [0, 0], "red", linestyle='dashed') axes[2].plot(time_list, x_2_list, linewidth=2) axes[2].set_xlim(0.0, MAX_STEP) axes[2].set_ylim(-7, 7) axes[2].set_xlabel('$k$', fontsize=16) axes[2].set_ylabel('$x_2[k]$', fontsize=16) axes[2].grid(True) axes[3].plot(time_list, weight_1_list, linewidth=2, label="$w_1$") axes[3].plot(time_list, weight_2_list, linewidth=2, label="$w_2$") axes[3].plot(time_list, weight_3_list, linewidth=2, label="$w_3$") axes[3].plot(time_list, weight_4_list, linewidth=2, label="$w_4$") axes[3].set_xlim(0.0, MAX_STEP) axes[3].set_ylim(0, 1) axes[3].set_xlabel('$k$', fontsize=16) axes[3].set_ylabel('$w[k]$', fontsize=16) axes[3].grid(True) axes[3].legend(loc='upper left', ncol=4) axes[4].plot(time_list, td_error_list, linewidth=2) axes[4].set_xlim(0.0, MAX_STEP) # axes[4].set_ylim(0,0.5) axes[4].set_xlabel('$k$', fontsize=16) axes[4].set_ylabel(r'$|\delta[k]|$', fontsize=16) axes[4].grid(True) fig.savefig('TR_N4_case1.eps', bbox_inches="tight", pad_inches=0.05) fig.savefig('TR_N4_case1.png', bbox_inches="tight", pad_inches=0.05)
def run(self): xlist = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]) ylist = np.array([5.0, 6.0, 7.0, 8.0, 9.0,\ 10.0, 11.0, 12.0, 13.0, 14.0,\ 15.0, 16.0, 17.0, 18.0, 19.0, \ 20.0, 21.0, 22.0, 23.0, 24.0, \ 25.0, 26.0, 27.0, 28.0, 29.0, \ 30.0, 31.0, 32.0, 33.0, 34.0, \ 35.0, 36.0, 37.0, 38.0, 39.0, \ 40.0, 41.0, 42.0, 43.0, 44.0, \ 45.0, 46.0, 47.0, 48.0, 49.0, 50.0]) vallist = np.zeros((45,10)) for a_iteration in range(10): a_param = 0.05 + a_iteration*0.1 for b_iteration in range(45): b_param = 5.5 + b_iteration*1.0 #======================Hyper Parameter===================== weight = np.array([[1/8,1/8,1/8,1/8,1/8,1/8,1/8,1/8]]) learn_alpha = 5e-5 gamma = 0.99 MAX_STEP = 1000 #========================================================== state = np.array([[np.pi, 0.0]]) rewards = 0 #sum of rewards for each system (a,b) for test_step in range(MAX_STEP): current_obs = torch.Tensor([[state[0,0],state[0,1]]]) action = self.agent.get_action(current_obs, weight) action = action.detach().numpy()[0] #action: torch.Tensor(1,1) -> numpy(1,) #exploration noise========================= noise = max((400-test_step),0.0)/400*0.1*np.random.normal() action = action + noise #numpy(1,) #========================================== action = torch.Tensor([action]) Q_vec = self.agent.get_Q_value(current_obs, action) # Q(x[k],a[k]) as characteristic functions action = action.detach().numpy()[0] #action: torch.Tensor(1,1) -> numpy(1,) next_state, reward, done = dynamics.Dynamics(state, action, a_param, b_param) #next_state: numpy(1,2) next_obs = torch.Tensor([[next_state[0,0], next_state[0,1]]]) #update of the parameters max_Q_next_vec = self.agent.get_next_value(next_obs, weight) param = np.array([[weight[0,0], weight[0,1], weight[0,2], weight[0,3], weight[0,4], weight[0,5], weight[0,6], weight[0,7]]]) #w=[w_{1},...,w_{N}] td_error = param @ Q_vec.T - (reward + gamma * (param @ max_Q_next_vec.T)) chara_vec = np.array([[Q_vec[0,0], Q_vec[0,1], Q_vec[0,2], Q_vec[0,3], Q_vec[0,4], Q_vec[0,5], Q_vec[0,6], Q_vec[0,7]]]) update_vec = td_error * chara_vec #Barrier eta = 1e-7 epsilon_w = 1e-9 barrier_vec = eta*np.array([[-1/(weight[0,0]+epsilon_w),\ -1/(weight[0,1]+epsilon_w),\ -1/(weight[0,2]+epsilon_w),\ -1/(weight[0,3]+epsilon_w),\ -1/(weight[0,4]+epsilon_w),\ -1/(weight[0,5]+epsilon_w),\ -1/(weight[0,6]+epsilon_w),\ -1/(weight[0,7]+epsilon_w)]]) update_vec = update_vec + barrier_vec pre_weight = weight #memorize pre_weight weight = weight - learn_alpha * (update_vec) if (weight[0,0]<0.0)or(weight[0,1]<0.0)or(weight[0,2]<0.0)or(weight[0,3]<0.0)or(weight[0,4]<0.0)or(weight[0,5]<0.0)or(weight[0,6]<0.0)or(weight[0,7]<0.0): #If some weights are negative update_error_count = 1 while(True): weight = pre_weight weight = weight - (2**(-update_error_count))*learn_alpha * (update_vec) update_error_count += 1 if (weight[0,0]>=0.0)and(weight[0,1]>=0.0)and(weight[0,2]>=0.0)and(weight[0,3]>=0.0)and(weight[0,4]>=0.0)and(weight[0,5]>=0.0)and(weight[0,6]>=0.0)and(weight[0,7]>=0.0): break #Normalize weight weight_sum = weight[0,0] + weight[0,1] + weight[0,2] + weight[0,3] + weight[0,4] + weight[0,5] + weight[0,6] + weight[0,7] weight = weight/weight_sum rewards += reward state = next_state print("##########################################################################################") print("(a,b)=("+str(a_param)+","+str(b_param)+") : reward "+str(rewards)+" Weight is "+str(weight)) print("Last State is "+str(state)) print("##########################################################################################") vallist[b_iteration,a_iteration] = rewards plt.rcParams['font.family'] = 'Times New Roman' plt.rcParams["mathtext.fontset"] = 'cm' plt.rcParams['mathtext.default'] = 'it' fig, ax = plt.subplots() cs = ax.pcolormesh(xlist, ylist, vallist, cmap="jet", vmin=-4000.0, vmax=-50.0)#seismic,hot fig.colorbar(cs) ax.set_xlim(0.0, 1.0) ax.set_ylim(5.0, 50.0) ax.set_xticks([0.0, 0.2, 0.4, 0.6, 0.8, 1.0]) ax.set_yticks([5.0, 10.0, 15.0, 20.0, 25.0, 30.0, 35.0, 40.0, 45.0, 50.0]) ax.set_xlabel(r'$\xi_{1}$',fontsize=16) ax.set_ylabel(r'$\xi_{2}$',fontsize=16) fig.savefig('N8.eps', pad_inches=0.05) fig.savefig('N8.png', pad_inches=0.05)
desired_long = -70.102602 # Does it need to be updated from somewhere else? # deg to rad desired_lat = np.radians(desired_lat) desired_long = np.radians(desired_long) wpt_x = 10 wpt_y = 10 # set target speed desired_speed = 2.5 #get via UDP server # Loop at some rate dt = 0.01 # [sec] t = 0.0 boat_dynamics = dynamics.Dynamics(boat) def heading_to_rudder(heading): #heading_delta = data.heading.heading - current_heading heading_delta = desired_heading - curr_heading if heading_delta > np.pi: heading_delta -= np.pi * 2.0 if heading_delta < -np.pi: heading_delta += np.pi * 2.0 rudder = max(-1.0, min(1.0, heading_delta)) return rudder def speed_to_throttle(speed): #throttle = speed / dynamics.model["max_speed"]