Example #1
def button_find_solution():
    """
    :description: Starts searching for solution using A star algorithm
        note: all openStates and closed_states lists hold OBJECTS not lists of tiles
    :return: None
    """
    global master_states, openStates, explored_states, isButtonFindSolutionOn, isButtonShowSolutionOn, \
        puzzle_state

    if isButtonFindSolutionOn:
        # create object for initial state and save to master states list
        master_states.append(classes.State(initState, initState, goalState, 0))
        openStates.append(master_states[0])
        closed_states = []

        while openStates:
            x = openStates.pop(0)

            # Checks if the goal state is found
            if x.node == goalState:
                explored_states.extend(closed_states)
                explored_states.append(x)

                # (function call) If the goal is found, trace down the path back to the initial state
                # Neglect any non-essential states
                find_path()

                # (function call) displays the result in the console
                display_solution()

                isButtonFindSolutionOn = False
                isButtonShowSolutionOn = True
                puzzle_state = list(initState)
                return None

            # Otherwise keep looking by expanding more states
            #   and assign the states with heuristic values.
            else:
                x.generate_children(master_states)

                for child in x.children:
                    master_states.append(
                        classes.State(child, initState, goalState, (x.gn + 1)))
                    if not (master_states[-1] in openStates
                            or master_states[-1] in closed_states):
                        openStates.append(master_states[-1])
                closed_states.append(x)

                # (function call) Sort openStates in place by f(n), lowest first
                reorder_heuristics()
        else:
            # while-else: runs only when the open list is exhausted without
            # reaching the goal state
            print("Failed to find a solution")
Example #2
def cartesian(GM, a, e, incl, longnode, argperi, meananom):
    """
    Computes the cartesian state given a GM constant and the orbital elemments 
    a, e, incl, longnode, argperi, and meananom.
    
    *Returns*
        (x, y, z, xd, yd, zd) : tuple of floats
    
    """

    _cartesian = lib.cartesian
    _cartesian.argtypes = (c_double, c_double, c_double, c_double, c_double,
                           c_double, c_double, POINTER(Classes.State))
    _cartesian.restype = None

    state = Classes.State(0.0, 0.0, 0.0, 0.0, 0.0, 0.0)

    # restype is None, so there is no return value to capture
    _cartesian(GM, a, e, incl, longnode, argperi, meananom, byref(state))

    return state
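
A usage sketch, not taken from the original source: the GM value is the Gaussian gravitational constant squared (GM of the Sun in AU^3/day^2, assuming that unit convention), and the field names on the returned Classes.State are assumed from the docstring:

# Near-circular 1 AU orbit with Earth-like eccentricity; all angles zero.
GM_SUN = 2.9591220828559093e-04  # AU^3/day^2 (assumed unit convention)
state = cartesian(GM_SUN, 1.0, 0.0167, 0.0, 0.0, 0.0, 0.0)
print(state.x, state.y, state.z, state.xd, state.yd, state.zd)  # assumed field names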
Example #3
	def getStateChildren(self,state):
		children = []

		node_has_stack = state.stacks.get(state.location)
		
		# Handle the load and unload actions
		if node_has_stack is not None:
			
			#copy stack for next branch level 
			stacks_copy = copy.deepcopy(state.stacks)
			stack_at_current_node = stacks_copy[state.location]
			
			#Check for casks in stack, and if load is possible
			if stack_at_current_node.stackSpaceOccupied() > 0 and not state.loaded:

				print("casks before cask pop")
				for c in stacks_copy[state.location].stored_casks:
					print("cask name", c.c_id)
				cask = stack_at_current_node.removeCaskFromStack()
				cost_of_action = cask.weight + 1
				new_total_cost = (state.total_cost + cost_of_action)
				stacks_copy[state.location] = stack_at_current_node
				print("casks after cask pop")
				#for c in stacks_copy[state.location].stored_casks:
				#	print("cask name", c.c_id)
				
				cask_list = copy.deepcopy(state.cask_list)
				cask_list.append(cask.c_id)
				temp_state = State(state.location, True, cost_of_action, new_total_cost, state, "load", cask, stacks_copy, cask_list, {}, state.mission + 1)
				if not self.stateExplored(temp_state):
					temp_state.casks_handled[cask.c_id] = True
					children.append(temp_state)

			# If the CTS is carrying a cask, unload it when the stack has free space
			elif state.loaded:
				if stack_at_current_node.stackSpaceFree() >= state.cask.length:
					cost_of_action = state.cask.weight + 1
					new_total_cost = (state.total_cost + cost_of_action)

					stack_at_current_node.addCaskToStack(state.cask)
					stacks_copy[state.location] = stack_at_current_node
					temp_state = State(state.location, False, cost_of_action, new_total_cost, state, "unload", None, stacks_copy, state.cask_list, {}, state.mission + 1)
					if not self.stateExplored(temp_state):
						children.append(temp_state)

		
		# Handle the move action
		node_num = self.node_name_to_num.get(state.location)
		if node_num is None:
			print("trying to access undefined node_num, aborting")
			sys.exit(1)  # os.exit() does not exist; this requires `import sys`

		for col_num in range(self.num_nodes):
			if self.adj_matrix[node_num][col_num] < inf:
				location = self.node_num_to_name.get(col_num)
				if location is None:
					print("tried to access a nonexistent node")
					sys.exit(1)
				loaded = state.loaded  # a move never changes the loaded flag
				new_total_cost = state.total_cost  # carry forward previous action costs

				# cost of a move: the edge weight, scaled by (cask weight + 1) when loaded
				if loaded:
					cost_of_action = (state.cask.weight + 1) * self.adj_matrix[node_num][col_num]
				else:
					cost_of_action = self.adj_matrix[node_num][col_num]
				new_total_cost += cost_of_action

				# create the new state; keep it only if not already explored
				temp_state = State(location, loaded, cost_of_action, new_total_cost, state, "move", state.cask, state.stacks, state.cask_list, state.casks_handled, state.mission)
				if not self.stateExplored(temp_state):
					children.append(temp_state)

		#print("el in children", len(children))
		for el in children:
			print("child ", el.location,el.total_cost,el.action,el.cost_of_action)
		return children
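
Only the expansion step is shown; getStateChildren already filters explored states through stateExplored, so a driver for a lowest-cost-first search around it could be as simple as the sketch below. The problem object, the isGoal predicate, and the tie-breaking counter are assumptions, not part of the original code:

import heapq
import itertools

def lowest_cost_search(problem, start_state):
    counter = itertools.count()  # tie-breaker so State objects are never compared
    frontier = [(start_state.total_cost, next(counter), start_state)]
    while frontier:
        _, _, state = heapq.heappop(frontier)
        if problem.isGoal(state):  # assumed goal predicate
            return state
        for child in problem.getStateChildren(state):
            heapq.heappush(frontier, (child.total_cost, next(counter), child))
    return None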
Example #4
    def reward_callback(self, msg):
        # Receive a state from control_motion and calculate the reward.
        if self.i > self.last_saved_index:
            self.get_logger().info('received state, sim_time:{}'.format(
                msg.sim_time))
            fallen_status = msg.fallen_status
            roll, pitch, yaw = msg.orientation
            rpy_vel_x, rpy_vel_y, rpy_vel_z = msg.rpy_vel
            pos_x, pos_y, pos_z = msg.pos  #TODO: pos_y is different from distance; distance should be minimin
            vx, vy, vz = msg.vel
            sim_time = msg.sim_time

            hip_l_z, hip_l_x, hip_l_y, knee_l, ankle_l_y, ankle_l_x, \
                hip_r_z, hip_r_x, hip_r_y, knee_r, ankle_r_y, ankle_r_x = msg.joint_positions

            joint_states = [
                hip_l_x - hip_x_min_temp, hip_l_y - hip_y_min_temp,
                knee_l - knee_min_temp, ankle_l_y - ankle_y_min_temp,
                ankle_l_x - ankle_x_min_temp, hip_r_x - hip_x_min_temp,
                hip_r_y - hip_y_min_temp, knee_r - knee_min_temp,
                ankle_r_y - ankle_y_min_temp, ankle_r_x - ankle_x_min_temp,
                hip_l_z - hip_z_min_temp, hip_r_z - hip_z_min_temp
            ]

            self.distance_old = self.distance_new
            self.distance_new = msg.distance_minimin
            self.s1 = State(joint_states, [roll, pitch, yaw],
                            [rpy_vel_x, rpy_vel_y, rpy_vel_z],
                            [pos_x, pos_z], [vx, vy, vz],
                            self.true_joint_states, self.distance_new)
            # true_joint_states is a misnomer: it holds joint positions clipped
            # so that lower_limit < joint_position < upper_limit

            self.distance_hist.append(self.distance_new)
            reward = 0.0

            print('ROLL:{} PITCH:{} YAW:{}'.format(abs(roll), abs(pitch),
                                                   abs(yaw)))

            flat_bonus = 0
            roll_bonus = 0  #3 * abs(np.tanh(.05/roll))
            try:
                # saturating pitch bonus, capped at 1 (pitch == 0 raises ZeroDivisionError)
                pitch_bonus = min(.1 * abs(np.tanh(.04 / pitch)), 1.)
            except ZeroDivisionError:
                pitch_bonus = 1.
            yaw_bonus = -.2 * abs(np.tanh(math.pi - abs(yaw)))

            time_bonus = .1  #.01

            distance_bonus = 10.0 * (self.distance_new - self.distance_old)

            # All rewards should be continuous and differentiable.
            # Rewards should not be cumulative (e.g. reward=1 for lasting 1 second,
            # reward=5 for lasting 5 seconds) or this will throw off the reward estimator.
            reward = distance_bonus + pitch_bonus + yaw_bonus + time_bonus
            self.reward_total += reward
            obs_ = np.array([self.s1.positions[1]] + self.s1.position_vel +
                            self.s1.rpy + self.s1.rpy_vel +
                            self.s1.joint_states)  # next-step observation

            replay = Replay()
            replay.obs = self.obs        # observation stored at the end of the previous callback
            replay.action = self.action  # action stored at the end of the previous callback
            replay.reward = reward
            replay.obs_ = obs_
            replay.fallen_status = float(
                fallen_status)  #TODO: differentiate between fallen & terminal
            self.replay_pub.publish(replay)

            self.reward_total = 0.0

            self.score += reward

            if self.v:
                print('distance_bonus:{}'.format(distance_bonus))
                print('pitch_bonus:{}'.format(pitch_bonus))
                print('yaw_bonus:{}'.format(yaw_bonus))
                print('reward:{}'.format(reward))
                print('reward_total:{}'.format(self.reward_total))
                print('score:{}'.format(self.score))
            if fallen_status == 1:
                # If the robot falls, s1 is automatically back to the neutral
                # position, so update the stored states to be neutral as well.
                if not self.testing:
                    if self.i > self.last_saved_index:
                        pass
                        # This may be unnecessary, since we're updating in
                        # replay_callback each iteration:
                        #agent.update(replay_buffer, j, batch_size, gamma, polyak, policy_noise, noise_clip, policy_delay)

                self.distance_hist_long.append(max(self.distance_hist))
                self.distance_new = self.distance_old = -.02  #TODO: check this value

                self.distance_hist = []
                self.s.rpy = self.rpy_init
                self.s.rpy_vel = self.rpy_vel_init
                self.s.positions = self.positions_init
                self.s.position_vel = self.vel_init
                self.s.joint_states = self.joint_states_init
                self.s.true_joint_states = self.true_joint_states_init
                self.s.y_pos = self.y_pos_init
                self.s1 = self.s
                self.j = 0
                self.reward_total = 0.0
                self.score_hist.append(self.score)
                self.score = 0
        print('\nround {}'.format(self.i))
        print('j:{}'.format(self.j))
        # update current state
        #print('last action:{}'.format(action))
        self.s = classes.State(self.s1.joint_states, self.s1.rpy,
                               self.s1.rpy_vel, self.s1.positions,
                               self.s1.position_vel, self.s1.true_joint_states,
                               self.s1.y_pos)

        print('s positions:{}'.format(self.s.positions))
        print('s velocities:{}'.format(self.s.position_vel))
        print('s rpy:{}'.format(self.s.rpy))
        print('s rpy_vel:{}'.format(self.s.rpy_vel))

        self.obs = np.array([self.s.positions[1]] + self.s.position_vel +
                            self.s.rpy + self.s.rpy_vel + self.s.joint_states)
        #print('obs:{}'.format(self.obs))
        action = self.agent.select_action(self.obs)
        print('raw action:{}'.format(action))
        if not self.testing:
            exploration_noise = self.exploration_noise_init
            noise = np.random.normal(0,
                                     exploration_noise,
                                     size=self.num_actions)
            action = action + noise
            override = False
            if override:
                # "Clip" the hip_z actions between +/- 20%
                action[10] = max(min(action[10], .2), -.2)
                action[11] = max(min(action[11], .2), -.2)
            action = action.clip(-1.0, 1.0)
            print('clipped actions:{}'.format(action))
            print('exploration noise:{}'.format(exploration_noise))

        #print('ACTION:{}'.format(action))
        self.action = action  # stored so the next callback can attach it to its Replay
        adj_actions = [(x + 1) / 2.
                       for x in action]  # rescale actions from [-1, 1] to [0, 1]

        hip_l_x = (hip_x_max_temp -
                   hip_x_min_temp) * adj_actions[0] + hip_x_min_temp
        hip_l_y = (hip_y_max_temp -
                   hip_y_min_temp) * adj_actions[1] + hip_y_min_temp
        hip_l_z = (hip_z_max_temp -
                   hip_z_min_temp) * adj_actions[10] + hip_z_min_temp
        knee_l = (knee_max_temp -
                  knee_min_temp) * adj_actions[2] + knee_min_temp
        ankle_l_y = (ankle_y_max_temp -
                     ankle_y_min_temp) * adj_actions[3] + ankle_y_min_temp
        ankle_l_x = (ankle_x_max_temp -
                     ankle_x_min_temp) * adj_actions[4] + ankle_x_min_temp
        hip_r_x = (hip_x_max_temp -
                   hip_x_min_temp) * adj_actions[5] + hip_x_min_temp
        hip_r_y = (hip_y_max_temp -
                   hip_y_min_temp) * adj_actions[6] + hip_y_min_temp
        hip_r_z = (hip_z_max_temp -
                   hip_z_min_temp) * adj_actions[11] + hip_z_min_temp
        knee_r = (knee_max_temp -
                  knee_min_temp) * adj_actions[7] + knee_min_temp
        ankle_r_y = (ankle_y_max_temp -
                     ankle_y_min_temp) * adj_actions[8] + ankle_y_min_temp
        ankle_r_x = (ankle_x_max_temp -
                     ankle_x_min_temp) * adj_actions[9] + ankle_x_min_temp

        ball_l = 0.  #(ball_l_max - ball_l_min) * adj_actions[10] + ball_l_min
        ball_r = 0.  #(ball_r_max - ball_r_min) * adj_actions[11] + ball_r_min

        spine_x = 0.
        spine_y = 0.
        spine_z = 0.
        duration = .4
        print('duration:{}'.format(duration))
        tori_joint_angles = ToriJointAngles()
        tori_joint_angles.angles = [
            hip_l_x, hip_l_y, hip_l_z, knee_l, ankle_l_y, ankle_l_x, hip_r_x,
            hip_r_y, hip_r_z, knee_r, ankle_r_y, ankle_r_x, ball_l, ball_r,
            spine_x, spine_y, spine_z
        ]
        tori_joint_angles.duration = duration

        #publish joint commands
        self.joint_angles_pub.publish(tori_joint_angles)
        self.i += 1
        self.j += 1  # TODO: maybe move these higher?
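
The bonus arithmetic in the callback can be pulled out into a pure function for unit testing; this sketch only restates the calculations already present above (yaw_bonus is zero when |yaw| equals pi, the neutral heading set by rpy_init):

import math
import numpy as np

def shaped_reward(distance_new, distance_old, pitch, yaw, time_bonus=.1):
    # Distance progress dominates; pitch is rewarded for staying near zero.
    try:
        pitch_bonus = min(.1 * abs(np.tanh(.04 / pitch)), 1.)
    except ZeroDivisionError:
        pitch_bonus = 1.
    yaw_bonus = -.2 * abs(np.tanh(math.pi - abs(yaw)))
    distance_bonus = 10.0 * (distance_new - distance_old)
    return distance_bonus + pitch_bonus + yaw_bonus + time_bonus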
Example #5
    def __init__(self):
        self.computer = 'kelsey'
        super().__init__('walk_node_{}'.format(
            self.computer))  #Remember to use ros_bridge
        self.pkl_folder = 'pkl_walk_vanilla'
        self.pause_on_nn = False
        self.score = 0
        self.reward = 0.0
        self.distance_new = 0.0
        self.distance_old = 0.0
        self.reward_total = 0.0
        self.joint_states_init = [
            0.0 - hip_x_min_temp, 0.0 - hip_y_min_temp, 0.0 - knee_min_temp,
            0.0 - ankle_y_min_temp, 0.0 - ankle_x_min_temp,
            0.0 - hip_x_min_temp, 0.0 - hip_y_min_temp, 0.0 - knee_min_temp,
            0.0 - ankle_y_min_temp, 0.0 - ankle_x_min_temp,
            0.0 - hip_z_min_temp, 0.0 - hip_z_min_temp
        ]
        self.joint_states = self.joint_states_init
        self.true_joint_states_init = [
            0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
            0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
            0.0, 0.0, 0.0, 0.0, 0.0
        ]
        self.true_joint_states = self.true_joint_states_init
        self.score_hist = []
        self.distance_hist_long = []

        self.fallen_status = 0
        self.vel_init = [0, 0, 0]
        self.positions_init = [0, 0.80]
        self.rpy_init = [0., 0., math.pi]
        self.rpy_vel_init = [0, 0, 0]
        self.y_pos_init = [0.0]
        self.s = self.s1 = classes.State(self.joint_states, self.rpy_init,
                                         self.rpy_vel_init,
                                         self.positions_init, self.vel_init,
                                         self.true_joint_states,
                                         self.y_pos_init)

        self.gamma = 0.99  # discount for future rewards
        self.batch_size = 128  # num of transitions sampled from replay buffer
        self.num_actions = 12
        self.action_init = [
            (hip_x_min_temp / (hip_x_min_temp - hip_x_max_temp)),
            (hip_y_min_temp / (hip_y_min_temp - hip_y_max_temp)),
            (knee_min_temp / (knee_min_temp - knee_max_temp)),
            (ankle_y_min_temp / (ankle_y_min_temp - ankle_y_max_temp)),
            (ankle_x_min_temp / (ankle_x_min_temp - ankle_x_max_temp)),
            (hip_x_min_temp / (hip_x_min_temp - hip_x_max_temp)),
            (hip_y_min_temp / (hip_y_min_temp - hip_y_max_temp)),
            (knee_min_temp / (knee_min_temp - knee_max_temp)),
            (ankle_y_min_temp / (ankle_y_min_temp - ankle_y_max_temp)),
            (ankle_x_min_temp / (ankle_x_min_temp - ankle_x_max_temp)),
            (hip_z_min_temp / (hip_z_min_temp - hip_z_max_temp)),
            (hip_z_min_temp / (hip_z_min_temp - hip_z_max_temp))
        ]
        self.action_init = np.array([(x * 2) - 1 for x in self.action_init])
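        # The two statements above map each joint's zero angle into the agent's
        # normalized action space: min / (min - max) equals (0 - min) / (max - min),
        # i.e. the fraction of the joint's range at which angle 0 sits, and
        # (x * 2) - 1 rescales that fraction from [0, 1] to [-1, 1]. For example,
        # limits [-0.5, 1.5] give -0.5 / (-2.0) = 0.25, which rescales to -0.5.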

        self.exploration_noise_init = 0.08  #.10, 0.05
        self.exploration_noise = self.exploration_noise_init
        self.polyak = 0.995  # target policy update parameter (1-tau)
        self.policy_noise = 0.12  #.20, .10          # target policy smoothing noise
        self.noise_clip = 0.5
        self.policy_delay = 2  # delayed policy updates parameter

        self.testing = False
        if self.testing:
            self.policy_noise = 0.0
            self.exploration_noise_init = 0.0
            self.exploration_noise = 0.0
        self.last_saved_index = 150000  #minimax at 1165000; lowered effort to 3.92 and renamed i to 1000 at 1419000; instant at 8000
        # policy .05, exp .04 @ 137000; reverted noise to policy .12 and exp .08 @ 200000
        self.distance_hist = []

        if self.last_saved_index > 0:
            self.read_pkl = True
        else:
            self.read_pkl = False

        self.i = self.last_saved_index

        self.v = True  # verbose logging flag used by reward_callback
        self.j = 0
        lr = .0001
        self.num_states = 22
        remove_states = []
        load_weights = True
        read_replay_buffer = True
        add_num_states = 0
        add_actions = 0
        layer_height = 250

        if self.read_pkl:

            #agent = NewAgent.load_model('./pkl/agent_{}.pkl'.format(i))

            if load_weights:
                print('reading weights...')
                self.agent = TD3.TD3(lr=lr,
                                     state_dim=self.num_states +
                                     add_num_states - len(remove_states),
                                     action_dim=self.num_actions + add_actions,
                                     max_action=1.0,
                                     layer_height=layer_height)

                self.agent.load('./{}'.format(self.pkl_folder),
                                self.i,
                                additional_dims=add_num_states,
                                additional_actions=add_actions,
                                remove_dimensions_=remove_states)
                self.num_actions = self.num_actions + add_actions

            else:
                print('WARNING: LOADING FULL AGENT')
                self.agent = TD3.TD3.load_model('./{}/agent_{}.pkl'.format(
                    self.pkl_folder, self.i))
                self.agent.use_scheduler = False
            if read_replay_buffer:
                print('reading replay buffer...')
                self.replay_buffer = pickle.load(
                    open('./{}/replay_{}.pkl'.format(self.pkl_folder, self.i),
                         'rb'))
            else:
                self.replay_buffer = ReplayBuffer()
        else:
            print('creating agent')
            #agent = NewAgent(alpha=0.000005, beta=0.00001, input_dims=[3], gamma=1.01, layer1_size=30, layer2_size=30, n_outputs=1, n_actions=26) # 26=13*2
            #agent = Agent(alpha=0.000025, beta=0.00025, input_dims=[19], tau=0.001, env='dummy', sigma=.5,
            #          batch_size=100,  layer1_size=200, layer2_size=250, n_actions=12, max_size=100000)
            self.agent = TD3.TD3(lr=lr,
                                 state_dim=self.num_states,
                                 action_dim=self.num_actions,
                                 max_action=1.0,
                                 layer_height=layer_height)
            self.replay_buffer = ReplayBuffer()
        self.state_sub = self.create_subscription(
            State, '/tori_state_{}'.format(self.computer),
            self.state_callback)  # listens for state updates
        self.reward_sub = self.create_subscription(
            State, '/tori_state_{}'.format(self.computer),
            self.reward_callback)  #
        self.replay_sub = self.create_subscription(
            Replay, '/replay',
            self.replay_callback)  # listens for replay messages
        self.replay_pub = self.create_publisher(
            Replay, '/replay',
            qos_profile=1)  # publishes replay to this/other computers
        self.joint_angles_pub = self.create_publisher(
            ToriJointAngles, '/tori_joint_command_{}'.format(self.computer)
        )  # tells control_motion the desired joint positions
        self.checkpoint_sub = self.create_subscription(
            Float64, '/checkpoint_{}'.format(self.computer),
            self.checkpoint_callback)
        self.checkpoint_pub = self.create_publisher(Float64,
                                                    '/checkpoint_{}'.format(
                                                        self.computer),
                                                    qos_profile=0)

        # publish an initial state so the subscriber callbacks start firing
        self.state_pub = self.create_publisher(State,
                                               '/tori_state_{}'.format(
                                                   self.computer),
                                               qos_profile=1)
        state = State()
        state.fallen_status = float(self.fallen_status)
        state.orientation = self.rpy_init
        state.pos = [0., 0., 0.80]  #TODO: get position_y_spine, not necessarily minimin
        state.distance_minimin = -.02  #TODO: check this value; field name matches reward_callback's msg.distance_minimin
        state.rpy_vel = [0., 0., 0.]
        state.vel = [0., 0., 0.]
        state.sim_time = 0.0
        self.state_pub.publish(state)
        print('published!')
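
The example ends with __init__, so the node is never spun here; a minimal entry point would look like the sketch below, where WalkNode is a placeholder for the class name, which is not visible in this snippet:

import rclpy

def main(args=None):
    rclpy.init(args=args)
    node = WalkNode()  # placeholder; the real class name is not shown above
    rclpy.spin(node)   # services the state/reward/replay/checkpoint callbacks
    node.destroy_node()
    rclpy.shutdown()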