Example #1
def button_find_solution():
    """
    :description: Starts searching for solution using A star algorithm
        note: all openStates and closed_states lists hold OBJECTS not lists of tiles
    :return: None
    """
    global master_states, openStates, explored_states, isButtonFindSolutionOn, isButtonShowSolutionOn, \
        puzzle_state

    if isButtonFindSolutionOn:
        # create object for initial state and save to master states list
        master_states.append(classes.State(initState, initState, goalState, 0))
        openStates.append(master_states[0])
        closed_states = []

        while openStates:
            x = openStates.pop(0)

            # Checks if the goal state is found
            if x.node == goalState:
                explored_states.extend(closed_states)
                explored_states.append(x)

                # (function call) If the goal is found, trace down the path back to the initial state
                # Neglect any non-essential states
                find_path()

                # (function call) displays the result in the console
                display_solution()

                isButtonFindSolutionOn = False
                isButtonShowSolutionOn = True
                puzzle_state = list(initState)
                return None

            # Otherwise keep looking by expanding more states
            #   and assign the states with heuristic values.
            else:
                x.generate_children(master_states)

                for child in x.children:
                    master_states.append(
                        classes.State(child, initState, goalState, (x.gn + 1)))
                    if not (master_states[-1] in openStates
                            or master_states[-1] in closed_states):
                        openStates.append(master_states[-1])
                closed_states.append(x)

                # (function call) Sort openStates in place by f(n), lowest first
                reorder_heuristics()
        else:
            # while-else: runs only when the open list is exhausted without
            # reaching the goal state
            print("Failed to find a solution")
Example #2
def cartesian(GM, a, e, incl, longnode, argperi, meananom):
    """
    Computes the cartesian state given a GM constant and the orbital elemments 
    a, e, incl, longnode, argperi, and meananom.
    
    *Returns*
        (x, y, z, xd, yd, zd) : tuple of floats
    
    """

    _cartesian = lib.cartesian
    _cartesian.argtypes = (c_double, c_double, c_double, c_double, c_double,
                           c_double, c_double, POINTER(Classes.State))
    _cartesian.restype = None

    state = Classes.State(0.0, 0.0, 0.0, 0.0, 0.0, 0.0)

    # restype is None, so there is no return value to capture
    _cartesian(GM, a, e, incl, longnode, argperi, meananom, byref(state))

    return state
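
A usage sketch, not taken from the original source: the GM value is the Gaussian gravitational constant squared (GM of the Sun in AU^3/day^2, assuming that unit convention), and the field names on the returned Classes.State are assumed from the docstring:

# Near-circular 1 AU orbit with Earth-like eccentricity; all angles zero.
GM_SUN = 2.9591220828559093e-04  # AU^3/day^2 (assumed unit convention)
state = cartesian(GM_SUN, 1.0, 0.0167, 0.0, 0.0, 0.0, 0.0)
print(state.x, state.y, state.z, state.xd, state.yd, state.zd)  # assumed field names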
Example #3
	def getStateChildren(self,state):
		children = []

		node_has_stack = state.stacks.get(state.location)
		
		# Handle the load and unload actions
		if node_has_stack is not None:
			
			#copy stack for next branch level 
			stacks_copy = copy.deepcopy(state.stacks)
			stack_at_current_node = stacks_copy[state.location]
			
			#Check for casks in stack, and if load is possible
			if stack_at_current_node.stackSpaceOccupied() > 0 and not state.loaded:

				print("casks before cask pop")
				for c in stacks_copy[state.location].stored_casks:
					print("cask name", c.c_id)
				cask = stack_at_current_node.removeCaskFromStack()
				cost_of_action = cask.weight + 1
				new_total_cost = (state.total_cost + cost_of_action)
				stacks_copy[state.location] = stack_at_current_node
				print("casks after cask pop")
				#for c in stacks_copy[state.location].stored_casks:
				#	print("cask name", c.c_id)
				
				cask_list = copy.deepcopy(state.cask_list)
				cask_list.append(cask.c_id)
				temp_state = State(state.location, True, cost_of_action, new_total_cost, state, "load", cask, stacks_copy, cask_list, {}, state.mission + 1)
				if not self.stateExplored(temp_state):
					temp_state.casks_handled[cask.c_id] = True
					children.append(temp_state)

			# If the CTS is carrying a cask, unload it when the stack has free space
			elif state.loaded:
				if stack_at_current_node.stackSpaceFree() >= state.cask.length:
					cost_of_action = state.cask.weight + 1
					new_total_cost = (state.total_cost + cost_of_action)

					stack_at_current_node.addCaskToStack(state.cask)
					stacks_copy[state.location] = stack_at_current_node
					temp_state = State(state.location, False, cost_of_action, new_total_cost, state, "unload", None, stacks_copy, state.cask_list, {}, state.mission + 1)
					if not self.stateExplored(temp_state):
						children.append(temp_state)

		
		# Handle the move action
		node_num = self.node_name_to_num.get(state.location)
		if node_num is None:
			print("trying to access undefined node_num, aborting")
			sys.exit(1)  # os.exit() does not exist; this requires `import sys`

		for col_num in range(self.num_nodes):
			if self.adj_matrix[node_num][col_num] < inf:
				location = self.node_num_to_name.get(col_num)
				if location is None:
					print("tried to access a nonexistent node")
					sys.exit(1)
				loaded = state.loaded  # a move never changes the loaded flag
				new_total_cost = state.total_cost  # carry forward previous action costs

				# cost of a move: the edge weight, scaled by (cask weight + 1) when loaded
				if loaded:
					cost_of_action = (state.cask.weight + 1) * self.adj_matrix[node_num][col_num]
				else:
					cost_of_action = self.adj_matrix[node_num][col_num]
				new_total_cost += cost_of_action

				# create the new state; keep it only if not already explored
				temp_state = State(location, loaded, cost_of_action, new_total_cost, state, "move", state.cask, state.stacks, state.cask_list, state.casks_handled, state.mission)
				if not self.stateExplored(temp_state):
					children.append(temp_state)

		#print("el in children", len(children))
		for el in children:
			print("child ", el.location,el.total_cost,el.action,el.cost_of_action)
		return children
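
Only the expansion step is shown; getStateChildren already filters explored states through stateExplored, so a driver for a lowest-cost-first search around it could be as simple as the sketch below. The problem object, the isGoal predicate, and the tie-breaking counter are assumptions, not part of the original code:

import heapq
import itertools

def lowest_cost_search(problem, start_state):
    counter = itertools.count()  # tie-breaker so State objects are never compared
    frontier = [(start_state.total_cost, next(counter), start_state)]
    while frontier:
        _, _, state = heapq.heappop(frontier)
        if problem.isGoal(state):  # assumed goal predicate
            return state
        for child in problem.getStateChildren(state):
            heapq.heappush(frontier, (child.total_cost, next(counter), child))
    return None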
Example #4
    def reward_callback(self, msg):
        # Receive a state from control_motion and calculate the reward.
        if self.i > self.last_saved_index:
            self.get_logger().info('received state, sim_time:{}'.format(
                msg.sim_time))
            fallen_status = msg.fallen_status
            roll, pitch, yaw = msg.orientation
            rpy_vel_x, rpy_vel_y, rpy_vel_z = msg.rpy_vel
            pos_x, pos_y, pos_z = msg.pos  #TODO: pos_y is different from distance; distance should be minimin
            vx, vy, vz = msg.vel
            sim_time = msg.sim_time

            hip_l_z, hip_l_x, hip_l_y, knee_l, ankle_l_y, ankle_l_x, \
                hip_r_z, hip_r_x, hip_r_y, knee_r, ankle_r_y, ankle_r_x = msg.joint_positions

            joint_states = [
                hip_l_x - hip_x_min_temp, hip_l_y - hip_y_min_temp,
                knee_l - knee_min_temp, ankle_l_y - ankle_y_min_temp,
                ankle_l_x - ankle_x_min_temp, hip_r_x - hip_x_min_temp,
                hip_r_y - hip_y_min_temp, knee_r - knee_min_temp,
                ankle_r_y - ankle_y_min_temp, ankle_r_x - ankle_x_min_temp,
                hip_l_z - hip_z_min_temp, hip_r_z - hip_z_min_temp
            ]

            self.distance_old = self.distance_new
            self.distance_new = msg.distance_minimin
            self.s1 = State(joint_states, [roll, pitch, yaw],
                            [rpy_vel_x, rpy_vel_y, rpy_vel_z],
                            [pos_x, pos_z], [vx, vy, vz],
                            self.true_joint_states, self.distance_new)
            # true_joint_states is a misnomer: it holds joint positions clipped
            # so that lower_limit < joint_position < upper_limit

            self.distance_hist.append(self.distance_new)
            reward = 0.0

            print('ROLL:{} PITCH:{} YAW:{}'.format(abs(roll), abs(pitch),
                                                   abs(yaw)))

            flat_bonus = 0
            roll_bonus = 0  #3 * abs(np.tanh(.05/roll))
            try:
                # saturating pitch bonus, capped at 1 (pitch == 0 raises ZeroDivisionError)
                pitch_bonus = min(.1 * abs(np.tanh(.04 / pitch)), 1.)
            except ZeroDivisionError:
                pitch_bonus = 1.
            yaw_bonus = -.2 * abs(np.tanh(math.pi - abs(yaw)))

            time_bonus = .1  #.01

            distance_bonus = 10.0 * (self.distance_new - self.distance_old)

            # All rewards should be continuous and differentiable.
            # Rewards should not be cumulative (e.g. reward=1 for lasting 1 second,
            # reward=5 for lasting 5 seconds) or this will throw off the reward estimator.
            reward = distance_bonus + pitch_bonus + yaw_bonus + time_bonus
            self.reward_total += reward
            obs_ = np.array([self.s1.positions[1]] + self.s1.position_vel +
                            self.s1.rpy + self.s1.rpy_vel +
                            self.s1.joint_states)  # next-step observation

            replay = Replay()
            replay.obs = self.obs        # observation stored at the end of the previous callback
            replay.action = self.action  # action stored at the end of the previous callback
            replay.reward = reward
            replay.obs_ = obs_
            replay.fallen_status = float(
                fallen_status)  #TODO: differentiate between fallen & terminal
            self.replay_pub.publish(replay)

            self.reward_total = 0.0

            self.score += reward

            if self.v:
                print('distance_bonus:{}'.format(distance_bonus))
                print('pitch_bonus:{}'.format(pitch_bonus))
                print('yaw_bonus:{}'.format(yaw_bonus))
                print('reward:{}'.format(reward))
                print('reward_total:{}'.format(self.reward_total))
                print('score:{}'.format(self.score))
            if fallen_status == 1:
                # If the robot falls, s1 is automatically back to the neutral
                # position, so update the stored states to be neutral as well.
                if not self.testing:
                    if self.i > self.last_saved_index:
                        pass
                        # This may be unnecessary, since we're updating in
                        # replay_callback each iteration:
                        #agent.update(replay_buffer, j, batch_size, gamma, polyak, policy_noise, noise_clip, policy_delay)

                self.distance_hist_long.append(max(self.distance_hist))
                self.distance_new = self.distance_old = -.02  #TODO: check this value

                self.distance_hist = []
                self.s.rpy = self.rpy_init
                self.s.rpy_vel = self.rpy_vel_init
                self.s.positions = self.positions_init
                self.s.position_vel = self.vel_init
                self.s.joint_states = self.joint_states_init
                self.s.true_joint_states = self.true_joint_states_init
                self.s.y_pos = self.y_pos_init
                self.s1 = self.s
                self.j = 0
                self.reward_total = 0.0
                self.score_hist.append(self.score)
                self.score = 0
        print('\nround {}'.format(self.i))
        print('j:{}'.format(self.j))
        # update current state
        #print('last action:{}'.format(action))
        self.s = classes.State(self.s1.joint_states, self.s1.rpy,
                               self.s1.rpy_vel, self.s1.positions,
                               self.s1.position_vel, self.s1.true_joint_states,
                               self.s1.y_pos)

        print('s positions:{}'.format(self.s.positions))
        print('s velocities:{}'.format(self.s.position_vel))
        print('s rpy:{}'.format(self.s.rpy))
        print('s rpy_vel:{}'.format(self.s.rpy_vel))

        self.obs = np.array([self.s.positions[1]] + self.s.position_vel +
                            self.s.rpy + self.s.rpy_vel + self.s.joint_states)
        #print('obs:{}'.format(self.obs))
        action = self.agent.select_action(self.obs)
        print('raw action:{}'.format(action))
        if not self.testing:
            exploration_noise = self.exploration_noise_init
            noise = np.random.normal(0,
                                     exploration_noise,
                                     size=self.num_actions)
            action = action + noise
            override = False
            if override:
                # "Clip" the hip_z actions between +/- 20%
                action[10] = max(min(action[10], .2), -.2)
                action[11] = max(min(action[11], .2), -.2)
            action = action.clip(-1.0, 1.0)
            print('clipped actions:{}'.format(action))
            print('exploration noise:{}'.format(exploration_noise))

        #print('ACTION:{}'.format(action))
        self.action = action  # stored so the next callback can attach it to its Replay
        adj_actions = [(x + 1) / 2.
                       for x in action]  # rescale actions from [-1, 1] to [0, 1]

        hip_l_x = (hip_x_max_temp -
                   hip_x_min_temp) * adj_actions[0] + hip_x_min_temp
        hip_l_y = (hip_y_max_temp -
                   hip_y_min_temp) * adj_actions[1] + hip_y_min_temp
        hip_l_z = (hip_z_max_temp -
                   hip_z_min_temp) * adj_actions[10] + hip_z_min_temp
        knee_l = (knee_max_temp -
                  knee_min_temp) * adj_actions[2] + knee_min_temp
        ankle_l_y = (ankle_y_max_temp -
                     ankle_y_min_temp) * adj_actions[3] + ankle_y_min_temp
        ankle_l_x = (ankle_x_max_temp -
                     ankle_x_min_temp) * adj_actions[4] + ankle_x_min_temp
        hip_r_x = (hip_x_max_temp -
                   hip_x_min_temp) * adj_actions[5] + hip_x_min_temp
        hip_r_y = (hip_y_max_temp -
                   hip_y_min_temp) * adj_actions[6] + hip_y_min_temp
        hip_r_z = (hip_z_max_temp -
                   hip_z_min_temp) * adj_actions[11] + hip_z_min_temp
        knee_r = (knee_max_temp -
                  knee_min_temp) * adj_actions[7] + knee_min_temp
        ankle_r_y = (ankle_y_max_temp -
                     ankle_y_min_temp) * adj_actions[8] + ankle_y_min_temp
        ankle_r_x = (ankle_x_max_temp -
                     ankle_x_min_temp) * adj_actions[9] + ankle_x_min_temp

        ball_l = 0.  #(ball_l_max - ball_l_min) * adj_actions[10] + ball_l_min
        ball_r = 0.  #(ball_r_max - ball_r_min) * adj_actions[11] + ball_r_min

        spine_x = 0.
        spine_y = 0.
        spine_z = 0.
        duration = .4
        print('duration:{}'.format(duration))
        tori_joint_angles = ToriJointAngles()
        tori_joint_angles.angles = [
            hip_l_x, hip_l_y, hip_l_z, knee_l, ankle_l_y, ankle_l_x, hip_r_x,
            hip_r_y, hip_r_z, knee_r, ankle_r_y, ankle_r_x, ball_l, ball_r,
            spine_x, spine_y, spine_z
        ]
        tori_joint_angles.duration = duration

        #publish joint commands
        self.joint_angles_pub.publish(tori_joint_angles)
        self.i += 1
        self.j += 1  # TODO: maybe move these higher?
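
The bonus arithmetic in the callback can be pulled out into a pure function for unit testing; this sketch only restates the calculations already present above (yaw_bonus is zero when |yaw| equals pi, the neutral heading set by rpy_init):

import math
import numpy as np

def shaped_reward(distance_new, distance_old, pitch, yaw, time_bonus=.1):
    # Distance progress dominates; pitch is rewarded for staying near zero.
    try:
        pitch_bonus = min(.1 * abs(np.tanh(.04 / pitch)), 1.)
    except ZeroDivisionError:
        pitch_bonus = 1.
    yaw_bonus = -.2 * abs(np.tanh(math.pi - abs(yaw)))
    distance_bonus = 10.0 * (distance_new - distance_old)
    return distance_bonus + pitch_bonus + yaw_bonus + time_bonus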
Example #5
    def __init__(self):
        self.computer = 'kelsey'
        super().__init__('walk_node_{}'.format(
            self.computer))  #Remember to use ros_bridge
        self.pkl_folder = 'pkl_walk_vanilla'
        self.pause_on_nn = False
        self.score = 0
        self.reward = 0.0
        self.distance_new = 0.0
        self.distance_old = 0.0
        self.reward_total = 0.0
        self.joint_states_init = [
            0.0 - hip_x_min_temp, 0.0 - hip_y_min_temp, 0.0 - knee_min_temp,
            0.0 - ankle_y_min_temp, 0.0 - ankle_x_min_temp,
            0.0 - hip_x_min_temp, 0.0 - hip_y_min_temp, 0.0 - knee_min_temp,
            0.0 - ankle_y_min_temp, 0.0 - ankle_x_min_temp,
            0.0 - hip_z_min_temp, 0.0 - hip_z_min_temp
        ]
        self.joint_states = self.joint_states_init
        self.true_joint_states_init = [
            0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
            0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
            0.0, 0.0, 0.0, 0.0, 0.0
        ]
        self.true_joint_states = self.true_joint_states_init
        self.score_hist = []
        self.distance_hist_long = []

        self.fallen_status = 0
        self.vel_init = [0, 0, 0]
        self.positions_init = [0, 0.80]
        self.rpy_init = [0., 0., math.pi]
        self.rpy_vel_init = [0, 0, 0]
        self.y_pos_init = [0.0]
        self.s = self.s1 = classes.State(self.joint_states, self.rpy_init,
                                         self.rpy_vel_init,
                                         self.positions_init, self.vel_init,
                                         self.true_joint_states,
                                         self.y_pos_init)

        self.gamma = 0.99  # discount for future rewards
        self.batch_size = 128  # num of transitions sampled from replay buffer
        self.num_actions = 12
        self.action_init = [
            (hip_x_min_temp / (hip_x_min_temp - hip_x_max_temp)),
            (hip_y_min_temp / (hip_y_min_temp - hip_y_max_temp)),
            (knee_min_temp / (knee_min_temp - knee_max_temp)),
            (ankle_y_min_temp / (ankle_y_min_temp - ankle_y_max_temp)),
            (ankle_x_min_temp / (ankle_x_min_temp - ankle_x_max_temp)),
            (hip_x_min_temp / (hip_x_min_temp - hip_x_max_temp)),
            (hip_y_min_temp / (hip_y_min_temp - hip_y_max_temp)),
            (knee_min_temp / (knee_min_temp - knee_max_temp)),
            (ankle_y_min_temp / (ankle_y_min_temp - ankle_y_max_temp)),
            (ankle_x_min_temp / (ankle_x_min_temp - ankle_x_max_temp)),
            (hip_z_min_temp / (hip_z_min_temp - hip_z_max_temp)),
            (hip_z_min_temp / (hip_z_min_temp - hip_z_max_temp))
        ]
        self.action_init = np.array([(x * 2) - 1 for x in self.action_init])
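        # The two statements above map each joint's zero angle into the agent's
        # normalized action space: min / (min - max) equals (0 - min) / (max - min),
        # i.e. the fraction of the joint's range at which angle 0 sits, and
        # (x * 2) - 1 rescales that fraction from [0, 1] to [-1, 1]. For example,
        # limits [-0.5, 1.5] give -0.5 / (-2.0) = 0.25, which rescales to -0.5.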

        self.exploration_noise_init = 0.08  #.10, 0.05
        self.exploration_noise = self.exploration_noise_init
        self.polyak = 0.995  # target policy update parameter (1-tau)
        self.policy_noise = 0.12  #.20, .10          # target policy smoothing noise
        self.noise_clip = 0.5
        self.policy_delay = 2  # delayed policy updates parameter

        self.testing = False
        if self.testing:
            self.policy_noise = 0.0
            self.exploration_noise_init = 0.0
            self.exploration_noise = 0.0
        self.last_saved_index = 150000  #minimax at 1165000; lowered effort to 3.92 and renamed i to 1000 at 1419000; instant at 8000
        # policy .05, exp .04 @ 137000; reverted noise to policy .12 and exp .08 @ 200000
        self.distance_hist = []

        if self.last_saved_index > 0:
            self.read_pkl = True
        else:
            self.read_pkl = False

        self.i = self.last_saved_index

        self.v = True  # verbose logging flag used by reward_callback
        self.j = 0
        lr = .0001
        self.num_states = 22
        remove_states = []
        load_weights = True
        read_replay_buffer = True
        add_num_states = 0
        add_actions = 0
        layer_height = 250

        if self.read_pkl:

            #agent = NewAgent.load_model('./pkl/agent_{}.pkl'.format(i))

            if load_weights:
                print('reading weights...')
                self.agent = TD3.TD3(lr=lr,
                                     state_dim=self.num_states +
                                     add_num_states - len(remove_states),
                                     action_dim=self.num_actions + add_actions,
                                     max_action=1.0,
                                     layer_height=layer_height)

                self.agent.load('./{}'.format(self.pkl_folder),
                                self.i,
                                additional_dims=add_num_states,
                                additional_actions=add_actions,
                                remove_dimensions_=remove_states)
                self.num_actions = self.num_actions + add_actions

            else:
                print('WARNING: LOADING FULL AGENT')
                self.agent = TD3.TD3.load_model('./{}/agent_{}.pkl'.format(
                    self.pkl_folder, self.i))
                self.agent.use_scheduler = False
            if read_replay_buffer:
                print('reading replay buffer...')
                self.replay_buffer = pickle.load(
                    open('./{}/replay_{}.pkl'.format(self.pkl_folder, self.i),
                         'rb'))
            else:
                self.replay_buffer = ReplayBuffer()
        else:
            print('creating agent')
            #agent = NewAgent(alpha=0.000005, beta=0.00001, input_dims=[3], gamma=1.01, layer1_size=30, layer2_size=30, n_outputs=1, n_actions=26) # 26=13*2
            #agent = Agent(alpha=0.000025, beta=0.00025, input_dims=[19], tau=0.001, env='dummy', sigma=.5,
            #          batch_size=100,  layer1_size=200, layer2_size=250, n_actions=12, max_size=100000)
            self.agent = TD3.TD3(lr=lr,
                                 state_dim=self.num_states,
                                 action_dim=self.num_actions,
                                 max_action=1.0,
                                 layer_height=layer_height)
            self.replay_buffer = ReplayBuffer()
        self.state_sub = self.create_subscription(
            State, '/tori_state_{}'.format(self.computer),
            self.state_callback)  # listens for state updates
        self.reward_sub = self.create_subscription(
            State, '/tori_state_{}'.format(self.computer),
            self.reward_callback)  #
        self.replay_sub = self.create_subscription(
            Replay, '/replay',
            self.replay_callback)  # listens for replay messages
        self.replay_pub = self.create_publisher(
            Replay, '/replay',
            qos_profile=1)  # publishes replay to this/other computers
        self.joint_angles_pub = self.create_publisher(
            ToriJointAngles, '/tori_joint_command_{}'.format(self.computer)
        )  # tells control_motion the desired joint positions
        self.checkpoint_sub = self.create_subscription(
            Float64, '/checkpoint_{}'.format(self.computer),
            self.checkpoint_callback)
        self.checkpoint_pub = self.create_publisher(Float64,
                                                    '/checkpoint_{}'.format(
                                                        self.computer),
                                                    qos_profile=0)

        # publish an initial state so the subscriber callbacks start firing
        self.state_pub = self.create_publisher(State,
                                               '/tori_state_{}'.format(
                                                   self.computer),
                                               qos_profile=1)
        state = State()
        state.fallen_status = float(self.fallen_status)
        state.orientation = self.rpy_init
        state.pos = [0., 0., 0.80]  #TODO: get position_y_spine, not necessarily minimin
        state.distance_minimin = -.02  #TODO: check this value; field name matches reward_callback's msg.distance_minimin
        state.rpy_vel = [0., 0., 0.]
        state.vel = [0., 0., 0.]
        state.sim_time = 0.0
        self.state_pub.publish(state)
        print('published!')
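
The example ends with __init__, so the node is never spun here; a minimal entry point would look like the sketch below, where WalkNode is a placeholder for the class name, which is not visible in this snippet:

import rclpy

def main(args=None):
    rclpy.init(args=args)
    node = WalkNode()  # placeholder; the real class name is not shown above
    rclpy.spin(node)   # services the state/reward/replay/checkpoint callbacks
    node.destroy_node()
    rclpy.shutdown()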