def next_state(self, env):
    # Execute a random action in the given env and return the resulting
    # environment state wrapped in a State node.
    a = env.action_space.sample()
    if isinstance(a, np.ndarray):
        a = a.astype(np.float32)
    nextmove = [a]
    obs, r, done, info = env.step(nextmove)
    next_node = State(info, obs, self.rew + r, done)
    return next_node
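# --- Hedged usage sketch (not part of the original module) ---
# Assumes `node` is a tree node exposing next_state() as defined above and
# that State instances carry a `done` flag; both names are assumptions here.
def expand_with_random_step(node, env):
    # One-step expansion: sample a random transition via next_state and
    # reset the environment if that transition was terminal.
    child_state = node.next_state(env)
    if child_state.done:
        env.reset()
    return child_state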
def HOOSTEP(a, tau, env):
    # Repeat action a for up to tau environment steps, accumulating reward.
    rew = 0
    logger.debug("HOOSTEP")
    for i in range(tau):
        obs, r, done, info = env.step(a)
        # env.render()
        rew += r
        if done:
            break
    return obs, rew, done, info
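# --- Hedged usage sketch (helper name below is an assumption) ---
# HOOSTEP holds one action for up to tau steps; a caller might use it to
# score a macro-action sampled from the HOO tree, e.g.:
def evaluate_macro_action(env, tau):
    # Sample a primitive action and hold it for tau steps, returning the
    # accumulated reward of that macro-action and the terminal flag.
    a = env.action_space.sample()
    obs, rew, done, info = HOOSTEP(a, tau, env)
    return rew, done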
def UPDATELEGACY(node, env):
    # Re-simulate this node's last move from the parent's saved env state,
    # refresh the cached snapshot, reward, and done flag, then recurse into
    # the children.
    parent = node.parent
    if parent:
        RESTOREENV(env, parent.state.envState)
        obs, r, done, info = env.step(node.state.moves[-1])
        node.state.envState = CLONEENV(env)
        node.state.rew = parent.state.rew + r
        node.state.done = done
    for c in node.children:
        UPDATELEGACY(c, env)
    return
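# --- Hedged sketch of the snapshot helpers used above ---
# The real CLONEENV / RESTOREENV are defined elsewhere in this repo; the
# deepcopy-based stand-ins below are only an assumption about their intent
# (snapshot and in-place restore of a fully picklable environment).
import copy

def CLONEENV_sketch(env):
    # Snapshot the whole environment object.
    return copy.deepcopy(env)

def RESTOREENV_sketch(env, snapshot):
    # Restore a previous snapshot into the live environment in place.
    env.__dict__.update(copy.deepcopy(snapshot).__dict__)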
def DEFAULTPOLICY(state, depth, env):
    # Rollout simulation for the allowed horizon, with per-step discounting.
    logger.debug("DEFAULTPOLICY")
    t = depth
    reward = state.rew
    done = state.terminal()
    # RESTOREENV(env, state.envState)
    while not done and t < DEPTH_MAX:
        a = env.action_space.sample()
        if isinstance(a, np.ndarray):
            a = a.astype(np.float32)
        nextmove = [a]
        obs, r, done, info = env.step(nextmove)
        reward += r * (0.99 ** t)
        t += 1
    if done:
        env.reset()
    return reward
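# --- Hedged sketch (helper name and signature are assumptions) ---
# DEFAULTPOLICY discounts each rollout reward by 0.99**t, where t starts at
# the node's depth; the standalone helper below shows the same accumulation.
def discounted_return(rewards, depth, gamma=0.99):
    # Sum rewards collected from `depth` onward with per-step discounting.
    total = 0.0
    for offset, r in enumerate(rewards):
        total += r * (gamma ** (depth + offset))
    return total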
tau = current_node.state.tau[-1]
logger.info(" Selected a and tau [%s, %s]" % (a[0], tau))
prev_root = current_node.parent
for c in prev_root.children:
    logger.debug(" [%s %s] visit %s, value %s" % (c.state.moves[-1], c.state.tau[-1], c.visits, c.reward / c.visits))
lead_node = current_node
Hroot = hoo.HooNode([0, T_MAX])
new_Hroots = [Hroot for c in range(action_space.n)]  # note: every entry references the same HooNode instance
n_i = [0 for c in range(action_space.n)]
r_i = [0 for c in range(action_space.n)]
# current_node = Node(State(None, state, n_act=action_space.n, Hroot=new_Hroots, n_act_i=n_i, r_act_i=r_i, rState=True))
if tau == 1:
    current_node = lead_node  # For tau=1 after initial selections
obs, r, done, info = env.step(a)
# env.render()
rew += r
t += 1
state = CLONEENV(env)
current_node.state.envState = state
current_node.state.rew = test_r * args.reward_scale_factor
# current_node.state.done = done
current_node.parent = None
current_node.state.Hroots = new_Hroots
current_node.state.n_act_i = n_i
current_node.state.r_act_i = r_i
current_node.state.n_act_icp = n_i
current_node.state.Hfront = None
current_node.state.rootState = True
for i in range(1, 1 + 1):
    obs = env.reset()
    # obs = resize(rgb2gray(env.reset()), (80, 80))
    # obs = obs[np.newaxis, :, :]
    reward = 0
    done = False
    R = 0
    while not done:
        action = agent.act(obs)
        # action = agent.act_and_train(obs, reward)
        # action = agent.act(obs)
        obs, reward, done, _ = env.step(action)
        # obs = resize(rgb2gray(obs), (80, 80))
        # obs = obs[np.newaxis, :, :]
    agent.stop_episode()

last_time = datetime.datetime.now()
filename = "toreplace"
env.set_window(False)
print("Starting the training!")