from pybullet_envs.bullet.kuka_diverse_object_gym_env import KukaDiverseObjectEnv
# ContinuousDownwardBiasPolicy ships with pybullet's baselines example script
# (the import path may vary slightly across pybullet versions).
from pybullet_envs.baselines.enjoy_kuka_diverse_object_grasping import ContinuousDownwardBiasPolicy


def main():

    env = KukaDiverseObjectEnv(renders=True, isDiscrete=False)
    policy = ContinuousDownwardBiasPolicy()

    while True:
        obs, done = env.reset(), False
        print("===================================")
        print("obs")
        print(obs)
        episode_rew = 0
        while not done:
            env.render()
            act = policy.sample_action(obs, .1)
            print("Action")
            print(act)
            obs, rew, done, _ = env.step(act)  # step with the sampled action
            episode_rew += rew
        print("Episode reward", episode_rew)
Example 3
import os

import psutil
import pybullet
from pybullet_envs.bullet.kuka_diverse_object_gym_env import KukaDiverseObjectEnv  # NOQA

env = KukaDiverseObjectEnv()
process = psutil.Process(os.getpid())
# Disable pybullet's file cache so object files are reloaded on every reset.
pybullet.setPhysicsEngineParameter(enableFileCaching=0)

# Print the resident set size after each reset; steady growth indicates a leak.
for i in range(50):
    env.reset()
    print(i, process.memory_info().rss)
Example 4
            # Restore training state from a previously pickled checkpoint
            load_param = pickle.load(file)

        start_episode = load_param[0]
        ep_reward_list = load_param[1]
        avg_reward_list = load_param[2]

    else:
        # No checkpoint found: start training from scratch
        start_episode = 0
        ep_reward_list = []
        avg_reward_list = []

    actor_loss, critic_loss = 0, 0
    best_score = -np.inf
    print('Main training loop')
    for episode in range(start_episode, MAX_EPISODES):
        obsv = env.reset()
        state = np.asarray(
            obsv, dtype=np.float32) / 255.0  # scale pixel observation to floats in [0, 1]
        episodic_reward = 0
        frames = []
        steps = 0
        while True:
            # Record frames for the last few episodes only
            if episode > MAX_EPISODES - 3:
                frames.append(env.render(mode='rgb_array'))

            # take an action as per the policy
            if episode < RAND_EPS:  # explore with random actions for the first episodes
                action = env.action_space.sample()
            else:
                action = agent.policy(state)
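
The fragment above resumes training from a pickled checkpoint holding the episode index and the two reward histories. The matching save step is not part of this excerpt; a minimal sketch of what it would look like, assuming a hypothetical CHECKPOINT_PATH and the same three stored values:

# Hypothetical counterpart to the load above; CHECKPOINT_PATH is an assumed name.
with open(CHECKPOINT_PATH, 'wb') as file:
    pickle.dump([episode, ep_reward_list, avg_reward_list], file)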
Example 5
    screen = env._get_observation().transpose((2, 0, 1))  # transpose HWC -> CHW (channel axis first)
    #[screen, depth, segment] =
    # Convert to float, rescale, convert to torch tensor
    # (this doesn't require a copy)
    screen = np.ascontiguousarray(screen, dtype=np.float32) / 255  # the returned array must be float
    screen = torch.from_numpy(screen)  # convert the numpy array into a torch tensor
    # Resize, and add a batch dimension (BCHW)
    return resize(screen).unsqueeze(0).to(device)  # move the resized screen onto the torch device


##############


# Training procedure

env.reset()  # the environment must be reset before use

num_agents = 1  # a single agent acts in this environment
print('Number of agents:', num_agents)

init_screen = get_screen()
_, _, screen_height, screen_width = init_screen.shape  # determine the screen size

action_size = env.action_space.shape[0]  # size of the action space, set by the env
print('Size of each action:', action_size)

# Show the first simulated camera image (matplotlib's API is similar to MATLAB's)
plt.figure()
plt.imshow(init_screen.cpu().squeeze(0).permute(1, 2, 0).numpy(),
           interpolation='none')
plt.title('Example extracted screen')
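
The screen dimensions and action size extracted above are typically used to size a convolutional network. A minimal sketch of such a network (an assumed architecture for illustration; the original agent's model is not shown in this excerpt):

import torch
import torch.nn as nn


class QNetwork(nn.Module):
    """Maps a (1, 3, H, W) screen tensor to one output per action dimension."""

    def __init__(self, h, w, outputs):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=5, stride=2), nn.ReLU(),
            nn.Conv2d(16, 32, kernel_size=5, stride=2), nn.ReLU(),
        )
        # Infer the flattened feature size by tracing a dummy input.
        with torch.no_grad():
            n_flat = self.conv(torch.zeros(1, 3, h, w)).numel()
        self.head = nn.Linear(n_flat, outputs)

    def forward(self, x):
        return self.head(self.conv(x).flatten(start_dim=1))

# e.g. net = QNetwork(screen_height, screen_width, action_size)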
Example 6
REPLAY_START_SIZE = BATCH_SIZE

EPSILON_DECAY_LAST_FRAME = 10**4  #10**5
EPSILON_START = 0.9  #1.0
EPSILON_FINAL = 0.1

MODEL_PATH = './models/kuka'
MODEL_NAME = 'kuka_policy_net.pt'
device = 'cuda'

env = KukaDiverseObjectEnv(renders=False,
                           isDiscrete=True,
                           removeHeightHack=False,
                           maxSteps=20)
env.cid = p.connect(p.DIRECT)
env.reset()


## Get the camera image from the pybullet environment
## (this is an observation of the environment)
def get_observation():
    obs = env._get_observation()  #.transpose(2, 0, 1)
    #print(obs.shape)
    return obs


plt.imshow(get_observation())

# plt.show()  # call this when running outside an interactive/notebook session
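
The epsilon constants defined above suggest a linear exploration schedule for the DQN agent. One common formulation consistent with these names (an assumption for illustration; the original training loop is not shown in this excerpt):

def epsilon_by_frame(frame_idx):
    # Decay linearly from EPSILON_START to EPSILON_FINAL over
    # EPSILON_DECAY_LAST_FRAME frames, then hold at EPSILON_FINAL.
    fraction = min(frame_idx / EPSILON_DECAY_LAST_FRAME, 1.0)
    return EPSILON_START + fraction * (EPSILON_FINAL - EPSILON_START)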