from pybullet_envs.bullet.kuka_diverse_object_gym_env import KukaDiverseObjectEnv


def main():
    env = KukaDiverseObjectEnv(renders=True, isDiscrete=False)
    policy = ContinuousDownwardBiasPolicy()  # random policy biased to move the gripper downward

    while True:
        obs, done = env.reset(), False
        print("===================================")        
        print("obs")
        print(obs)
        episode_rew = 0
        while not done:
            env.render(mode='human')
            act = policy.sample_action(obs, .1)
            print("Action")
            print(act)
            obs, rew, done, _ = env.step(act)  # apply the sampled action
            episode_rew += rew
        print("Episode reward", episode_rew)
Example #2
    #####################
    # TENSORBOARD SETTINGS
    current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    train_log_dir = 'logs/train/' + current_time
    graph_log_dir = 'logs/func/' + current_time
    train_summary_writer = tf.summary.create_file_writer(train_log_dir)

    with tf.Graph().as_default():
        print(tf.executing_eagerly())  # prints False: eager execution is disabled inside a legacy graph context
    ######################

    # start OpenAI Gym environment
    #env = KukaCamGymEnv(renders=False, isDiscrete=False)
    env = KukaDiverseObjectEnv(renders=False,
                               isDiscrete=False,
                               maxSteps=20,
                               removeHeightHack=False)
    print('shape of Observation space: ', env.observation_space.shape)
    print('shape of Action space: ', env.action_space.shape)
    print('Reward Range: ', env.reward_range)
    print('Action High value: ', env.action_space.high)
    print('Action Low Value: ', env.action_space.low)

    ######################
    # Hyper-parameters
    ######################
    MAX_EPISODES = 15001

    LR_A = 0.001
    LR_C = 0.002
    GAMMA = 0.99
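    # The two learning rates above are typically wired to separate optimizers
    # for the actor and the critic. Minimal sketch (actor_model / critic_model
    # are hypothetical and assumed to be built elsewhere in the script):
    actor_optimizer = tf.keras.optimizers.Adam(learning_rate=LR_A)
    critic_optimizer = tf.keras.optimizers.Adam(learning_rate=LR_C)
    # GAMMA discounts future rewards in the TD target:
    #   y = r + GAMMA * Q_target(s', a')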
Example #3
from PIL import Image
from tensorboardX import SummaryWriter  # for visualizing training data

# import the basic environment
from pybullet_envs.bullet.kuka_diverse_object_gym_env import KukaDiverseObjectEnv
from gym import spaces
import pybullet as p

#from pybullet_envs.bullet.kukaCamGymEnv import KukaCamGymEnv  # grasping of a single object only

# env1 = KukaCamGymEnv(renders=True)
# env1.render(mode='human')
# env1.cid = p.connect(p.DIRECT)  # with renders=True above, the GUI version is already shown

# the rendering part needs to be set up here
env = KukaDiverseObjectEnv(renders=True, isDiscrete=False, removeHeightHack=False, maxSteps=20, numObjects=1)  # renders=True opens the GUI by default, but the robot is not shown yet
# maxSteps: maximum number of actions that can be executed within one episode
# removeHeightHack: if False, the robot automatically moves downward on every step; otherwise it has to learn this from scratch
# cameraRandom: random placement of the camera; 0 is a fixed pose, 1 is fully random (most multi-view learning works through different camera poses // spatial perception?)
# width, height: size of the camera image that is observed
# numObjects: number of objects in the tray
# isTest: False uses the training set, True uses the test object set
# (these parameters are shown in use in the sketch below)
env.cid = p.connect(p.DIRECT)
action_space = spaces.Box(low=-1, high=1, shape=(5,1))
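# The constructor parameters described above can also be set explicitly; a
# sketch of a test-time configuration (the values here are illustrative,
# not from the original script):
test_env = KukaDiverseObjectEnv(renders=False,
                                isDiscrete=False,
                                removeHeightHack=False,
                                maxSteps=20,
                                numObjects=5,
                                cameraRandom=1,  # fully random camera placement
                                width=64,        # observed image width
                                height=64,       # observed image height
                                isTest=True)     # use the test object set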

# the construction of the learning network is inserted here
##############
# Actor-Critic implementation 
import torch
import torch.nn as nn
import torch.nn.functional as F
from pybullet_envs.bullet.kuka_diverse_object_gym_env import KukaDiverseObjectEnv
from gym import spaces
from itertools import count
import matplotlib.pyplot as plt
env = KukaDiverseObjectEnv(renders=True,  # True to see the simulation environment
                           isDiscrete=False,
                           removeHeightHack=False,
                           maxSteps=20)

print('shape of Observation space: ', env.observation_space.shape)
print('shape of Action space: ', env.action_space.shape)
print('Reward Range: ', env.reward_range)
print('Action High value: ', env.action_space.high)
print('Action Low Value: ', env.action_space.low)


for episode in range(30):
    print('Episode: ', episode)
    obs = env.reset()
    for t in count():
        #plt.imshow(obs)
        #plt.show()
        #env.render()
        action = env.action_space.sample()
        print("action=", action)
        next_obs, reward, done, info = env.step(action)
        print("reward", reward)
        if done:
            break
        obs = next_obs
        #print('t =', t)
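# The imports above announce an "Actor-Critic implementation" but the networks
# themselves are not shown; a minimal sketch of a shared-trunk CNN actor-critic
# for the 48x48x3 observations and a 3-dimensional continuous action
# (layer sizes are illustrative, not from the original script):
class ActorCritic(nn.Module):
    def __init__(self, action_dim=3):
        super().__init__()
        # shared convolutional trunk for the camera observation
        self.conv = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=5, stride=2), nn.ReLU(),
            nn.Conv2d(16, 32, kernel_size=5, stride=2), nn.ReLU(),
        )
        conv_out = 32 * 9 * 9  # 48 -> 22 -> 9 with the layers above
        self.actor = nn.Sequential(
            nn.Linear(conv_out, 128), nn.ReLU(),
            nn.Linear(128, action_dim), nn.Tanh(),  # actions in [-1, 1]
        )
        self.critic = nn.Sequential(
            nn.Linear(conv_out, 128), nn.ReLU(),
            nn.Linear(128, 1),
        )

    def forward(self, x):
        # x: (batch, 3, 48, 48) float tensor
        feat = self.conv(x).flatten(start_dim=1)
        return self.actor(feat), self.critic(feat)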
Example #5
BATCH_SIZE = 32
REPLAY_SIZE = 10000
LEARNING_RATE = 1e-4
SYNC_TARGET_FRAMES = 500  #150#1000
REPLAY_START_SIZE = BATCH_SIZE

EPSILON_DECAY_LAST_FRAME = 10**4  #10**5
EPSILON_START = 0.9  #1.0
EPSILON_FINAL = 0.1
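# The three constants above define a linear exploration schedule; a sketch of
# the usual epsilon computation (frame_idx is assumed to be the training
# loop's global frame counter):
def epsilon_by_frame(frame_idx):
    # decay linearly from EPSILON_START to EPSILON_FINAL, then hold
    fraction = min(frame_idx / EPSILON_DECAY_LAST_FRAME, 1.0)
    return EPSILON_START + fraction * (EPSILON_FINAL - EPSILON_START)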

MODEL_PATH = './models/kuka'
MODEL_NAME = 'kuka_policy_net.pt'
device = 'cuda'

env = KukaDiverseObjectEnv(renders=False,
                           isDiscrete=True,
                           removeHeightHack=False,
                           maxSteps=20)
env.cid = p.connect(p.DIRECT)
env.reset()


## get the camera image from the pybullet environment
## (this is an observation of the environment)
def get_observation():
    obs = env._get_observation()  #.transpose(2, 0, 1)
    #print(obs.shape)
    return obs


plt.imshow(get_observation())
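# The commented-out .transpose(2, 0, 1) above hints at converting the (H, W, C)
# camera image into the (C, H, W) layout PyTorch expects; a sketch of that
# conversion (numpy and torch are assumed to be imported earlier in the script):
def obs_to_tensor(obs):
    # scale to [0, 1], reorder to CHW, add a batch dimension, move to device
    arr = np.asarray(obs, dtype=np.float32).transpose(2, 0, 1) / 255.0
    return torch.from_numpy(arr).unsqueeze(0).to(device)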
Example #6
    TB_LOG = False  # enable / disable tensorboard logging

    if TB_LOG:
        current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        train_log_dir = '../logs/train/' + current_time
        train_summary_writer = tf.summary.create_file_writer(train_log_dir)
    ############################
    # for reproducibility
    tf.random.set_seed(20)
    np.random.seed(20)

    #####################
    # start OpenAI Gym environment
    env = KukaDiverseObjectEnv(
        renders=False,  # True for testing
        isDiscrete=False,
        maxSteps=20,
        removeHeightHack=False)
    print('shape of Observation space: ', env.observation_space.shape)
    print('shape of Action space: ', env.action_space.shape)
    print('Reward Range: ', env.reward_range)
    print('Action High value: ', env.action_space.high)
    print('Action Low Value: ', env.action_space.low)

    ############################
    upper_bound = env.action_space.high
    state_size = env.observation_space.shape  # (48, 48, 3)
    action_size = env.action_space.shape  # (3,)
    print('state_size: ', state_size)
    print('action_size: ', action_size)
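    # upper_bound is typically used to rescale a tanh-squashed actor output
    # into the environment's action range; a minimal sketch (actor_model is a
    # hypothetical Keras model ending in a tanh layer, defined elsewhere):
    def policy_action(state, actor_model):
        state_batch = tf.expand_dims(tf.convert_to_tensor(state, dtype=tf.float32), 0)
        raw_action = actor_model(state_batch)[0].numpy()
        return np.clip(raw_action * upper_bound,
                       env.action_space.low, env.action_space.high)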