def main():
    """Run endless grasping episodes in the Kuka diverse-object env.

    Each episode: reset, then repeatedly sample an action from the
    downward-bias policy and step the environment with it, accumulating
    and printing the episode reward.

    Relies on module-level imports of KukaDiverseObjectEnv and
    ContinuousDownwardBiasPolicy; never returns (infinite loop).
    """
    env = KukaDiverseObjectEnv(renders=True, isDiscrete=False)
    policy = ContinuousDownwardBiasPolicy()
    while True:
        obs, done = env.reset(), False
        print("===================================")
        print("obs")
        print(obs)
        episode_rew = 0
        while not done:
            env.render(mode='human')
            act = policy.sample_action(obs, .1)
            print("Action")
            print(act)
            # BUG FIX: the sampled action was previously discarded and the
            # env was stepped with a constant zero action
            # (env.step([0, 0, 0, 0, 0])), so the policy had no effect.
            obs, rew, done, _ = env.step(act)
            episode_rew += rew
        print("Episode reward", episode_rew)
##################### # TENSORBOARD SETTINGS current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") train_log_dir = 'logs/train/' + current_time graph_log_dir = 'logs/func/' + current_time train_summary_writer = tf.summary.create_file_writer(train_log_dir) with tf.Graph().as_default(): print(tf.executing_eagerly()) ###################### # start open/AI GYM environment #env = KukaCamGymEnv(renders=False, isDiscrete=False) env = KukaDiverseObjectEnv(renders=False, isDiscrete=False, maxSteps=20, removeHeightHack=False) print('shape of Observation space: ', env.observation_space.shape) print('shape of Action space: ', env.action_space.shape) print('Reward Range: ', env.reward_range) print('Action High value: ', env.action_space.high) print('Action Low Value: ', env.action_space.low) ################ # Hyper-parameters ###################### MAX_EPISODES = 15001 LR_A = 0.001 LR_C = 0.002 GAMMA = 0.99
from PIL import Image
from tensorboardX import SummaryWriter  # visualize training data
# Import the base environment.
from pybullet_envs.bullet.kuka_diverse_object_gym_env import KukaDiverseObjectEnv
from gym import spaces
import pybullet as p
#from pybullet_envs.bullet.kukaCamGymEnv import KukaCamGymEnv  # grasping with a single object
# env1 = KukaCamGymEnv(renders=True)
# env1.render(mode='human')
# # env1.cid = p.connect(p.DIRECT)
# With renders=True above, the display is already the GUI version.
# Rendering setup is needed here.
env = KukaDiverseObjectEnv(renders=True, isDiscrete=False, removeHeightHack=False, maxSteps=20, numObjects=1)
# renders=True defaults to the GUI, but the robot is not displayed yet.
# maxSteps is the maximum number of actions executable within one episode.
# removeHeightHack: if False, the robot automatically performs the descending
#   motion each step; otherwise it must learn it from scratch.
# cameraRandom: randomizes camera placement — 0 means fixed position, 1 means
#   fully random; most multi-view learning uses different camera positions
#   // spatial perception?
# width, Height: the size of the perceived camera frame.
# numObjects: the number of objects in the tray.
# isTest: False for training, True for the test data set.
env.cid = p.connect(p.DIRECT)
action_space = spaces.Box(low=-1, high=1, shape=(5,1))
# Insert the construction of the learning network here.
##############
# Actor-Critic implementation
import torch
import torch.nn as nn
import torch.nn.functional as F
from pybullet_envs.bullet.kuka_diverse_object_gym_env import KukaDiverseObjectEnv
from gym import spaces
from itertools import count
import matplotlib.pyplot as plt

# Demo: run 30 episodes with random actions in the Kuka diverse-object env.
env = KukaDiverseObjectEnv(renders=True,  # True to see the simulation environment
                           isDiscrete=False,
                           removeHeightHack=False,
                           maxSteps=20)
print('shape of Observation space: ', env.observation_space.shape)
print('shape of Action space: ', env.action_space.shape)
print('Reward Range: ', env.reward_range)
print('Action High value: ', env.action_space.high)
print('Action Low Value: ', env.action_space.low)

for episode in range(30):
    print('Episode: ', episode)
    # BUG FIX: the reset observation was stored in a separate variable
    # (`obsv`) while the loop updated `obs`, leaving the reset value stale
    # and `obs` undefined on the first iteration. Use a single variable.
    obs = env.reset()
    for t in count():
        #plt.imshow(obs)
        #plt.show()
        #env.render()
        action = env.action_space.sample()  # random policy
        print("action=", action)
        next_obs, reward, done, info = env.step(action)
        print("reward", reward)
        if done:
            break
        obs = next_obs
        #print('t =', t)
# DQN hyper-parameters.
BATCH_SIZE = 32
REPLAY_SIZE = 10000                  # replay-buffer capacity
LEARNING_RATE = 1e-4
SYNC_TARGET_FRAMES = 500 #150#1000   # frames between target-net syncs
REPLAY_START_SIZE = BATCH_SIZE       # minimum buffer fill before training
EPSILON_DECAY_LAST_FRAME = 10**4 #10**5
EPSILON_START = 0.9 #1.0
EPSILON_FINAL = 0.1
MODEL_PATH = './models/kuka'
MODEL_NAME = 'kuka_policy_net.pt'
# NOTE(review): hard-coded; will fail on CPU-only machines — consider a
# torch.cuda.is_available() fallback.
device = 'cuda'
env = KukaDiverseObjectEnv(renders=False, isDiscrete=True, removeHeightHack=False, maxSteps=20)
env.cid = p.connect(p.DIRECT)
env.reset()
## get the camera image from the pybullet environment
## (this is an observation of the enviroment)
def get_observation():
    """Return the current camera image from the module-level env.

    Calls the env's private _get_observation(); the transpose to CHW
    layout is currently commented out, so the image is returned as-is
    (HWC, as the commented .transpose(2, 0, 1) suggests — TODO confirm).
    """
    obs = env._get_observation() #.transpose(2, 0, 1)
    #print(obs.shape)
    return obs
plt.imshow(get_observation())
TB_LOG = False # enable / disable tensorboard logging if TB_LOG: current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") train_log_dir = '../logs/train/' + current_time train_summary_writer = tf.summary.create_file_writer(train_log_dir) ############################ # for reproducibility tf.random.set_seed(20) np.random.seed(20) ##################### # start open/AI GYM environment env = KukaDiverseObjectEnv( renders=False, # True for testing isDiscrete=False, maxSteps=20, removeHeightHack=False) print('shape of Observation space: ', env.observation_space.shape) print('shape of Action space: ', env.action_space.shape) print('Reward Range: ', env.reward_range) print('Action High value: ', env.action_space.high) print('Action Low Value: ', env.action_space.low) ############################ upper_bound = env.action_space.high state_size = env.observation_space.shape # (48, 48, 3) action_size = env.action_space.shape # (3,) print('state_size: ', state_size) print('action_size: ', action_size)