def main():
    if opt.use_cycle:
        opt.load_cycle = Frame_transfer(opt)

    if opt.use_dmp:
        opt.load_dmp = DMP(opt)
        opt.each_action_lim = opt.each_action_lim * opt.cut_frame_num * opt.dmp_ratio

    if opt.video_reward:
        test_path = os.path.join(opt.project_root, 'logs/a3c_log/test{}'.format(opt.test_id))
        if not os.path.exists(test_path):
            os.mkdir(test_path)
        evaluator = Frame_eval(
            img_path=os.path.join(opt.project_root,
                                  'logs/a3c_log/test{}'.format(opt.test_id),
                                  'epoch-0'),
            frame_len=opt.cut_frame_num,
            start_id=0,
            memory_path=os.path.join(opt.project_root,
                                     'logs/a3c_log/test{}'.format(opt.test_id),
                                     'memory'),
            class_label=opt.action_id,
            opt=opt)
        opt.load_video_pred = evaluator

    Engine_module = importlib.import_module('Envs.env_{}'.format(opt.action_id))
    RobotEnv = getattr(Engine_module, 'Engine{}'.format(opt.action_id))

    if opt.use_embedding:
        if opt.nlp_embedding:
            agent = A3C_solver_embedding_nlp(opt, RobotEnv)
        else:
            agent = A3C_solver_embedding(opt, RobotEnv)
    else:
        agent = A3C_solver(opt, RobotEnv)
    agent.run()
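# A minimal, self-contained sketch of the dynamic environment-loading pattern
# shared by the entry points in this file (importlib + getattr). The module
# and class naming scheme ('Envs.env_<id>' / 'Engine<id>') is taken from the
# code above; the wrapper function itself is illustrative, not part of the
# repository's API.
import importlib

def load_engine(action_id):
    """Resolve Envs.env_<action_id>.Engine<action_id> at runtime."""
    module = importlib.import_module('Envs.env_{}'.format(action_id))
    return getattr(module, 'Engine{}'.format(action_id))

# Usage (assuming the Envs package is importable):
#   Engine = load_engine(42)
#   env = Engine(opt)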
def main():
    Engine_module = importlib.import_module('Envs.env_{}'.format(opt.action_id))
    Engine = getattr(Engine_module, 'Engine{}'.format(opt.action_id))

    if opt.use_cycle:
        opt.load_cycle = Frame_transfer(opt)

    if opt.use_dmp:
        opt.load_dmp = DMP(opt)
        opt.each_action_lim = opt.each_action_lim * opt.cut_frame_num * opt.dmp_ratio

    if opt.video_reward:
        test_path = os.path.join(opt.project_root, 'logs/td3_log/test{}'.format(opt.test_id))
        if not os.path.exists(test_path):
            os.mkdir(test_path)
        evaluator = Frame_eval(
            img_path=os.path.join(opt.project_root,
                                  'logs/td3_log/test{}'.format(opt.test_id),
                                  'epoch-0'),
            frame_len=opt.cut_frame_num,
            start_id=0,
            memory_path=os.path.join(opt.project_root,
                                     'logs/td3_log/test{}'.format(opt.test_id),
                                     'memory'),
            class_label=opt.action_id,
            opt=opt)
        opt.load_video_pred = evaluator

    if opt.gui:
        opt.p = bc.BulletClient(connection_mode=pybullet.GUI)
    else:
        opt.p = bc.BulletClient(connection_mode=pybullet.DIRECT)

    env = Engine(opt)  # direct call; the original eval('Engine(opt)') was unnecessary
    state_dim = env.observation_space
    action_dim = len(env.action_space['high'])
    max_action = env.action_space['high'][0]
    min_Val = torch.tensor(1e-7).float().to(device)  # min value

    if opt.use_embedding:
        if opt.nlp_embedding:
            agent = TD3_final(state_dim, action_dim, max_action, env.log_root, opt)
        else:
            agent = TD3_embedding(state_dim, action_dim, max_action, env.log_root, opt)
    else:
        agent = TD3(state_dim, action_dim, max_action, env.log_root, opt)

    ep_r = 0
    if opt.mode == 'test':
        agent.load(2000)
        for i in range(opt.iteration):
            state = env.reset()
            for t in range(100):
                action = agent.select_action(state)
                next_state, reward, done, info = env.step(np.float32(action))
                if done or t == 99:  # was `t == 2000`, unreachable inside range(100)
                    print("Ep_i \t{}, the ep_r is \t{:0.2f}, the step is \t{}".format(i, ep_r, t))
                    break
                state = next_state
    elif opt.mode == 'train':
        print("====================================")
        print("Collection Experience...")
        print("====================================")
        # Replay previously collected experience from disk (hard-coded run 211)
        # and compare freshly simulated rewards against the stored ones.
        buffer_root = '/scr1/system/gamma-robot/scripts/utils/buffer/211'
        action_all = np.load(os.path.join(buffer_root, 'action_all.npy'))
        target_all = np.load(os.path.join(buffer_root, 'target_all.npy'))
        rank_all = np.load(os.path.join(buffer_root, 'rank_all.npy'))
        reward_all = np.load(os.path.join(buffer_root, 'reward_all.npy'))
        for i in range(opt.num_iteration):
            if i < 4000:
                continue
            target = target_all[i]
            state = env.reset(target)
            action = action_all[i]
            print('epoch id:{}, action:{}'.format(i, str(action)))
            next_state, reward, done, info = env.step(action)
            print(reward, reward_all[i], rank_all[i])
    else:
        raise NameError("mode wrong!!!")
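# A small sketch of the aligned-array buffer layout the replay branch above
# expects: action_all.npy, target_all.npy, rank_all.npy and reward_all.npy
# are parallel arrays indexed by epoch. The helper below is an illustrative
# assumption about how such a buffer could be written, not the exact format
# produced by the original data-collection script.
import os
import numpy as np

def save_aligned_buffer(root, actions, targets, ranks, rewards):
    """Persist parallel experience arrays so that index i refers to epoch i."""
    assert len(actions) == len(targets) == len(ranks) == len(rewards)
    os.makedirs(root, exist_ok=True)
    np.save(os.path.join(root, 'action_all.npy'), np.asarray(actions))
    np.save(os.path.join(root, 'target_all.npy'), np.asarray(targets))
    np.save(os.path.join(root, 'rank_all.npy'), np.asarray(ranks))
    np.save(os.path.join(root, 'reward_all.npy'), np.asarray(rewards))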
def main():
    assert opt.mode == 'test'
    opt.test_id = 8888  # dedicated log id for evaluation runs

    Engine_module = importlib.import_module('Envs.env_{}'.format(opt.action_id))
    Engine = getattr(Engine_module, 'Engine{}'.format(opt.action_id))

    if opt.use_cycle:
        opt.load_cycle = Frame_transfer(opt)

    if opt.use_dmp:
        opt.load_dmp = DMP(opt)
        opt.each_action_lim = opt.each_action_lim * opt.cut_frame_num * opt.dmp_ratio

    if opt.video_reward:
        test_path = os.path.join(opt.project_root, 'logs/td3_log/test{}'.format(opt.test_id))
        if not os.path.exists(test_path):
            os.mkdir(test_path)
        evaluator = Frame_eval(
            img_path=os.path.join(opt.project_root,
                                  'logs/td3_log/test{}'.format(opt.test_id),
                                  'epoch-0'),
            frame_len=opt.cut_frame_num,
            start_id=0,
            memory_path=os.path.join(opt.project_root,
                                     'logs/td3_log/test{}'.format(opt.test_id),
                                     'memory'),
            class_label=opt.action_id,
            opt=opt)
        opt.load_video_pred = evaluator

    if opt.gui:
        opt.p = bc.BulletClient(connection_mode=pybullet.GUI)
    else:
        opt.p = bc.BulletClient(connection_mode=pybullet.DIRECT)

    env = Engine(opt)
    state_dim = env.observation_space
    action_dim = len(env.action_space['high'])
    max_action = env.action_space['high'][0]
    min_Val = torch.tensor(1e-7).float().to(device)  # min value

    if opt.use_embedding:
        if opt.nlp_embedding:
            agent = TD3_final(state_dim, action_dim, max_action, env.log_root, opt)
        else:
            agent = TD3_embedding(state_dim, action_dim, max_action, env.log_root, opt)
    else:
        agent = TD3(state_dim, action_dim, max_action, env.log_root, opt)

    weight_id = 1400
    test_file = open(os.path.join(test_path, 'test_{}.txt'.format(weight_id)), 'w')
    agent.load(weight_id)
    # Evaluate the loaded policy once per task embedding and log its reward.
    for target in opt.embedding_list:
        state = env.reset(target)
        action = agent.select_action(state)
        action = action.clip(-max_action, max_action)
        next_state, reward, done, info = env.step(action)
        reward_id = np.where(np.array(env.opt.embedding_list) == env.opt.load_embedding)[0][0]
        test_file.write('{}\n'.format(reward[reward_id]))
        print(action)
    test_file.close()
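# A minimal numpy illustration of the reward-indexing idiom used above: the
# environment returns one reward per task embedding, and the scalar for the
# currently loaded task is found by its position in opt.embedding_list. The
# concrete ids and reward values below are made up for the example.
import numpy as np

embedding_list = [40, 41, 42, 43]           # hypothetical task ids
load_embedding = 42                         # the task currently being evaluated
reward = np.array([0.1, -0.3, 0.8, 0.0])    # per-task rewards from env.step

reward_id = np.where(np.array(embedding_list) == load_embedding)[0][0]
assert reward_id == 2
scalar_reward = reward[reward_id]           # 0.8, the reward for task 42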
def main():
    Engine_module = importlib.import_module('Envs.env_{}'.format(opt.action_id))
    Engine = getattr(Engine_module, 'Engine{}'.format(opt.action_id))

    if opt.use_cycle:
        opt.load_cycle = Frame_transfer(opt)

    if opt.use_dmp:
        opt.load_dmp = DMP(opt)
        opt.each_action_lim = opt.each_action_lim * opt.cut_frame_num * opt.dmp_ratio

    if opt.video_reward:
        test_path = os.path.join(opt.project_root, 'logs/td3_log/test{}'.format(opt.test_id))
        if not os.path.exists(test_path):
            os.mkdir(test_path)
        evaluator = Frame_eval(
            img_path=os.path.join(opt.project_root,
                                  'logs/td3_log/test{}'.format(opt.test_id),
                                  'epoch-0'),
            frame_len=opt.cut_frame_num,
            start_id=0,
            memory_path=os.path.join(opt.project_root,
                                     'logs/td3_log/test{}'.format(opt.test_id),
                                     'memory'),
            class_label=opt.action_id,
            opt=opt)
        opt.load_video_pred = evaluator

    if opt.gui:
        opt.p = bc.BulletClient(connection_mode=pybullet.GUI)
    else:
        opt.p = bc.BulletClient(connection_mode=pybullet.DIRECT)

    env = Engine(opt)
    state_dim = env.observation_space
    action_dim = len(env.action_space['high'])
    max_action = env.action_space['high'][0]
    min_Val = torch.tensor(1e-7).float().to(device)  # min value

    if opt.use_embedding:
        if opt.nlp_embedding:
            agent = TD3_final(state_dim, action_dim, max_action, env.log_root, opt)
        else:
            agent = TD3_embedding(state_dim, action_dim, max_action, env.log_root, opt)
    else:
        agent = TD3(state_dim, action_dim, max_action, env.log_root, opt)

    ep_r = 0
    if opt.mode == 'test':
        agent.load(2000)
        for i in range(opt.iteration):
            state = env.reset()
            for t in range(100):
                action = agent.select_action(state)
                next_state, reward, done, info = env.step(np.float32(action))
                if done or t == 99:  # was `t == 2000`, unreachable inside range(100)
                    print("Ep_i \t{}, the ep_r is \t{:0.2f}, the step is \t{}".format(i, ep_r, t))
                    break
                state = next_state
    elif opt.mode == 'train':
        print("====================================")
        print("Collection Experience...")
        print("====================================")
        if opt.load:
            agent.load(7000)

        if opt.add_buffer:
            # Seed the replay buffer with previously collected exploration data.
            buffer_path = os.path.join(opt.project_root, 'scripts', 'utils', 'buffer', 'buffer-100k')
            all_action = np.load(os.path.join(buffer_path, 'action_all.npy'))
            all_reward = np.load(os.path.join(buffer_path, 'reward_all.npy'))
            all_embedding = np.load(os.path.join(buffer_path, 'embedding_all.npy'))
            all_target = np.load(os.path.join(buffer_path, 'target_all.npy'))
            all_rank = np.load(os.path.join(buffer_path, 'rank_all.npy'))
            task_state = np.load(os.path.join(buffer_path, 'state.npy'))
            for i in range(all_action.shape[0]):
                if all_target[i] not in opt.fine_tune_list:
                    continue
                print('add buffer data:{}'.format(i))
                state = (all_embedding[i], task_state[all_target[i]])
                next_state = (all_embedding[i], task_state[all_target[i]])
                action = all_action[i]
                reward = all_reward[i]
                done = True
                # float(done): np.float was removed from modern NumPy
                agent.memory.push((state, next_state, action, reward, float(done)))
                reward_id = np.where(np.array(env.opt.embedding_list) == all_target[i])[0][0]
                ep_r = reward[reward_id]
                if ep_r > 0:
                    # Oversample successful transitions.
                    for push_t in range(4):
                        agent.memory.push((state, next_state, action, reward, float(done)))

        for i in range(opt.num_iteration):
            state = env.reset()
            for t in range(2000):
                action = agent.select_action(state)
                action = action + np.random.normal(0, max_action * opt.noise_level, size=action.shape)
                action = action.clip(-max_action, max_action)
                print('epoch id:{}, action:{}'.format(i, str(action)))
                next_state, reward, done, info = env.step(action)
                if opt.use_embedding:
                    reward_id = np.where(np.array(env.opt.embedding_list) == env.opt.load_embedding)[0][0]
                    ep_r += reward[reward_id]
                else:
                    ep_r += reward
                agent.memory.push((state, next_state, action, reward, float(done)))
                if ep_r > 0:
                    for push_t in range(4):
                        agent.memory.push((state, next_state, action, reward, float(done)))
                if (i + 1) % 10 == 0:  # was `i + 1 % 10 == 0`, which never fires due to precedence
                    print('Episode {}, The memory size is {} '.format(i, len(agent.memory.storage)))
                if len(agent.memory.storage) >= opt.start_train - 1:
                    agent.update(opt.update_time)
                    opt.noise_level = opt.noise_training_level
                state = next_state
                if done or t == opt.max_episode - 1:
                    agent.writer.add_scalar('ep_r', ep_r, global_step=i)
                    if i % opt.print_log == 0:
                        print("Ep_i \t{}, the ep_r is \t{:0.2f}, the step is \t{}".format(i, ep_r, t))
                    ep_r = 0
                    break
            if i % opt.log_interval == 0:
                agent.save(i)
    else:
        raise NameError("mode wrong!!!")
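# The training loop above pushes every transition once and, when the episodic
# return is positive, pushes four extra copies to bias the replay buffer
# toward successful episodes. A minimal sketch of that heuristic, with a toy
# list standing in for agent.memory:
def push_with_positive_oversampling(memory, transition, ep_r, extra_copies=4):
    """Store a transition; duplicate it when the episode return is positive."""
    memory.append(transition)
    if ep_r > 0:
        for _ in range(extra_copies):
            memory.append(transition)

# Usage:
#   buffer = []
#   push_with_positive_oversampling(buffer, (state, next_state, action, reward, 1.0), ep_r=0.5)
#   assert len(buffer) == 5   # one original push plus four duplicates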
def show_one_policy():
    Engine_module = importlib.import_module('Envs.env_{}'.format(opt.action_id))
    Engine = getattr(Engine_module, 'Engine{}'.format(opt.action_id))

    if opt.use_cycle:
        opt.load_cycle = Frame_transfer(opt)

    if opt.use_dmp:
        opt.load_dmp = DMP(opt)
        opt.each_action_lim = opt.each_action_lim * opt.cut_frame_num * opt.dmp_ratio

    if opt.video_reward:
        test_path = os.path.join(opt.project_root, 'logs/td3_log/test{}'.format(opt.test_id))
        if not os.path.exists(test_path):
            os.mkdir(test_path)
        evaluator = Frame_eval(
            img_path=os.path.join(opt.project_root,
                                  'logs/td3_log/test{}'.format(opt.test_id),
                                  'epoch-0'),
            frame_len=opt.cut_frame_num,
            start_id=0,
            memory_path=os.path.join(opt.project_root,
                                     'logs/td3_log/test{}'.format(opt.test_id),
                                     'memory'),
            class_label=opt.action_id,
            opt=opt)
        opt.load_video_pred = evaluator

    if opt.gui:
        opt.p = bc.BulletClient(connection_mode=pybullet.GUI)
    else:
        opt.p = bc.BulletClient(connection_mode=pybullet.DIRECT)

    env = Engine(opt)
    state_dim = env.observation_space
    action_dim = len(env.action_space['high'])
    max_action = env.action_space['high'][0]

    if opt.use_embedding:
        agent = TD3_embedding(state_dim, action_dim, max_action, env.log_root, opt)
    else:
        agent = TD3(state_dim, action_dim, max_action, env.log_root, opt)

    assert opt.mode == 'test'
    agent.load(2000)
    state = env.reset()

    # Sweep a 2D grid of task embeddings and plot the policy's camera-space
    # response as a surface.
    inter_n = 10.
    fig = plt.figure()
    ax = fig.add_subplot(projection='3d')  # replaces direct Axes3D(fig) construction
    X = np.arange(0, 1 + 1 / inter_n, 1 / inter_n)
    Y = np.arange(0, 1 + 1 / inter_n, 1 / inter_n)
    X, Y = np.meshgrid(X, Y)
    Z = np.sin(np.sqrt(X ** 2 + Y ** 2))  # placeholder values, overwritten below

    for i in range(int(inter_n + 1)):
        for j in range(int(inter_n + 1)):
            state[0] = np.array([i / inter_n, 0, 0, j / inter_n])
            action = agent.select_action(state)
            obj = env.p.getAABB(env.obj_id, -1)
            obj_center = [(x + y) * 0.5 for x, y in zip(obj[0], obj[1])]
            world_pos = [(x + y) for x, y in zip(obj_center, action)]
            world_pos.append(1)
            # PyBullet view matrices are column-major, hence reshape(4, -1).T.
            camera_pos = np.array(env.view_matrix).reshape(4, -1).T.dot(np.array(world_pos))
            camera_pos = [x / camera_pos[-1] for x in camera_pos]
            print((i, j), camera_pos)
            Z[i][j] = camera_pos[1]

    ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap='rainbow')
    plt.show()
    plt.cla()

    fig = plt.figure()
    ax = fig.add_subplot(projection='3d')
    ax.view_init(elev=45, azim=90)
    ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap='rainbow')
    plt.show()
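# A self-contained numpy sketch of the world-to-camera projection used in
# show_one_policy: PyBullet returns the view matrix as a flat, column-major
# 16-element list, so reshape(4, -1).T rebuilds the row-major 4x4 matrix,
# which is applied to a homogeneous world point and normalized by the last
# component. The identity matrix below is a stand-in for env.view_matrix.
import numpy as np

def world_to_camera(view_matrix_flat, world_xyz):
    """Map a 3D world point into camera coordinates via a flat 4x4 view matrix."""
    world_pos = np.array(list(world_xyz) + [1.0])       # homogeneous point
    view = np.array(view_matrix_flat).reshape(4, -1).T  # column-major -> 4x4
    camera_pos = view.dot(world_pos)
    return camera_pos / camera_pos[-1]                  # homogeneous normalization

# Usage with a trivial identity view matrix:
#   world_to_camera(np.eye(4).flatten(), (0.1, 0.2, 0.3))
#   -> array([0.1, 0.2, 0.3, 1.0])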
def analysis_nlp_embedding(cmd='move sth down'):
    assert opt.mode == 'test'
    opt.test_id = 8888  # dedicated log id for evaluation runs

    Engine_module = importlib.import_module('Envs.env_{}'.format(opt.action_id))
    Engine = getattr(Engine_module, 'Engine{}'.format(opt.action_id))

    if opt.use_cycle:
        opt.load_cycle = Frame_transfer(opt)

    if opt.use_dmp:
        opt.load_dmp = DMP(opt)
        opt.each_action_lim = opt.each_action_lim * opt.cut_frame_num * opt.dmp_ratio

    if opt.video_reward:
        test_path = os.path.join(opt.project_root, 'logs/td3_log/test{}'.format(opt.test_id))
        if not os.path.exists(test_path):
            os.mkdir(test_path)
        evaluator = Frame_eval(
            img_path=os.path.join(opt.project_root,
                                  'logs/td3_log/test{}'.format(opt.test_id),
                                  'epoch-0'),
            frame_len=opt.cut_frame_num,
            start_id=0,
            memory_path=os.path.join(opt.project_root,
                                     'logs/td3_log/test{}'.format(opt.test_id),
                                     'memory'),
            class_label=opt.action_id,
            opt=opt)
        opt.load_video_pred = evaluator

    if opt.gui:
        opt.p = bc.BulletClient(connection_mode=pybullet.GUI)
    else:
        opt.p = bc.BulletClient(connection_mode=pybullet.DIRECT)

    env = Engine(opt)
    state_dim = env.observation_space
    action_dim = len(env.action_space['high'])
    max_action = env.action_space['high'][0]
    min_Val = torch.tensor(1e-7).float().to(device)  # min value

    if opt.use_embedding:
        if opt.nlp_embedding:
            agent = TD3_final(state_dim, action_dim, max_action, env.log_root, opt)
        else:
            agent = TD3_embedding(state_dim, action_dim, max_action, env.log_root, opt)
    else:
        agent = TD3(state_dim, action_dim, max_action, env.log_root, opt)

    agent.load(1400)
    state = env.reset()

    # Query a running bert-as-service instance and substitute the sentence
    # embedding for the one-hot task embedding in state[0].
    bert_engine = BertClient(port=5575, port_out=5576)
    for i in range(100):
        cmd = 'moving something close to something'  # overrides the `cmd` argument
        target = 42
        state = env.reset(target=target)
        embedding = bert_engine.encode([cmd])
        state[0] = embedding[0]
        action = agent.select_action(state)
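# A minimal sketch of the bert-as-service lookup used above. It assumes a
# bert-serving server is already running and listening on the same ports
# (5575 in, 5576 out); BertClient.encode takes a list of strings and returns
# a [batch, hidden] numpy array whose rows replace the task embedding in
# state[0]. The printed shape depends on the BERT model the server hosts.
from bert_serving.client import BertClient

bert_engine = BertClient(port=5575, port_out=5576)
embedding = bert_engine.encode(['moving something close to something'])
print(embedding.shape)  # e.g. (1, 768) for a BERT-base server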