def main():

  # Parse command line arguments.
  parser = argparse.ArgumentParser()
  parser.add_argument('--disp', action='store_true')
  parser.add_argument('--task', default='insertion')
  parser.add_argument('--mode', default='train')
  parser.add_argument('--n', default=1000, type=int)
  args = parser.parse_args()

  # Initialize environment and task.
  env = Environment(args.disp, hz=480)
  task = tasks.names[args.task]()
  task.mode = args.mode

  # Initialize scripted oracle agent and dataset.
  agent = task.oracle(env)
  dataset = Dataset(os.path.join('data', f'{args.task}-{task.mode}'))

  # Train seeds are even and test seeds are odd.
  seed = dataset.max_seed
  if seed < 0:
    seed = -1 if (task.mode == 'test') else -2

  # Collect training data from oracle demonstrations.
  while dataset.n_episodes < args.n:
    print(f'Oracle demonstration: {dataset.n_episodes + 1}/{args.n}')
    episode, total_reward = [], 0
    seed += 2
    np.random.seed(seed)
    obs, reward, _, info = env.reset(task)
    for _ in range(task.max_steps):
      act = agent.act(obs, info)
      episode.append((obs, act, reward, info))
      obs, reward, done, info = env.step(act)
      total_reward += reward
      print(f'{done} {total_reward}')
      if done:
        break
    episode.append((obs, None, reward, info))

    # Only save completed demonstrations.
    # TODO(andyzeng): add back deformable logic.
    if total_reward > 0.99:
      dataset.add(seed, episode)
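A minimal entry point and invocation for the demo-collection script above (a sketch; the module name `demos.py` and the import list are assumptions inferred from the calls in main(), and the flags come from the parser above):

# Assumed module-level imports (hypothetical but implied by main()):
#   argparse, os, numpy as np, plus the package's Environment, Dataset,
#   and tasks modules.
#
# Example invocation (flag names follow the parser above):
#   python demos.py --task=insertion --mode=train --n=1000
#   python demos.py --task=insertion --mode=test --n=100 --disp
if __name__ == '__main__':
  main()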
def main():

  # Parse command line arguments.
  parser = argparse.ArgumentParser()
  parser.add_argument('--disp', action='store_true')
  parser.add_argument('--task', default='insertion')
  parser.add_argument('--agent', default='transporter')
  parser.add_argument('--n_demos', default=100, type=int)
  parser.add_argument('--n_steps', default=40000, type=int)
  parser.add_argument('--n_runs', default=1, type=int)
  parser.add_argument('--gpu', default=0, type=int)
  parser.add_argument('--gpu_limit', default=None, type=int)
  args = parser.parse_args()

  # Configure which GPU to use.
  cfg = tf.config.experimental
  gpus = cfg.list_physical_devices('GPU')
  if not gpus:
    print('No GPUs detected. Running with CPU.')
  else:
    cfg.set_visible_devices(gpus[args.gpu], 'GPU')

    # Configure how much GPU to use (in gigabytes).
    if args.gpu_limit is not None:
      mem_limit = 1024 * args.gpu_limit
      dev_cfg = [cfg.VirtualDeviceConfiguration(memory_limit=mem_limit)]
      cfg.set_virtual_device_configuration(gpus[0], dev_cfg)

  # Initialize environment and task.
  env = Environment(args.disp, hz=480)
  task = tasks.names[args.task]()
  task.mode = 'test'

  # Load test dataset.
  dataset = Dataset(os.path.join('data', f'{args.task}-test'))

  # Run testing for each training run.
  for train_run in range(args.n_runs):
    name = f'{args.task}-{args.agent}-{args.n_demos}-{train_run}'

    # Initialize agent.
    np.random.seed(train_run)
    tf.random.set_seed(train_run)
    agent = agents.names[args.agent](name, args.task)

    # # Run testing every interval.
    # for train_step in range(0, args.n_steps + 1, args.interval):

    # Load trained agent.
    if args.n_steps > 0:
      agent.load(args.n_steps)

    # Run testing and save total rewards with last transition info.
    results = []
    for i in range(dataset.n_episodes):
      print(f'Test: {i + 1}/{dataset.n_episodes}')
      episode, seed = dataset.load(i)
      goal = episode[-1]
      total_reward = 0
      np.random.seed(seed)
      obs, reward, _, info = env.reset(task)
      for _ in range(task.max_steps):
        act = agent.act(obs, info, goal)
        obs, reward, done, info = env.step(act)
        total_reward += reward
        print(f'{done} {total_reward}')
        if done:
          break
      results.append((total_reward, info))

      # Save results.
      pickle.dump(results, open(f'{name}-{args.n_steps}.pkl', 'wb'))
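One way to read back a results file saved by the loop above (a sketch; the file name below is hypothetical and just follows the f'{name}-{args.n_steps}.pkl' pattern, and the tuple layout comes from the results.append call):

# Sketch: summarize a saved results pickle from the test script above.
import pickle

with open('insertion-transporter-100-0-40000.pkl', 'rb') as f:  # hypothetical file name
  results = pickle.load(f)

rewards = [total_reward for total_reward, _ in results]
print(f'Episodes: {len(rewards)}, '
      f'mean total reward: {sum(rewards) / len(rewards):.3f}')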
task = tasks.names[args.task]()
dataset = Dataset(os.path.join('data', args.task))
if args.subsamp_g:
  dataset.subsample_goals = True

# Collect training data from oracle demonstrations.
max_demos = 10**MAX_ORDER
task.mode = 'train'
seed_to_add = 0

# Daniel: check carefully if resuming the bag-items tasks.
# If continuing from prior calls, the demo index starts counting based on
# the number of demos that exist in `data/{task}`. Make the environment
# here, to avoid issues with cloth rendering + multiple Environment calls.
make_new_env = (dataset.num_episodes < max_demos)
if make_new_env:
  env = Environment(args.disp, hz=args.hz)

# For some tasks, call reset() again with a new seed if init state is 'done'.
while dataset.num_episodes < max_demos:
  seed = dataset.num_episodes + seed_to_add
  print(f'\nNEW DEMO: {dataset.num_episodes+1}/{max_demos}, seed {seed}\n')
  np.random.seed(seed)
  demo_reward, episode, t, last_obs_info = rollout(
      task.oracle(env), env, task, args)
  last_extras = last_obs_info[1]['extras']

  # Check if we should ignore or include this demo in the dataset.
  if ignore_this_demo(args, demo_reward, t, last_extras):
    seed_to_add += 1
    print(
def main():

  # Parse command line arguments.
  parser = argparse.ArgumentParser()
  parser.add_argument('--gpu', default='0')
  parser.add_argument('--disp', action='store_true')
  parser.add_argument('--task', default='hanoi')
  parser.add_argument('--agent', default='transporter')
  parser.add_argument('--hz', default=240.0, type=float)
  parser.add_argument('--num_demos', default='100')
  parser.add_argument('--num_rots', default=36, type=int)
  parser.add_argument('--gpu_mem_limit', default=None)
  parser.add_argument('--subsamp_g', action='store_true')
  parser.add_argument('--crop_bef_q', default=1, type=int)
  args = parser.parse_args()

  # Configure which GPU to use.
  cfg = tf.config.experimental
  gpus = cfg.list_physical_devices('GPU')
  if not gpus:
    print('No GPUs detected. Running with CPU.')
  else:
    cfg.set_visible_devices(gpus[int(args.gpu)], 'GPU')

  # Configure how much GPU to use.
  if args.gpu_mem_limit is not None:
    mem_limit = 1024 * int(args.gpu_mem_limit)
    print(args.gpu_mem_limit)
    dev_cfg = [cfg.VirtualDeviceConfiguration(memory_limit=mem_limit)]
    cfg.set_virtual_device_configuration(gpus[0], dev_cfg)

  # Initialize environment and task.
  env = Environment(args.disp, hz=args.hz)
  task = tasks.names[args.task]()
  dataset = Dataset(os.path.join('data', args.task))
  if args.subsamp_g:
    dataset.subsample_goals = True

  # Collect training data from oracle demonstrations.
  max_order = 3
  max_demos = 10**max_order
  task.mode = 'train'
  seed_toadd_train = 0
  while dataset.num_episodes < max_demos:
    seed = dataset.num_episodes + seed_toadd_train
    np.random.seed(seed)
    print(f'Demonstration: {dataset.num_episodes + 1}/{max_demos}, seed {seed}')
    total_reward, episode, t, last_obs_info = rollout(
        task.oracle(env), env, task)

    # Check if episode should be added; if not, then add seed offset.
    _, last_info = last_obs_info
    if ignore_this_demo(args, total_reward, t, last_info):
      seed_toadd_train += 1
      li = last_info['extras']
      print(f'Ignoring demo. {li}, seed_toadd: {seed_toadd_train}')
    else:
      dataset.add(episode, last_obs_info)

  # Collect validation dataset with different random seeds.
  validation_dataset = Dataset(os.path.join('validation_data', args.task))
  num_validation = 100
  seed_tosub_valid = 0
  while validation_dataset.num_episodes < num_validation:
    seed = 2**32 - 1 - validation_dataset.num_episodes - seed_tosub_valid
    np.random.seed(seed)
    print(f'Validation Demonstration: '
          f'{validation_dataset.num_episodes + 1}/{num_validation}, seed {seed}')
    total_reward, episode, t, last_obs_info = rollout(
        task.oracle(env), env, task)

    # Check if episode should be added; if not, then subtract seed offset.
    _, last_info = last_obs_info
    if ignore_this_demo(args, total_reward, t, last_info):
      seed_tosub_valid += 1
      li = last_info['extras']
      print(f'Ignoring demo. {li}, seed_tosub: {seed_tosub_valid}')
    else:
      validation_dataset.add(episode, last_obs_info)
  env.stop()
  del env

  # Evaluate on increasing orders of magnitude of demonstrations.
  num_train_runs = 1  # 3+ to measure variance over random initialization
  num_train_iters = 40000
  test_interval = 2000
  num_test_episodes = 20

  # There are a few seeds that the oracle can't complete either; skip these.
  # TODO(peteflorence): compute this automatically for each task.
  oracle_cant_complete_seed = []
  if args.task == 'insertion-sixdof':
    oracle_cant_complete_seed.append(3)
  num_test_episodes += len(oracle_cant_complete_seed)

  # Do multiple training runs from scratch.
  for train_run in range(num_train_runs):

    # Set up tensorboard logger.
    current_time = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
    train_log_dir = os.path.join('logs', args.agent, args.task, current_time,
                                 'train')
    train_summary_writer = tf.summary.create_file_writer(train_log_dir)

    # Set the beginning of the agent name. We may add more to it.
    name = f'{args.task}-{args.agent}-{args.num_demos}-{train_run}'

    # Initialize agent and limit random dataset sampling to fixed set.
    tf.random.set_seed(train_run)
    if args.agent == 'transporter':
      name = f'{name}-rots-{args.num_rots}-crop_bef_q-{args.crop_bef_q}'
      agent = agents.names[args.agent](name,
                                       args.task,
                                       num_rotations=args.num_rots,
                                       crop_bef_q=(args.crop_bef_q == 1))
    elif 'transporter-goal' in args.agent:
      # For transporter-goal and transporter-goal-naive agents.
      name = f'{name}-rots-{args.num_rots}'
      if args.subsamp_g:
        name += '-sub_g'
      else:
        name += '-fin_g'
      agent = agents.names[args.agent](name,
                                       args.task,
                                       num_rotations=args.num_rots)
    else:
      agent = agents.names[args.agent](name, args.task)

    np.random.seed(train_run)
    num_demos = int(args.num_demos)
    train_episodes = np.random.choice(range(max_demos), num_demos, False)
    dataset.set(train_episodes)
    # agent.load(10000)

    performance = []
    while agent.total_iter < num_train_iters:

      # Train agent.
      tf.keras.backend.set_learning_phase(1)
      agent.train(dataset,
                  num_iter=test_interval,
                  writer=train_summary_writer,
                  validation_dataset=validation_dataset)
      tf.keras.backend.set_learning_phase(0)

      # Skip evaluation depending on the task or if it's a goal-based agent.
      if (skip_testing_during_training(args.task) or
          'transporter-goal' in args.agent):
        continue

      # Evaluate agent.
      task.mode = 'test'
      env = Environment(args.disp, hz=args.hz)
      for episode in range(num_test_episodes):
        if episode in oracle_cant_complete_seed:
          continue
        np.random.seed(10**max_order + episode)
        total_reward, _, _, _ = rollout(agent, env, task)
        print(f'Test: {episode} Total Reward: {total_reward:.2f}')
        performance.append((agent.total_iter, total_reward))
      env.stop()
      del env

      # Save results.
      pickle.dump(performance, open(f'{name}.pkl', 'wb'))
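The collection and training scripts above call a `rollout(agent, env, task)` helper that is not shown in this section. Below is a minimal sketch of what it plausibly does, inferred only from its call sites and from the episode loop in the demo-collection script; the body is an assumption, not the repository's implementation:

def rollout(agent, env, task):
  # Sketch only: run one episode and return the 4-tuple the callers above
  # unpack, i.e. (total_reward, episode, num_steps, last_obs_info).
  episode, total_reward, t = [], 0, 0
  obs, reward, _, info = env.reset(task)
  for t in range(task.max_steps):
    act = agent.act(obs, info)
    episode.append((obs, act, reward, info))
    obs, reward, done, info = env.step(act)
    total_reward += reward
    if done:
      break
  episode.append((obs, None, reward, info))
  # last_obs_info is indexed as last_obs_info[1]['extras'] by one caller,
  # so it is assumed to be an (obs, info) pair.
  return total_reward, episode, t, (obs, info)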
dev_cfg = [cfg.VirtualDeviceConfiguration(memory_limit=MEM_LIMIT)]
cfg.set_virtual_device_configuration(gpus[0], dev_cfg)

# Initialize task and set mode to 'test'; I think this only matters for kitting.
task = tasks.names[args.task]()
task.mode = 'test'

# Evaluate on saved snapshots. Go backwards so the later (usually better)
# snapshots are evaluated first.
snapshot_itrs = [i * 2000 for i in range(1, 10 + 1)]  # 10 snapshots, to save on compute.
snapshot_itrs = snapshot_itrs[::-1]

if not os.path.exists('test_results'):
  os.makedirs('test_results')

# Make environment once, due to issues with deformables + multiple calls.
env = Environment(args.disp, hz=args.hz)

# Check if it's goal-conditioned.
goal_conditioned = is_goal_conditioned(args)

for snapshot_itr in snapshot_itrs:
  # Set random seeds, so different snapshots test on the same starting states.
  tf.random.set_seed(args.train_run)
  np.random.seed(args.train_run)

  # Set the beginning of the agent name.
  name = f'{args.task}-{args.agent}-{args.num_demos}-{args.train_run}'

  # Initialize agent and load from snapshot. NOTE: the main difference from
  # main.py is to use num_rots_inf (not args.num_rots) at inference time.
  # Also, `self.name` must match what's in main.py, to load the correct weights.
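The snapshot-evaluation fragment above calls an `is_goal_conditioned(args)` helper that is not shown here. A plausible sketch, assuming it only inspects the agent name the same way the training script's `'transporter-goal' in args.agent` check does:

def is_goal_conditioned(args):
  # Sketch only: mirrors the `'transporter-goal' in args.agent` test used in
  # the training script above; the real helper may also inspect the task.
  return 'transporter-goal' in args.agent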