def phase1_train(spec, specfilename):
    util.header('=== Phase 1: training ===')

    # Generate array job that trains all algorithms
    # over all tasks, for all dataset sizes (3 loops)
    taskname2dset = gen_taskname2outfile(spec)

    # Make checkpoint dir. All outputs go here
    checkptdir = os.path.join(spec['options']['storagedir'], spec['options']['checkpt_subdir'])
    util.mkdir_p(checkptdir)
    # Make sure checkpoint dir is empty
    assert not os.listdir(checkptdir), 'Checkpoint directory {} is not empty!'.format(checkptdir)

    # Assemble the commands to run on the cluster
    cmd_templates, outputfilenames, argdicts = [], [], []
    for alg in spec['training']['algorithms']:
        for task in spec['tasks']:
            for num_trajs in spec['training']['dataset_num_trajs']:
                assert num_trajs <= spec['training']['full_dataset_num_trajs']
                for run in range(spec['training']['runs']):
                    # A string identifier. Used in filenames for this run
                    strid = 'alg={},task={},num_trajs={},run={}'.format(
                        alg['name'], task['name'], num_trajs, run)
                    cmd_templates.append(alg['cmd'].replace('\n', ' ').strip())
                    outputfilenames.append(strid + '.txt')
                    argdicts.append({
                        'env': task['env'],
                        'dataset': taskname2dset[task['name']],
                        'num_trajs': num_trajs,
                        'cuts_off_on_success': int(task['cuts_off_on_success']),
                        'data_subsamp_freq': task['data_subsamp_freq'],
                        'out': os.path.join(checkptdir, strid + '.h5'),
                    })

    pbsopts = spec['options']['pbs']
    runpbs(
        cmd_templates, outputfilenames, argdicts,
        jobname=pbsopts['jobname'], queue=pbsopts['queue'], nodes=1, ppn=pbsopts['ppn'],
        job_range=pbsopts['range'] if 'range' in pbsopts else None,
        qsub_script_copy=os.path.join(checkptdir, 'qsub_script.sh')
    )

    # Copy the pipeline yaml file to the output dir too
    shutil.copyfile(specfilename, os.path.join(checkptdir, 'pipeline.yaml'))

    # Keep git commit
    import subprocess
    git_hash = subprocess.check_output('git rev-parse HEAD', shell=True).strip()
    with open(os.path.join(checkptdir, 'git_hash.txt'), 'w') as f:
        f.write(git_hash + '\n')
def phase1_train(spec, specfilename): """ In the normal code, this rounds up a long list of commands of the form `python (script name) (arguments)` which can be run on a cluster. It's really cool how this works. The `cmd_templates` list turns into a bunch of python script calls, except it has string formatting to allow the arguments to fill them in. A much better way than writing a long bash script! (Actually, to *get* a bash script, just write these one by one to a file and then I think running the file is OK.) I modified this to run sequentially. """ util.header('=== Phase 1: training ===') # Generate array job that trains (1) all algorithms over (2) all tasks, for # (3) all dataset sizes, so yes it's three loops. taskname2dset = gen_taskname2outfile(spec) # Make checkpoint dir. All outputs go here checkptdir = os.path.join(spec['options']['storagedir'], spec['options']['checkpt_subdir']) util.mkdir_p(checkptdir) # Make sure checkpoint dir is empty assert not os.listdir(checkptdir), 'Checkpoint directory {} is not empty!'.format(checkptdir) # Assemble the commands to run on the cluster cmd_templates, outputfilenames, argdicts = [], [], [] for alg in spec['training']['algorithms']: for task in spec['tasks']: for num_trajs in spec['training']['dataset_num_trajs']: assert num_trajs <= spec['training']['full_dataset_num_trajs'] for run in range(spec['training']['runs']): # A string identifier. Used in filenames for this run strid = 'alg={},task={},num_trajs={},run={}'.format(alg['name'], task['name'], num_trajs, run) cmd_templates.append(alg['cmd'].replace('\n', ' ').strip()) outputfilenames.append(strid + '.txt') argdicts.append({ 'env': task['env'], 'dataset': taskname2dset[task['name']], 'num_trajs': num_trajs, 'cuts_off_on_success': int(task['cuts_off_on_success']), 'data_subsamp_freq': task['data_subsamp_freq'], 'out': os.path.join(checkptdir, strid + '.h5'), }) # (New code from Daniel) Put commands in a list and run them sequentially. all_commands = [x.format(**y) for (x,y) in zip(cmd_templates,argdicts)] print("Total number of commands to run: {}.".format(len(all_commands))) for command in all_commands: subprocess.call(command.split(" "))
def gen_taskname2outfile(spec, assert_not_exists=False):
    '''
    Generate dataset filenames for each task. Phase 0 (sampling) writes to
    these files, phase 1 (training) reads from them.
    '''
    taskname2outfile = {}
    trajdir = os.path.join(spec['options']['storagedir'], spec['options']['traj_subdir'])
    util.mkdir_p(trajdir)
    for task in spec['tasks']:
        assert task['name'] not in taskname2outfile
        fname = os.path.join(trajdir, 'trajs_{}.h5'.format(task['name']))
        if assert_not_exists:
            assert not os.path.exists(fname), 'Traj destination {} already exists'.format(fname)
        taskname2outfile[task['name']] = fname
    return taskname2outfile
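# A minimal usage sketch for gen_taskname2outfile. The spec below contains only
# the keys this function reads; the task names and paths are made up:
spec = {
    'options': {'storagedir': 'imitation_runs', 'traj_subdir': 'trajs'},
    'tasks': [{'name': 'cartpole'}, {'name': 'pendulum'}],
}
print(gen_taskname2outfile(spec))
# {'cartpole': 'imitation_runs/trajs/trajs_cartpole.h5',
#  'pendulum': 'imitation_runs/trajs/trajs_pendulum.h5'}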
def phase1_train(spec, specfilename):
    util.header('=== Phase 1: training ===')

    # Generate array job that trains all algorithms
    # over all tasks, for all dataset sizes (3 loops)
    taskname2dset = gen_taskname2outfile(spec)

    # Make checkpoint dir. All outputs go here
    checkptdir = os.path.join(spec['options']['storagedir'], spec['options']['checkpt_subdir'])
    util.mkdir_p(checkptdir)
    # Make sure checkpoint dir is empty
    assert not os.listdir(checkptdir), 'Checkpoint directory {} is not empty!'.format(checkptdir)

    # Assemble the commands to run on the cluster
    cmd_templates, outputfilenames, argdicts = [], [], []
    for alg in spec['training']['algorithms']:
        for task in spec['tasks']:
            for num_trajs in spec['training']['dataset_num_trajs']:
                assert num_trajs <= spec['training']['full_dataset_num_trajs']
                for run in range(spec['training']['runs']):
                    # A string identifier. Used in filenames for this run
                    strid = 'alg={},task={},num_trajs={},run={}'.format(
                        alg['name'], task['name'], num_trajs, run)
                    cmd_templates.append(alg['cmd'].replace('\n', ' ').strip())
                    outputfilenames.append(strid + '.txt')
                    argdicts.append({
                        'env': task['env'],
                        'dataset': taskname2dset[task['name']],
                        'num_trajs': num_trajs,
                        'cuts_off_on_success': int(task['cuts_off_on_success']),
                        'data_subsamp_freq': task['data_subsamp_freq'],
                        'out': os.path.join(checkptdir, strid + '.h5'),
                    })

    # Fill in each template with its arguments and run the commands sequentially
    import subprocess
    for x, y in zip(cmd_templates, argdicts):
        subprocess.call(x.format(**y).split(" "))

    # Copy the pipeline yaml file to the output dir too
    shutil.copyfile(specfilename, os.path.join(checkptdir, 'pipeline.yaml'))

    # Keep git commit
    git_hash = subprocess.check_output('git rev-parse HEAD', shell=True).strip()
    with open(os.path.join(checkptdir, 'git_hash.txt'), 'w') as f:
        f.write(git_hash + '\n')
def main():
    np.set_printoptions(suppress=True, precision=5, linewidth=1000)

    parser = argparse.ArgumentParser()
    # MDP options
    parser.add_argument('policy', type=str)
    parser.add_argument('output_dir', type=str)
    parser.add_argument('--deterministic', default=1, type=int)
    parser.add_argument('--max_steps', type=int, required=True)
    parser.add_argument('--env_name', type=str, default=None)
    args = parser.parse_args()

    util.mkdir_p(args.output_dir)
    assert not os.listdir(args.output_dir), '%s is not empty' % args.output_dir
    print 'Writing to', args.output_dir

    # Load the saved state
    policy_file, policy_key = util.split_h5_name(args.policy)
    print 'Loading policy parameters from %s in %s' % (policy_key, policy_file)
    with h5py.File(policy_file, 'r') as f:
        train_args = json.loads(f.attrs['args'])
        dset = f[policy_key]
        import pprint
        pprint.pprint(dict(dset.attrs))

    # Initialize the MDP
    env_name = train_args['env_name'] if args.env_name is None else args.env_name
    print 'Loading environment', env_name
    mdp = rllabenv.RLLabMDP(env_name)
    util.header('MDP observation space, action space sizes: %d, %d\n' % (
        mdp.obs_space.dim, mdp.action_space.storage_size))
    util.header('Max steps is {}'.format(args.max_steps))

    # Initialize the policy and load its parameters
    enable_obsnorm = bool(train_args['enable_obsnorm']) if 'enable_obsnorm' in train_args \
        else train_args['obsnorm_mode'] != 'none'
    if isinstance(mdp.action_space, policyopt.ContinuousSpace):
        policy_cfg = rl.GaussianPolicyConfig(
            hidden_spec=train_args['policy_hidden_spec'],
            min_stdev=0.,
            init_logstdev=0.,
            enable_obsnorm=enable_obsnorm)
        policy = rl.GaussianPolicy(policy_cfg, mdp.obs_space, mdp.action_space, 'GaussianPolicy')
    else:
        policy_cfg = rl.GibbsPolicyConfig(
            hidden_spec=train_args['policy_hidden_spec'],
            enable_obsnorm=enable_obsnorm)
        policy = rl.GibbsPolicy(policy_cfg, mdp.obs_space, mdp.action_space, 'GibbsPolicy')
    policy.load_h5(policy_file, policy_key)

    # Animate
    sim = mdp.new_sim()
    steps = 0
    exit = False
    while not exit:
        sim.reset()
        while not sim.done:
            a = policy.sample_actions(sim.obs[None,:], bool(args.deterministic))[0][0,:]
            sim.step(a)
            sim.draw()
            viewer = sim.env.viewer
            data, w, h = viewer.get_image()
            image = np.fromstring(data, dtype='uint8').reshape(h, w, 3)[::-1,:,:]
            cv2.imwrite('%s/img_%08d.png' % (args.output_dir, steps), image[:,:,::-1])

            print steps
            steps += 1
            if steps >= args.max_steps:
                exit = True
                break
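# A note on the double reversal in the image handling above, assuming the usual
# OpenGL/OpenCV conventions: viewer.get_image() hands back pixel rows bottom-up
# (OpenGL's origin is the bottom-left corner), so [::-1,:,:] flips the frame
# vertically; cv2.imwrite then expects BGR channel order, so [:,:,::-1] reverses
# the channels from RGB. A tiny demonstration:
import numpy as np
rgb_bottom_up = np.zeros((480, 640, 3), dtype='uint8')
rgb_bottom_up[:, :, 0] = 255                          # a pure-red frame, stored bottom-up
bgr_top_down = rgb_bottom_up[::-1, :, :][:, :, ::-1]  # flip rows, then swap R<->B
assert bgr_top_down[0, 0, 2] == 255                   # red now lives in the last channel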
def main(): """ If we have trained policies and snapshots, I think we can use this to watch videos of our agent in action. I don't think I can use this without doing some training first. This doesn't do training itself; we need to provide a policy, but the h5 file has to also be a directory which contains other information (see the yaml files for what I believe are similar examples). I'm not sure why we have rl giving us Gaussian policies vs Gibbs policies. What's the difference? They should just be functions mapping from states to actions? After that, it seems like we're just simulating stuff and hopefully a video would appear if I can get this to run. """ np.set_printoptions(suppress=True, precision=5, linewidth=1000) parser = argparse.ArgumentParser() # MDP options parser.add_argument('policy', type=str) parser.add_argument('output_dir', type=str) parser.add_argument('--deterministic', default=1, type=int) parser.add_argument('--max_steps', type=int, required=True) parser.add_argument('--env_name', type=str, default=None) args = parser.parse_args() util.mkdir_p(args.output_dir) assert not os.listdir(args.output_dir), '%s is not empty' % args.output_dir print 'Writing to', args.output_dir # Load the saved state policy_file, policy_key = util.split_h5_name(args.policy) print 'Loading policy parameters from %s in %s' % (policy_key, policy_file) with h5py.File(policy_file, 'r') as f: train_args = json.loads(f.attrs['args']) dset = f[policy_key] import pprint pprint.pprint(dict(dset.attrs)) # Initialize the MDP env_name = train_args['env_name'] if args.env_name is None else args.env_name print 'Loading environment', env_name mdp = rlgymenv.RLGymMDP(env_name) util.header('MDP observation space, action space sizes: %d, %d\n' % (mdp.obs_space.dim, mdp.action_space.storage_size)) util.header('Max steps is {}'.format(args.max_steps)) # Initialize the policy and load its parameters enable_obsnorm = bool(train_args['enable_obsnorm']) if 'enable_obsnorm' in train_args else train_args['obsnorm_mode'] != 'none' if isinstance(mdp.action_space, policyopt.ContinuousSpace): policy_cfg = rl.GaussianPolicyConfig( hidden_spec=train_args['policy_hidden_spec'], min_stdev=0., init_logstdev=0., enable_obsnorm=enable_obsnorm) policy = rl.GaussianPolicy(policy_cfg, mdp.obs_space, mdp.action_space, 'GaussianPolicy') else: policy_cfg = rl.GibbsPolicyConfig( hidden_spec=train_args['policy_hidden_spec'], enable_obsnorm=enable_obsnorm) policy = rl.GibbsPolicy(policy_cfg, mdp.obs_space, mdp.action_space, 'GibbsPolicy') policy.load_h5(policy_file, policy_key) # Animate sim = mdp.new_sim() steps = 0 exit = False while not exit: sim.reset() while not sim.done: a = policy.sample_actions(sim.obs[None,:], bool(args.deterministic))[0][0,:] sim.step(a) sim.draw() viewer = sim.env.viewer data, w, h = viewer.get_image() image = np.fromstring(data, dtype='uint8').reshape(h, w, 3)[::-1,:,:] cv2.imwrite('%s/img_%08d.png' % (args.output_dir, steps), image[:,:,::-1]) print steps steps += 1 if steps >= args.max_steps: exit = True break
def phase1_train(spec, specfilename):
    util.header('=== Phase 1: training ===')

    # Generate array job that trains all algorithms
    # over all tasks, for all dataset sizes (3 loops)
    taskname2dset = gen_taskname2outfile(spec)

    # Theano GPU command prefix
    gpu_cmd_prefix = 'THEANO_FLAGS=mode=FAST_RUN,floatX=float32,device=gpu'

    # Make checkpoint dir. All outputs go here
    checkptdir = os.path.join(spec['options']['storagedir'], spec['options']['checkpt_subdir'])
    util.mkdir_p(checkptdir)
    # Make sure checkpoint dir is empty
    assert not os.listdir(checkptdir), 'Checkpoint directory {} is not empty!'.format(checkptdir)

    # Assemble the commands to run on the cluster
    cmd_templates, outputfilenames, argdicts = [], [], []
    for alg in spec['training']['algorithms']:
        for task in spec['tasks']:
            for num_trajs in spec['training']['dataset_num_trajs']:
                assert num_trajs <= spec['training']['full_dataset_num_trajs']
                for run in range(spec['training']['runs']):
                    # A string identifier. Used in filenames for this run
                    strid = 'alg={},task={},num_trajs={},run={}'.format(
                        alg['name'], task['name'], num_trajs, run)
                    # Prepend the Theano GPU flags if the spec asks for GPU training
                    if spec['training']['use_gpu']:
                        cmd_templates.append(
                            gpu_cmd_prefix + ' ' + alg['cmd'].replace('\n', ' ').strip())
                    else:
                        cmd_templates.append(alg['cmd'].replace('\n', ' ').strip())
                    outputfilenames.append(strid + '.txt')
                    argdicts.append({
                        'env': task['env'],
                        'dataset': taskname2dset[task['name']],
                        'num_trajs': num_trajs,
                        'cuts_off_on_success': int(task['cuts_off_on_success']),
                        'data_subsamp_freq': task['data_subsamp_freq'],
                        'out': os.path.join(checkptdir, strid + '.h5'),
                    })

    pbsopts = spec['options']['pbs']
    # runpbs(
    #     cmd_templates, outputfilenames, argdicts,
    #     jobname=pbsopts['jobname'], queue=pbsopts['queue'], nodes=1, ppn=pbsopts['ppn'],
    #     job_range=pbsopts['range'] if 'range' in pbsopts else None,
    #     qsub_script_copy=os.path.join(checkptdir, 'qsub_script.sh')
    # )
    import datetime
    runcmds(
        cmd_templates, outputfilenames, argdicts,
        jobname=pbsopts['jobname'],
        outputfile_dir=os.path.join(
            checkptdir,
            'logs_%s_%s' % (pbsopts['jobname'],
                            datetime.datetime.now().strftime('%Y-%m-%d_%H:%M:%S'))))

    # Copy the pipeline yaml file to the output dir too
    shutil.copyfile(specfilename, os.path.join(checkptdir, 'pipeline.yaml'))
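# With use_gpu set, each assembled command ends up looking roughly like the
# following (the script name and flags are illustrative, not copied from a
# real spec):
#
#   THEANO_FLAGS=mode=FAST_RUN,floatX=float32,device=gpu python scripts/imitate_mj.py \
#       --env Hopper-v1 --data imitation_runs/trajs/trajs_hopper.h5 --out checkpoints/...
#
# One caveat: because the prefix sets an environment variable, these commands
# need a shell to interpret them. A naive subprocess.call(command.split(" ")),
# as in the sequential variants above, would fail, since "THEANO_FLAGS=..." is
# not an executable; presumably runcmds runs the commands through a shell.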