Esempio n. 1
0
def phase1_train(spec, specfilename):
    '''
    Phase 1: submit one training job per (algorithm, task, dataset size, run).

    Builds a command template, a log file name and a dict of template arguments for
    every combination, hands them to runpbs, then stores a copy of the pipeline file
    and the current git commit hash in the checkpoint directory.

    Args:
        spec: parsed pipeline specification (nested dicts/lists). Reads
            spec['options'] ('storagedir', 'checkpt_subdir', 'pbs'), spec['tasks']
            and spec['training'] ('algorithms', 'dataset_num_trajs',
            'full_dataset_num_trajs', 'runs').
        specfilename: path of the pipeline file; copied into the checkpoint
            directory as 'pipeline.yaml'.

    Raises:
        AssertionError: if the checkpoint directory is not empty, or if a requested
            dataset size exceeds spec['training']['full_dataset_num_trajs'].
        subprocess.CalledProcessError: if `git rev-parse HEAD` fails.
    '''
    import subprocess

    util.header('=== Phase 1: training ===')

    # Generate array job that trains all algorithms
    # over all tasks, for all dataset sizes (3 loops)

    taskname2dset = gen_taskname2outfile(spec)

    # Make checkpoint dir. All outputs go here
    checkptdir = os.path.join(spec['options']['storagedir'], spec['options']['checkpt_subdir'])
    util.mkdir_p(checkptdir)
    # Make sure checkpoint dir is empty
    assert not os.listdir(checkptdir), 'Checkpoint directory {} is not empty!'.format(checkptdir)

    # Assemble the commands to run on the cluster
    cmd_templates, outputfilenames, argdicts = [], [], []
    for alg in spec['training']['algorithms']:
        for task in spec['tasks']:
            for num_trajs in spec['training']['dataset_num_trajs']:
                assert num_trajs <= spec['training']['full_dataset_num_trajs']
                for run in range(spec['training']['runs']):
                    # A string identifier. Used in filenames for this run
                    strid = 'alg={},task={},num_trajs={},run={}'.format(alg['name'], task['name'], num_trajs, run)
                    # The command may span several lines in the spec; flatten it to one line
                    cmd_templates.append(alg['cmd'].replace('\n', ' ').strip())
                    outputfilenames.append(strid + '.txt')
                    argdicts.append({
                        'env': task['env'],
                        'dataset': taskname2dset[task['name']],
                        'num_trajs': num_trajs,
                        'cuts_off_on_success': int(task['cuts_off_on_success']),
                        'data_subsamp_freq': task['data_subsamp_freq'],
                        'out': os.path.join(checkptdir, strid + '.h5'),
                    })

    pbsopts = spec['options']['pbs']
    runpbs(
        cmd_templates, outputfilenames, argdicts,
        jobname=pbsopts['jobname'], queue=pbsopts['queue'], nodes=1, ppn=pbsopts['ppn'],
        # 'range' is optional in the PBS options
        job_range=pbsopts.get('range'),
        qsub_script_copy=os.path.join(checkptdir, 'qsub_script.sh')
    )

    # Copy the pipeline yaml file to the output dir too
    shutil.copyfile(specfilename, os.path.join(checkptdir, 'pipeline.yaml'))

    # Keep git commit. An argument list avoids spawning a shell for a fixed command.
    git_hash = subprocess.check_output(['git', 'rev-parse', 'HEAD']).strip()
    if not isinstance(git_hash, str):
        # Python 3 returns bytes, which cannot be concatenated with '\n'
        git_hash = git_hash.decode('ascii')
    with open(os.path.join(checkptdir, 'git_hash.txt'), 'w') as f:
        f.write(git_hash + '\n')
Esempio n. 2
0
def phase1_train(spec, specfilename):
    """
    Phase 1: run every training command locally, one after the other.

    One command is built per (algorithm, task, dataset size, run). Each algorithm's
    'cmd' entry is a template whose `{placeholders}` are filled from a per-run
    argument dict via str.format, and the resulting command line is executed with
    subprocess.call.

    Args:
        spec: parsed pipeline specification (nested dicts/lists). Reads
            spec['options'] ('storagedir', 'checkpt_subdir'), spec['tasks'] and
            spec['training'] ('algorithms', 'dataset_num_trajs',
            'full_dataset_num_trajs', 'runs').
        specfilename: unused by this sequential variant; kept so the signature
            stays the same.

    Raises:
        AssertionError: if the checkpoint directory is not empty, or if a requested
            dataset size exceeds spec['training']['full_dataset_num_trajs'].
    """
    import shlex

    util.header('=== Phase 1: training ===')

    # Trains (1) all algorithms over (2) all tasks, for (3) all dataset sizes,
    # hence the nested loops below.
    taskname2dset = gen_taskname2outfile(spec)

    # Make checkpoint dir. All outputs go here
    checkptdir = os.path.join(spec['options']['storagedir'], spec['options']['checkpt_subdir'])
    util.mkdir_p(checkptdir)
    # Make sure checkpoint dir is empty
    assert not os.listdir(checkptdir), 'Checkpoint directory {} is not empty!'.format(checkptdir)

    # Assemble the command templates and their arguments
    cmd_templates, argdicts = [], []
    for alg in spec['training']['algorithms']:
        for task in spec['tasks']:
            for num_trajs in spec['training']['dataset_num_trajs']:
                assert num_trajs <= spec['training']['full_dataset_num_trajs']
                for run in range(spec['training']['runs']):
                    # A string identifier. Used in filenames for this run
                    strid = 'alg={},task={},num_trajs={},run={}'.format(alg['name'], task['name'], num_trajs, run)
                    # The command may span several lines in the spec; flatten it to one line
                    cmd_templates.append(alg['cmd'].replace('\n', ' ').strip())
                    argdicts.append({
                        'env': task['env'],
                        'dataset': taskname2dset[task['name']],
                        'num_trajs': num_trajs,
                        'cuts_off_on_success': int(task['cuts_off_on_success']),
                        'data_subsamp_freq': task['data_subsamp_freq'],
                        'out': os.path.join(checkptdir, strid + '.h5'),
                    })

    # Fill in the templates and run the commands sequentially.
    all_commands = [template.format(**args) for template, args in zip(cmd_templates, argdicts)]
    print("Total number of commands to run: {}.".format(len(all_commands)))
    for command in all_commands:
        # shlex.split tokenizes like a POSIX shell: runs of whitespace (left over
        # from the flattened newlines/indentation) do not become empty arguments,
        # and quoted arguments stay in one piece.
        returncode = subprocess.call(shlex.split(command))
        if returncode != 0:
            # Keep going with the remaining runs, but do not hide the failure.
            print("Command exited with status {}: {}".format(returncode, command))
Esempio n. 3
0
def gen_taskname2outfile(spec, assert_not_exists=False):
    '''
    Map every task name in the spec to the HDF5 file holding its trajectories.

    The files live in <storagedir>/<traj_subdir> (created through util.mkdir_p) and
    are named trajs_<task name>.h5. Phase 0 (sampling) writes to these files,
    phase 1 (training) reads from them.

    With assert_not_exists=True an AssertionError is raised as soon as one of the
    destination files is found on disk. A repeated task name also trips an assertion.
    '''
    options = spec['options']
    trajdir = os.path.join(options['storagedir'], options['traj_subdir'])
    util.mkdir_p(trajdir)

    taskname2outfile = {}
    for name in (task['name'] for task in spec['tasks']):
        # Task names must be unique, otherwise two tasks would share one file
        assert name not in taskname2outfile
        fname = os.path.join(trajdir, 'trajs_{}.h5'.format(name))
        # Only touches the filesystem when the caller asked for the check
        assert not (assert_not_exists and os.path.exists(fname)), \
            'Traj destination {} already exists'.format(fname)
        taskname2outfile[name] = fname
    return taskname2outfile
Esempio n. 4
0
def gen_taskname2outfile(spec, assert_not_exists=False):
    '''
    Build the dict {task name: trajectory file path} for all tasks in the spec.

    Paths have the form <storagedir>/<traj_subdir>/trajs_<task name>.h5; the
    directory is created with util.mkdir_p. Phase 0 (sampling) writes to these
    files, phase 1 (training) reads from them.

    Asserts that task names are unique and, when assert_not_exists is true, that
    none of the files exists yet.
    '''
    storage_root = spec['options']['storagedir']
    traj_subdir = spec['options']['traj_subdir']
    out_dir = os.path.join(storage_root, traj_subdir)
    util.mkdir_p(out_dir)

    result = {}
    for entry in spec['tasks']:
        key = entry['name']
        assert key not in result
        path = os.path.join(out_dir, 'trajs_{}.h5'.format(key))
        if assert_not_exists:
            assert not os.path.exists(path), 'Traj destination {} already exists'.format(path)
        result[key] = path
    return result
Esempio n. 5
0
def phase1_train(spec, specfilename):
    '''
    Phase 1: run every training command locally, then record provenance.

    One command is built per (algorithm, task, dataset size, run) by filling the
    algorithm's 'cmd' template with a per-run argument dict. The commands are
    executed one after the other with subprocess.call. Afterwards the pipeline
    file and the current git commit hash are stored in the checkpoint directory.

    Args:
        spec: parsed pipeline specification (nested dicts/lists). Reads
            spec['options'] ('storagedir', 'checkpt_subdir'), spec['tasks'] and
            spec['training'] ('algorithms', 'dataset_num_trajs',
            'full_dataset_num_trajs', 'runs').
        specfilename: path of the pipeline file; copied into the checkpoint
            directory as 'pipeline.yaml'.

    Raises:
        AssertionError: if the checkpoint directory is not empty, or if a requested
            dataset size exceeds spec['training']['full_dataset_num_trajs'].
        subprocess.CalledProcessError: if `git rev-parse HEAD` fails.
    '''
    import shlex

    util.header('=== Phase 1: training ===')

    # Trains all algorithms over all tasks, for all dataset sizes (3 loops)

    taskname2dset = gen_taskname2outfile(spec)

    # Make checkpoint dir. All outputs go here
    checkptdir = os.path.join(spec['options']['storagedir'], spec['options']['checkpt_subdir'])
    util.mkdir_p(checkptdir)
    # Make sure checkpoint dir is empty
    assert not os.listdir(checkptdir), 'Checkpoint directory {} is not empty!'.format(checkptdir)

    # Assemble the command templates and their arguments
    cmd_templates, argdicts = [], []
    for alg in spec['training']['algorithms']:
        for task in spec['tasks']:
            for num_trajs in spec['training']['dataset_num_trajs']:
                assert num_trajs <= spec['training']['full_dataset_num_trajs']
                for run in range(spec['training']['runs']):
                    # A string identifier. Used in filenames for this run
                    strid = 'alg={},task={},num_trajs={},run={}'.format(alg['name'], task['name'], num_trajs, run)
                    # The command may span several lines in the spec; flatten it to one line
                    cmd_templates.append(alg['cmd'].replace('\n', ' ').strip())
                    argdicts.append({
                        'env': task['env'],
                        'dataset': taskname2dset[task['name']],
                        'num_trajs': num_trajs,
                        'cuts_off_on_success': int(task['cuts_off_on_success']),
                        'data_subsamp_freq': task['data_subsamp_freq'],
                        'out': os.path.join(checkptdir, strid + '.h5'),
                    })

    for template, args in zip(cmd_templates, argdicts):
        command = template.format(**args)
        # shlex.split tokenizes like a POSIX shell: runs of whitespace (left over
        # from the flattened newlines/indentation) do not become empty arguments,
        # and quoted arguments stay in one piece.
        returncode = subprocess.call(shlex.split(command))
        if returncode != 0:
            # Keep going with the remaining runs, but do not hide the failure.
            print('Command exited with status {}: {}'.format(returncode, command))

    # Copy the pipeline yaml file to the output dir too
    shutil.copyfile(specfilename, os.path.join(checkptdir, 'pipeline.yaml'))

    # Keep git commit. An argument list avoids spawning a shell for a fixed command.
    git_hash = subprocess.check_output(['git', 'rev-parse', 'HEAD']).strip()
    if not isinstance(git_hash, str):
        # Python 3 returns bytes, which cannot be concatenated with '\n'
        git_hash = git_hash.decode('ascii')
    with open(os.path.join(checkptdir, 'git_hash.txt'), 'w') as f:
        f.write(git_hash + '\n')
Esempio n. 6
0
def main():
    '''
    Roll out a saved policy in its environment and dump every rendered frame as a PNG.

    Command-line arguments:
        policy: policy location; util.split_h5_name splits it into an HDF5 file
            name and a key inside that file.
        output_dir: directory that receives img_XXXXXXXX.png frames. It is created
            if needed and must be empty.
        --deterministic: integer flag (default 1); converted to bool and passed to
            policy.sample_actions.
        --max_steps: required; total number of frames to write, counted across
            episodes. Nothing is simulated when it is 0 or negative.
        --env_name: optional override for the 'env_name' stored with the policy.

    Raises:
        AssertionError: if output_dir is not empty.
    '''
    import pprint

    np.set_printoptions(suppress=True, precision=5, linewidth=1000)

    parser = argparse.ArgumentParser()
    # MDP options
    parser.add_argument('policy', type=str)
    parser.add_argument('output_dir', type=str)
    parser.add_argument('--deterministic', default=1, type=int)
    parser.add_argument('--max_steps', type=int, required=True)
    parser.add_argument('--env_name', type=str, default=None)
    args = parser.parse_args()

    util.mkdir_p(args.output_dir)
    assert not os.listdir(args.output_dir), '%s is not empty' % args.output_dir
    # Single-argument print(...) calls behave the same on Python 2 and Python 3
    print('Writing to %s' % (args.output_dir,))

    # Load the saved state
    policy_file, policy_key = util.split_h5_name(args.policy)
    print('Loading policy parameters from %s in %s' % (policy_key, policy_file))
    with h5py.File(policy_file, 'r') as f:
        # The training arguments are stored as a JSON string in the file attributes
        train_args = json.loads(f.attrs['args'])
        dset = f[policy_key]
        pprint.pprint(dict(dset.attrs))

    # Initialize the MDP
    env_name = train_args['env_name'] if args.env_name is None else args.env_name
    print('Loading environment %s' % (env_name,))
    mdp = rllabenv.RLLabMDP(env_name)
    util.header('MDP observation space, action space sizes: %d, %d\n' % (mdp.obs_space.dim, mdp.action_space.storage_size))

    util.header('Max steps is {}'.format(args.max_steps))

    # Initialize the policy and load its parameters.
    # Training args carry either an explicit 'enable_obsnorm' or an 'obsnorm_mode' string.
    if 'enable_obsnorm' in train_args:
        enable_obsnorm = bool(train_args['enable_obsnorm'])
    else:
        enable_obsnorm = train_args['obsnorm_mode'] != 'none'
    # Gaussian policy for continuous action spaces, Gibbs policy otherwise
    if isinstance(mdp.action_space, policyopt.ContinuousSpace):
        policy_cfg = rl.GaussianPolicyConfig(
            hidden_spec=train_args['policy_hidden_spec'],
            min_stdev=0.,
            init_logstdev=0.,
            enable_obsnorm=enable_obsnorm)
        policy = rl.GaussianPolicy(policy_cfg, mdp.obs_space, mdp.action_space, 'GaussianPolicy')
    else:
        policy_cfg = rl.GibbsPolicyConfig(
            hidden_spec=train_args['policy_hidden_spec'],
            enable_obsnorm=enable_obsnorm)
        policy = rl.GibbsPolicy(policy_cfg, mdp.obs_space, mdp.action_space, 'GibbsPolicy')
    policy.load_h5(policy_file, policy_key)

    # Animate: start new episodes until max_steps frames have been written
    sim = mdp.new_sim()
    steps = 0
    while steps < args.max_steps:
        sim.reset()
        while steps < args.max_steps and not sim.done:
            a = policy.sample_actions(sim.obs[None, :], bool(args.deterministic))[0][0, :]
            sim.step(a)
            sim.draw()
            viewer = sim.env.viewer
            data, w, h = viewer.get_image()
            # np.frombuffer replaces the deprecated binary mode of np.fromstring.
            # The row order is reversed here and the channel order below before saving;
            # presumably the viewer returns bottom-up RGB data - TODO confirm.
            image = np.frombuffer(data, dtype='uint8').reshape(h, w, 3)[::-1, :, :]
            cv2.imwrite('%s/img_%08d.png' % (args.output_dir, steps), image[:, :, ::-1])

            print(steps)
            steps += 1
Esempio n. 7
0
def main():
    """
    Roll out a previously trained policy and save every rendered frame as a PNG.

    This script does no training: it needs an existing policy snapshot. The
    `policy` argument is split by util.split_h5_name into an HDF5 file name and a
    key inside that file; the file's 'args' attribute must hold the JSON-encoded
    training arguments.

    A Gaussian policy is built when the environment's action space is a
    policyopt.ContinuousSpace, a Gibbs policy otherwise.

    Command-line arguments:
        policy: policy location (HDF5 file plus key, see above).
        output_dir: directory that receives img_XXXXXXXX.png frames. It is created
            if needed and must be empty.
        --deterministic: integer flag (default 1); converted to bool and passed to
            policy.sample_actions.
        --max_steps: required; total number of frames to write, counted across
            episodes. Nothing is simulated when it is 0 or negative.
        --env_name: optional override for the 'env_name' stored with the policy.

    Raises:
        AssertionError: if output_dir is not empty.
    """
    import pprint

    np.set_printoptions(suppress=True, precision=5, linewidth=1000)

    parser = argparse.ArgumentParser()
    # MDP options
    parser.add_argument('policy', type=str)
    parser.add_argument('output_dir', type=str)
    parser.add_argument('--deterministic', default=1, type=int)
    parser.add_argument('--max_steps', type=int, required=True)
    parser.add_argument('--env_name', type=str, default=None)
    args = parser.parse_args()

    util.mkdir_p(args.output_dir)
    assert not os.listdir(args.output_dir), '%s is not empty' % args.output_dir
    # Single-argument print(...) calls behave the same on Python 2 and Python 3
    print('Writing to %s' % (args.output_dir,))

    # Load the saved state
    policy_file, policy_key = util.split_h5_name(args.policy)

    print('Loading policy parameters from %s in %s' % (policy_key, policy_file))
    with h5py.File(policy_file, 'r') as f:
        train_args = json.loads(f.attrs['args'])
        dset = f[policy_key]
        pprint.pprint(dict(dset.attrs))

    # Initialize the MDP
    env_name = train_args['env_name'] if args.env_name is None else args.env_name
    print('Loading environment %s' % (env_name,))
    mdp = rlgymenv.RLGymMDP(env_name)
    util.header('MDP observation space, action space sizes: %d, %d\n' % (mdp.obs_space.dim, mdp.action_space.storage_size))

    util.header('Max steps is {}'.format(args.max_steps))

    # Initialize the policy and load its parameters.
    # Training args carry either an explicit 'enable_obsnorm' or an 'obsnorm_mode' string.
    if 'enable_obsnorm' in train_args:
        enable_obsnorm = bool(train_args['enable_obsnorm'])
    else:
        enable_obsnorm = train_args['obsnorm_mode'] != 'none'
    if isinstance(mdp.action_space, policyopt.ContinuousSpace):
        policy_cfg = rl.GaussianPolicyConfig(
            hidden_spec=train_args['policy_hidden_spec'],
            min_stdev=0.,
            init_logstdev=0.,
            enable_obsnorm=enable_obsnorm)
        policy = rl.GaussianPolicy(policy_cfg, mdp.obs_space, mdp.action_space, 'GaussianPolicy')
    else:
        policy_cfg = rl.GibbsPolicyConfig(
            hidden_spec=train_args['policy_hidden_spec'],
            enable_obsnorm=enable_obsnorm)
        policy = rl.GibbsPolicy(policy_cfg, mdp.obs_space, mdp.action_space, 'GibbsPolicy')
    policy.load_h5(policy_file, policy_key)

    # Animate: start new episodes until max_steps frames have been written
    sim = mdp.new_sim()
    steps = 0
    while steps < args.max_steps:
        sim.reset()
        while steps < args.max_steps and not sim.done:
            a = policy.sample_actions(sim.obs[None, :], bool(args.deterministic))[0][0, :]
            sim.step(a)
            sim.draw()
            viewer = sim.env.viewer
            data, w, h = viewer.get_image()
            # np.frombuffer replaces the deprecated binary mode of np.fromstring.
            # The row order is reversed here and the channel order below before saving;
            # presumably the viewer returns bottom-up RGB data - TODO confirm.
            image = np.frombuffer(data, dtype='uint8').reshape(h, w, 3)[::-1, :, :]
            cv2.imwrite('%s/img_%08d.png' % (args.output_dir, steps), image[:, :, ::-1])

            print(steps)
            steps += 1
Esempio n. 8
0
def phase1_train(spec, specfilename):
    '''
    Phase 1: run one training command per (algorithm, task, dataset size, run).

    Command templates, log file names and per-run template arguments are handed to
    runcmds, with logs going to a timestamped 'logs_<jobname>_<time>' directory
    inside the checkpoint directory. Afterwards the pipeline file is copied next to
    the checkpoints.

    Args:
        spec: parsed pipeline specification (nested dicts/lists). Reads
            spec['options'] ('storagedir', 'checkpt_subdir', 'pbs'), spec['tasks']
            and spec['training'] ('algorithms', 'dataset_num_trajs',
            'full_dataset_num_trajs', 'runs', optional 'use_gpu').
        specfilename: path of the pipeline file; copied into the checkpoint
            directory as 'pipeline.yaml'.

    Raises:
        AssertionError: if the checkpoint directory is not empty, or if a requested
            dataset size exceeds spec['training']['full_dataset_num_trajs'].
    '''
    util.header('=== Phase 1: training ===')

    # Trains all algorithms over all tasks, for all dataset sizes (3 loops)

    taskname2dset = gen_taskname2outfile(spec)

    # Theano GPU command prefix
    gpu_cmd_prefix = 'THEANO_FLAGS=mode=FAST_RUN,floatX=float32,device=gpu'

    # Make checkpoint dir. All outputs go here
    checkptdir = os.path.join(spec['options']['storagedir'],
                              spec['options']['checkpt_subdir'])
    util.mkdir_p(checkptdir)
    # Make sure checkpoint dir is empty
    assert not os.listdir(
        checkptdir), 'Checkpoint directory {} is not empty!'.format(checkptdir)

    training = spec['training']
    # Specs written without the 'use_gpu' key keep working and run on the CPU
    use_gpu = training.get('use_gpu', False)

    # Assemble the commands to run
    cmd_templates, outputfilenames, argdicts = [], [], []
    for alg in training['algorithms']:
        # The template depends only on the algorithm, so build it once per algorithm.
        # The command may span several lines in the spec; flatten it to one line.
        cmd_template = alg['cmd'].replace('\n', ' ').strip()
        if use_gpu:
            cmd_template = gpu_cmd_prefix + ' ' + cmd_template
        for task in spec['tasks']:
            for num_trajs in training['dataset_num_trajs']:
                assert num_trajs <= training['full_dataset_num_trajs']
                for run in range(training['runs']):
                    # A string identifier. Used in filenames for this run
                    strid = 'alg={},task={},num_trajs={},run={}'.format(
                        alg['name'], task['name'], num_trajs, run)
                    cmd_templates.append(cmd_template)
                    outputfilenames.append(strid + '.txt')
                    argdicts.append({
                        'env': task['env'],
                        'dataset': taskname2dset[task['name']],
                        'num_trajs': num_trajs,
                        'cuts_off_on_success': int(task['cuts_off_on_success']),
                        'data_subsamp_freq': task['data_subsamp_freq'],
                        'out': os.path.join(checkptdir, strid + '.h5'),
                    })

    # Only the job name is taken from the PBS options
    pbsopts = spec['options']['pbs']
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d_%H:%M:%S')
    runcmds(cmd_templates,
            outputfilenames,
            argdicts,
            jobname=pbsopts['jobname'],
            outputfile_dir=os.path.join(
                checkptdir, 'logs_%s_%s' % (pbsopts['jobname'], timestamp)))

    # Copy the pipeline yaml file to the output dir too
    shutil.copyfile(specfilename, os.path.join(checkptdir, 'pipeline.yaml'))