Esempio n. 1
0
    def __init__(self, filename, attrs):
        '''
        Create a new log file and store run-level attributes on its root node.

        Args:
            filename: path of the log file to create, or None to disable
                writing to a file (a warning is emitted and self.f is None).
            attrs: iterable of (name, value) pairs written as attributes of
                the file's root node. Unused when filename is None.

        Raises:
            RuntimeError: if filename already exists.
        '''
        if filename is None:
            util.warn('Warning: not writing log to any file!')
            self.f = None
        else:
            if os.path.exists(filename):
                raise RuntimeError('Log file %s already exists' % filename)
            # dirname is '' for a bare filename, and os.makedirs('') raises,
            # so only create the directory when there is one to create.
            log_dir = os.path.dirname(filename)
            if log_dir:
                os.makedirs(log_dir, exist_ok=True)
            self.f = tables.open_file(filename, mode='w')
            for k, v in attrs:
                self.f.root._v_attrs[k] = v

        # Set on both paths so the attribute always exists, even without a file
        self.log_table = None
        self.schema = None  # list of col name / types for display
Esempio n. 2
0
    def save_h5(self, h5file, key, extra_attrs=None):
        '''
        Write every variable of this object into group `key` of an HDF5 file.

        The file is opened in append mode. Each variable returned by
        self.get_all_variables() is stored under its name, and the result of
        self.savehash() is stored in the group's 'hash' attribute.

        Args:
            h5file: path of the HDF5 file to write to
            key: name of the group that receives the variables. If it already
                exists a warning is emitted and its entries are replaced.
            extra_attrs: optional iterable of (name, value) pairs stored as
                attributes of the group; a warning is emitted for each
                attribute that is already present before it is overwritten.
        '''
        with h5py.File(h5file, 'a') as f:
            group_existed = key in f
            if group_existed:
                util.warn('WARNING: key %s already exists in %s' %
                          (key, h5file))
                dset = f[key]
            else:
                dset = f.create_group(key)

            for v in self.get_all_variables():
                # h5py will not assign to a name that already exists, so when
                # re-saving into an existing group drop the stale entry first.
                # Fresh groups are left alone so duplicate names still error.
                if group_existed and v.name in dset:
                    del dset[v.name]
                dset[v.name] = v.get_value()

            dset.attrs['hash'] = self.savehash()
            if extra_attrs is not None:
                for k, v in extra_attrs:
                    if k in dset.attrs:
                        util.warn(
                            'Warning: attribute %s already exists in %s' %
                            (k, dset.name))
                    dset.attrs[k] = v
Esempio n. 3
0
def btlinesearch(f,
                 x0,
                 fx0,
                 g,
                 dx,
                 accept_ratio,
                 shrink_factor,
                 max_steps,
                 verbose=False):
    '''
    Find a step size t such that f(x0 + t*dx) is within a factor
    accept_ratio of the linearized function value improvement.

    Args:
        f: the function
        x0: starting point for search
        fx0: the value f(x0). Will be computed if set to None.
        g: search direction, typically the gradient of f at x0
        dx: the largest possible step to take
        accept_ratio: termination criterion
        shrink_factor: how much to decrease the step every iteration
        max_steps: maximum number of times the step may be shrunk
        verbose: if true, print the actual improvement, the linearized
            improvement and accept_ratio for every step size tried

    Returns:
        A pair (x, num_steps): the point x0 + t*dx for the final step size t
        and the number of times the step was shrunk. When
        num_steps == max_steps no tried step size met the criterion, and the
        returned point uses a step size that was never evaluated.
    '''
    if fx0 is None:
        fx0 = f(x0)
    t = 1.
    # Directional derivative along dx; expected to be <= 0 for a descent step
    m = g.dot(dx)
    if accept_ratio != 0 and m > 0:
        util.warn('WARNING: %.10f not <= 0' % m)
    num_steps = 0
    while num_steps < max_steps:
        true_imp = f(x0 + t * dx) - fx0
        lin_imp = t * m
        if verbose:
            print(true_imp, lin_imp, accept_ratio)
        if true_imp <= accept_ratio * lin_imp:
            break
        t *= shrink_factor
        num_steps += 1
    return x0 + t * dx, num_steps
Esempio n. 4
0
    def sim_multi(self,
                  policy_fn,
                  obsfeat_fn,
                  cfg,
                  num_threads=None,
                  no_reward=False):
        '''
        Deprecated sampler that advances a batch of simulations in lockstep.

        Every step stacks the observations of all cfg.batch_size simulations,
        evaluates the policy once on the whole batch and steps every
        simulation. A simulation's trajectory is closed (and the simulation
        reset) when the batch reports it done or when the trajectory holds
        cfg.max_traj_len transitions. Sampling stops once
            (1) at least cfg.min_num_trajs trajectories are complete, and
            (2) those trajectories hold at least cfg.min_total_sa transitions.

        Args:
            policy_fn: maps a batch of observation features to a pair
                (actions, action distributions)
            obsfeat_fn: maps a batch of observations to observation features
            cfg: a SimConfig
            num_threads: passed on to batch_step; defaults to
                multiprocessing.cpu_count()
            no_reward: if true, every reward is replaced by NaN

        Returns:
            The result of TrajBatch.FromTrajs on the completed trajectories.
        '''
        util.warn('sim_multi is deprecated!')
        assert isinstance(cfg, SimConfig)
        obs_dim = self.obs_space.storage_size
        act_dim = self.action_space.storage_size

        if num_threads is None:
            num_threads = multiprocessing.cpu_count()

        batch_size = cfg.batch_size

        # Bookkeeping for trajectories that have already ended
        total_transitions = 0
        finished = []

        # The running simulations, each paired with its open list of
        # (o, obsfeat, adist, a, r) transitions
        sims = self.new_batched_sim(batch_size)  # TODO: reuse this across runs
        in_progress = [[] for _ in range(batch_size)]

        while True:
            # Close out and restart every simulation that has ended or has
            # reached the length cap
            for idx in range(batch_size):
                current = in_progress[idx]
                if not (sims.is_done(idx)
                        or len(current) >= cfg.max_traj_len):
                    continue
                finished.append(current)
                total_transitions += len(current)
                in_progress[idx] = []
                sims.reset_sim(idx)

            # Stop as soon as both quotas are met
            enough_trajs = len(finished) >= cfg.min_num_trajs
            if enough_trajs and total_transitions >= cfg.min_total_sa:
                break

            # Snapshot the current observations of the whole batch
            obs_batch = sims.batch_obs.copy()
            assert obs_batch.shape == (batch_size, obs_dim)

            # One policy evaluation for the whole batch
            feat_batch = obsfeat_fn(obs_batch)
            act_batch, adist_batch = policy_fn(feat_batch)
            assert act_batch.shape == (batch_size, act_dim)
            assert adist_batch.shape[0] == batch_size and adist_batch.ndim == 2

            # Advance every simulation by one step
            rew_batch = sims.batch_step(act_batch, num_threads=num_threads)
            if no_reward:
                rew_batch[:] = np.nan

            # Record this step's transition for each simulation
            for idx, translist in enumerate(in_progress):
                translist.append(
                    (obs_batch[idx, :], feat_batch[idx, :],
                     adist_batch[idx, :], act_batch[idx, :], rew_batch[idx]))

        assert sum(len(tlist) for tlist in finished) == total_transitions

        def translist_to_traj(tlist):
            # Stack each field of the transition tuples into one array
            steps = len(tlist)

            def column(field):
                return np.stack([trans[field] for trans in tlist])

            obs_T_Do = column(0)
            assert obs_T_Do.shape == (steps, self.obs_space.storage_size)
            obsfeat_T_Df = column(1)
            assert obsfeat_T_Df.shape[0] == steps
            adist_T_Pa = column(2)
            assert adist_T_Pa.ndim == 2 and adist_T_Pa.shape[0] == steps
            a_T_Da = column(3)
            assert a_T_Da.shape == (steps, self.action_space.storage_size)
            r_T = column(4)
            assert r_T.shape == (steps, )
            return Trajectory(obs_T_Do, obsfeat_T_Df, adist_T_Pa, a_T_Da, r_T)

        completed_trajs = [translist_to_traj(tlist) for tlist in finished]
        assert len(completed_trajs) >= cfg.min_num_trajs and sum(
            len(traj) for traj in completed_trajs) >= cfg.min_total_sa
        return TrajBatch.FromTrajs(completed_trajs)
Esempio n. 5
0
        global _global_sim_info
        mdp, policy_fn, obsfeat_fn, max_traj_len = _global_sim_info
        return mdp.sim_single(policy_fn, obsfeat_fn, max_traj_len)
    except KeyboardInterrupt:
        pass


# Stuff for temporarily disabling MKL threading during multiprocessing
# http://stackoverflow.com/a/28293128
import ctypes
# mkl_rt stays None when the MKL runtime cannot be loaded; the two function
# handles below exist only when loading succeeded.
mkl_rt = None
try:
    mkl_rt = ctypes.CDLL('libmkl_rt.so')
except OSError:  # library not found
    util.warn(
        'MKL runtime not found. Will not attempt to disable multithreaded MKL for parallel rollouts.'
    )
else:
    # Entry points used to change and to query MKL's thread count
    mkl_set_num_threads = mkl_rt.MKL_Set_Num_Threads
    mkl_get_max_threads = mkl_rt.MKL_Get_Max_Threads
from contextlib import contextmanager


@contextmanager
def set_mkl_threads(n):
    '''
    Context manager that sets MKL's thread count to n for the `with` body.

    The previous value (as reported by mkl_get_max_threads) is restored on
    exit, including when the body raises. Does nothing when the MKL runtime
    was not loaded (mkl_rt is None).
    '''
    if mkl_rt is None:
        yield
        return
    orig = mkl_get_max_threads()
    mkl_set_num_threads(n)
    try:
        yield
    finally:
        # Restore even if the body raised, so the setting never leaks
        mkl_set_num_threads(orig)
Esempio n. 6
0
    def sim_multi(self, policy_fn, obsfeat_fn, cfg, num_threads=None, no_reward=False):
        '''
        Run many simulations, with policy evaluations batched together.

        Samples complete trajectories (stopping when the batched simulation
        reports a simulation as done, or when cfg.max_traj_len is reached)
        until both
            (1) at least cfg.min_num_trajs trajectories have been sampled, and
            (2) at least cfg.min_total_sa transitions have been sampled.

        Args:
            policy_fn: maps a batch of observation features to a pair
                (actions, action distributions)
            obsfeat_fn: maps a batch of observations to observation features
            cfg: a SimConfig
            num_threads: passed on to batch_step; defaults to
                multiprocessing.cpu_count()
            no_reward: if true, every reward is replaced by NaN

        Returns:
            The result of TrajBatch.FromTrajs on the completed trajectories.
        '''
        util.warn('sim_multi is deprecated!')
        assert isinstance(cfg, SimConfig)
        Do, Da = self.obs_space.storage_size, self.action_space.storage_size

        if num_threads is None:
            num_threads = multiprocessing.cpu_count()

        # Completed trajectories
        num_sa = 0
        completed_translists = []

        # Simulations and their current trajectories
        simbatch = self.new_batched_sim(cfg.batch_size)  # TODO: reuse this across runs
        # list of (o,obsfeat,adist,a,r) transitions for each simulation
        # (range rather than xrange so this also runs on Python 3)
        sim_trans_B = [[] for _ in range(cfg.batch_size)]

        # Keep running simulations until we fill up the quota of trajectories and transitions
        while True:
            # If a simulation is done, pull out and save its trajectory, and restart it.
            for i_sim in range(cfg.batch_size):
                if simbatch.is_done(i_sim) or len(sim_trans_B[i_sim]) >= cfg.max_traj_len:
                    # Save the trajectory
                    completed_translists.append(sim_trans_B[i_sim])
                    num_sa += len(sim_trans_B[i_sim])
                    # and restart the simulation
                    sim_trans_B[i_sim] = []
                    simbatch.reset_sim(i_sim)

            # Are both quotas filled? If so, we're done.
            if len(completed_translists) >= cfg.min_num_trajs and num_sa >= cfg.min_total_sa:
                break

            # Keep simulating otherwise. Pull together observations from all simulations
            obs_B_Do = simbatch.batch_obs.copy()
            assert obs_B_Do.shape == (cfg.batch_size, Do)

            # Evaluate policy
            obsfeat_B_Df = obsfeat_fn(obs_B_Do)
            a_B_Da, adist_B_Pa = policy_fn(obsfeat_B_Df)
            assert a_B_Da.shape == (cfg.batch_size, Da)
            assert adist_B_Pa.shape[0] == cfg.batch_size and adist_B_Pa.ndim == 2

            # Step simulations
            r_B = simbatch.batch_step(a_B_Da, num_threads=num_threads)
            if no_reward:
                r_B[:] = np.nan

            # Save the transitions
            for i_sim in range(cfg.batch_size):
                sim_trans_B[i_sim].append((
                    obs_B_Do[i_sim, :],
                    obsfeat_B_Df[i_sim, :],
                    adist_B_Pa[i_sim, :],
                    a_B_Da[i_sim, :],
                    r_B[i_sim],
                ))

        assert sum(len(tlist) for tlist in completed_translists) == num_sa

        # Pack together each trajectory individually
        def translist_to_traj(tlist):
            obs_T_Do = np.stack([trans[0] for trans in tlist])
            assert obs_T_Do.shape == (len(tlist), self.obs_space.storage_size)
            obsfeat_T_Df = np.stack([trans[1] for trans in tlist])
            assert obsfeat_T_Df.shape[0] == len(tlist)
            adist_T_Pa = np.stack([trans[2] for trans in tlist])
            assert adist_T_Pa.ndim == 2 and adist_T_Pa.shape[0] == len(tlist)
            a_T_Da = np.stack([trans[3] for trans in tlist])
            assert a_T_Da.shape == (len(tlist), self.action_space.storage_size)
            r_T = np.stack([trans[4] for trans in tlist])
            assert r_T.shape == (len(tlist),)
            return Trajectory(obs_T_Do, obsfeat_T_Df, adist_T_Pa, a_T_Da, r_T)

        completed_trajs = [translist_to_traj(tlist) for tlist in completed_translists]
        assert len(completed_trajs) >= cfg.min_num_trajs and sum(len(traj) for traj in completed_trajs) >= cfg.min_total_sa
        return TrajBatch.FromTrajs(completed_trajs)
Esempio n. 7
0
# Tuple (mdp, policy_fn, obsfeat_fn, max_traj_len) read by _rollout
_global_sim_info = None
def _rollout():
    '''
    Run a single rollout with the settings stored in _global_sim_info.

    Reseeds both Python's and NumPy's random number generators from
    os.urandom before calling mdp.sim_single.
    NOTE(review): the reseeding presumably keeps parallel workers from
    sharing one random stream -- confirm against the caller.

    Returns:
        The result of mdp.sim_single(policy_fn, obsfeat_fn, max_traj_len), or
        None if a KeyboardInterrupt was raised while running.
    '''
    try:
        import binascii, os, random
        random.seed(os.urandom(4))
        # hexlify works on Python 2 and 3; bytes has no .encode('hex') on 3
        np.random.seed(int(binascii.hexlify(os.urandom(4)), 16))
        mdp, policy_fn, obsfeat_fn, max_traj_len = _global_sim_info
        return mdp.sim_single(policy_fn, obsfeat_fn, max_traj_len)
    except KeyboardInterrupt:
        pass

# Stuff for temporarily disabling MKL threading during multiprocessing
# http://stackoverflow.com/a/28293128
import ctypes
# mkl_rt stays None when the MKL runtime cannot be loaded; the two function
# handles below exist only when loading succeeded.
mkl_rt = None
try:
    mkl_rt = ctypes.CDLL('libmkl_rt.so')
except OSError: # library not found
    util.warn('MKL runtime not found. Will not attempt to disable multithreaded MKL for parallel rollouts.')
else:
    # Entry points used to change and to query MKL's thread count
    mkl_set_num_threads = mkl_rt.MKL_Set_Num_Threads
    mkl_get_max_threads = mkl_rt.MKL_Get_Max_Threads
from contextlib import contextmanager
@contextmanager
def set_mkl_threads(n):
    '''
    Context manager that sets MKL's thread count to n for the `with` body.

    The previous value (as reported by mkl_get_max_threads) is restored on
    exit, including when the body raises. Does nothing when the MKL runtime
    was not loaded (mkl_rt is None).
    '''
    if mkl_rt is None:
        yield
        return
    orig = mkl_get_max_threads()
    mkl_set_num_threads(n)
    try:
        yield
    finally:
        # Restore even if the body raised, so the setting never leaks
        mkl_set_num_threads(orig)
Esempio n. 8
0
'''
Check how many snapshots were saved in a set of log files
'''

import argparse
import h5py
from policyopt import util

parser = argparse.ArgumentParser()
parser.add_argument('logfiles', nargs='+', type=str)
args = parser.parse_args()

# For every .h5 log file given, print its name and the iteration number of its
# last snapshot. Other files are ignored; unreadable ones produce a warning.
for filename in args.logfiles:
    if not filename.endswith('.h5'):
        continue
    try:
        with h5py.File(filename, 'r') as f:
            # Snapshot entries are expected to be named 'iter<N>'; the last
            # key in sorted order is taken as the most recent one.
            snapshot_name = sorted(f['snapshots'].keys())[-1]
        # Explicit check instead of assert, which is stripped under -O
        if not snapshot_name.startswith('iter'):
            raise ValueError('unexpected snapshot name %r' % (snapshot_name,))
        last_snapshot_iter = int(snapshot_name[len('iter'):])
    except Exception:
        # Exception rather than a bare except, so Ctrl-C still stops the script
        util.warn('Error opening {}'.format(filename))
        continue
    # Same output as the Python 2 print statement, valid on Python 3 too
    print('{} {}'.format(filename, last_snapshot_iter))