Example #1
0
 def __init__(self, input_B_Ih_Iw_Ci, input_shape, Co, Fh, Fw, Sh, Sw,
              padding, initializer):
     """Create a 2-D convolution followed by a broadcast bias add.

     Args:
         input_B_Ih_Iw_Ci: tensor passed as the input of ``tf.nn.conv2d``
             (the name suggests batch x height x width x channels).
         input_shape: 3-element sequence ``(Ih, Iw, Ci)``.
         Co: number of output channels.
         Fh, Fw: filter height and width.
         Sh, Sw: strides along height and width.
         padding: either ``'SAME'`` or ``'VALID'``.
         initializer: initializer for the filter variable ``W``; ``None``
             selects ``tf.contrib.layers.xavier_initializer()``.

     Raises:
         ValueError: if ``padding`` is neither ``'SAME'`` nor ``'VALID'``.
     """
     assert len(input_shape) == 3
     Ih, Iw, Ci = input_shape
     if padding == 'SAME':
         span_h, span_w = Ih, Iw
     elif padding == 'VALID':
         span_h, span_w = Ih - Fh + 1, Iw - Fw + 1
     else:
         # Previously this fell through and crashed later on an unbound Oh.
         raise ValueError(
             "Unknown padding %r; expected 'SAME' or 'VALID'" % (padding, ))
     # np.ceil returns a float; store integer dimensions so that layers
     # stacked on top receive a usable static shape.
     Oh = int(np.ceil(float(span_h) / float(Sh)))
     Ow = int(np.ceil(float(span_w) / float(Sw)))
     util.header(
         'Conv(chanin=%d, chanout=%d, filth=%d, filtw=%d, outh=%d, outw=%d, strideh=%d, stridew=%d, padding=%s)'
         % (Ci, Co, Fh, Fw, Oh, Ow, Sh, Sw, padding))
     self._output_shape = (Oh, Ow, Co)
     with tf.variable_scope(type(self).__name__) as self.varscope:
         if initializer is None:
             initializer = tf.contrib.layers.xavier_initializer()
         self.W_Fh_Fw_Ci_Co = tf.get_variable('W',
                                              shape=[Fh, Fw, Ci, Co],
                                              initializer=initializer)
         # Bias is stored as [1, 1, 1, Co] so the addition broadcasts over
         # the leading three axes of the convolution result.
         self.b_1_1_1_Co = tf.get_variable(
             'b',
             shape=[1, 1, 1, Co],
             initializer=tf.constant_initializer(0.))
         self.output_B_Oh_Ow_Co = tf.nn.conv2d(
             input_B_Ih_Iw_Ci, self.W_Fh_Fw_Ci_Co, [1, Sh, Sw, 1],
             padding) + self.b_1_1_1_Co
Example #2
0
 def __init__(self,
              input_B_Di,
              input_shape,
              output_shape,
              Winitializer,
              binitializer,
              debug=False):
     """Create an affine map ``input @ W + b`` inside its own variable scope.

     Args:
         input_B_Di: tensor multiplied by the weight matrix.
         input_shape: 1-element sequence holding the input width.
         output_shape: 1-element sequence holding the output width.
         Winitializer: initializer for ``W``; ``None`` selects
             ``tf.contrib.layers.xavier_initializer()``.
         binitializer: initializer for ``b``; ``None`` selects
             ``tf.zeros_initializer``.
         debug: when true, print a one-line summary through ``util.header``.
     """
     assert len(input_shape) == len(output_shape) == 1
     n_in = input_shape[0]
     n_out = output_shape[0]
     if debug:
         util.header('Affine(in=%d, out=%d)' % (n_in, n_out))
     self._output_shape = (n_out, )
     with tf.variable_scope(type(self).__name__) as self.varscope:
         # Resolve the defaults first, then create the two variables.
         w_init = Winitializer
         if w_init is None:
             w_init = tf.contrib.layers.xavier_initializer()
         b_init = binitializer
         if b_init is None:
             b_init = tf.zeros_initializer
         self.W_Di_Do = tf.get_variable('W',
                                        shape=[n_in, n_out],
                                        initializer=w_init)
         self.b_1_Do = tf.get_variable('b',
                                       shape=[1, n_out],
                                       initializer=b_init)
         projected = tf.matmul(input_B_Di, self.W_Di_Do)
         self.output_B_Do = projected + self.b_1_Do
Example #3
0
 def __init__(self, input_B_Di, output_shape, func):
     """Apply a named element-wise activation to ``input_B_Di``.

     Args:
         input_B_Di: tensor the activation is applied to.
         output_shape: stored unchanged as this layer's output shape.
         func: one of ``'relu'``, ``'elu'`` or ``'tanh'``; any other key
             raises ``KeyError`` from the lookup table.
     """
     util.header('Nonlinearity(func=%s)' % func)
     self._output_shape = output_shape
     with tf.variable_scope(type(self).__name__) as self.varscope:
         activations = {
             'relu': tf.nn.relu,
             'elu': tf.nn.elu,
             'tanh': tf.tanh
         }
         activation = activations[func]
         self.output_B_Do = activation(input_B_Di)
Example #4
0
    def __init__(self, input_, outdim=2, debug=False):
        """Collapse the trailing axes of ``input_`` into a single axis.

        Axes from index ``outdim - 1`` onwards are merged, so the result has
        ``outdim`` axes in total.

        Args:
            input_: tensor to flatten.
            outdim: number of axes the result keeps; must be at least 1.
            debug: when true, print the resulting static shape.
        """
        assert outdim >= 1
        self._outdim = outdim
        split = self._outdim - 1

        # Static bookkeeping: the merged size is known only if every merged
        # axis has a known static size.
        static_shape = tuple(input_.get_shape().as_list())
        merged_dims = static_shape[split:]
        if all(dim is not None for dim in merged_dims):
            flattened = int(np.prod(merged_dims))
        else:
            flattened = None
        # Index 0 is dropped from the recorded shape.
        self._output_shape = static_shape[1:split] + (flattened, )

        if debug:
            util.header('Flatten(new_shape=%s)' % str(self._output_shape))

        # Dynamic reshape built from the runtime shape of the input.
        kept_dims = tf.shape(input_)[:split:]
        merged_size = tf.reduce_prod(tf.shape(input_)[split:])
        merged_size_vec = tf.pack([merged_size])
        target_shape = tf.concat(0, [kept_dims, merged_size_vec])
        self._output = tf.reshape(input_, target_shape)
Example #5
0
    def load_h5(self, sess, h5file, key):
        """Restore this object's variables from group ``key`` of an HDF5 file.

        Args:
            sess: session used to run the assign ops and compute the hash.
            h5file: path of the HDF5 file, opened read-only.
            key: name of the entry in the file that holds the variables.

        Raises:
            RuntimeError: if a variable's name is missing from the entry.
            AssertionError: if the hash computed after loading differs from
                the ``'hash'`` attribute stored under ``self.varscope.name``.
        """
        with h5py.File(h5file, 'r') as f:
            dset = f[key]

            # Build every assign op first so that they all run in one call.
            assign_ops = []
            for var in self.get_variables():
                util.header('Reading {}'.format(var.name))
                if var.name not in dset:
                    raise RuntimeError('Variable {} not found in {}'.format(
                        var.name, dset))
                assign_ops.append(var.assign(dset[var.name][...]))

            sess.run(assign_ops)

            # Check that what was loaded matches what was saved.
            loaded_hash = self.savehash(sess)
            assert loaded_hash == dset[self.varscope.name].attrs['hash'], (
                'Checkpoint hash {} does not match loaded hash {}'.format(
                    dset[self.varscope.name].attrs['hash'], loaded_hash))
Example #6
0
 def __init__(self, input_B_Di, input_shape, output_shape, initializer):
     """Create an affine map ``input @ W + b`` with a zero-initialised bias.

     Args:
         input_B_Di: tensor multiplied by the weight matrix.
         input_shape: 1-element sequence holding the input width.
         output_shape: 1-element sequence holding the output width.
         initializer: initializer for ``W``; ``None`` selects
             ``tf.contrib.layers.xavier_initializer()``.
     """
     assert len(input_shape) == len(output_shape) == 1
     n_in = input_shape[0]
     n_out = output_shape[0]
     util.header('Affine(in=%d, out=%d)' % (n_in, n_out))
     self._output_shape = (n_out, )
     with tf.variable_scope(type(self).__name__) as self.varscope:
         w_init = initializer
         if w_init is None:
             # A truncated normal with stddev sqrt(2 / n_in) was the earlier
             # choice here; Xavier is the current default.
             w_init = tf.contrib.layers.xavier_initializer()
         self.W_Di_Do = tf.get_variable('W',
                                        shape=[n_in, n_out],
                                        initializer=w_init)
         self.b_1_Do = tf.get_variable(
             'b', shape=[1, n_out], initializer=tf.constant_initializer(0.))
         projected = tf.matmul(input_B_Di, self.W_Di_Do)
         self.output_B_Do = projected + self.b_1_Do
Example #7
0
 def __init__(self, input_B_Ih_Iw_Ci, input_shape, Co, Fh, Fw, Oh, Ow, Sh,
              Sw, padding, initializer):
     """Create a 2-D convolution plus bias with caller-supplied output size.

     Args:
         input_B_Ih_Iw_Ci: tensor passed as the input of ``tf.nn.conv2d``.
         input_shape: 3-element sequence; only index 2 (input channels) is
             read here.
         Co: number of output channels.
         Fh, Fw: filter height and width.
         Oh, Ow: output height and width, recorded as given (not checked
             against the other arguments).
         Sh, Sw: strides along height and width.
         padding: padding mode forwarded to ``tf.nn.conv2d``.
         initializer: initializer for ``W``; ``None`` selects
             ``tf.contrib.layers.xavier_initializer()``.
     """
     # TODO: calculate Oh and Ow from the other stuff.
     assert len(input_shape) == 3
     Ci = input_shape[2]
     summary = (
         'Conv(chanin=%d, chanout=%d, filth=%d, filtw=%d, outh=%d, outw=%d, strideh=%d, stridew=%d, padding=%s)'
         % (Ci, Co, Fh, Fw, Oh, Ow, Sh, Sw, padding))
     util.header(summary)
     self._output_shape = (Oh, Ow, Co)
     with tf.variable_scope(type(self).__name__) as self.varscope:
         w_init = initializer
         if w_init is None:
             # A truncated normal with stddev sqrt(2 / (Fh*Fw*Ci)) was the
             # earlier choice here; Xavier is the current default.
             w_init = tf.contrib.layers.xavier_initializer()
         self.W_Fh_Fw_Ci_Co = tf.get_variable('W',
                                              shape=[Fh, Fw, Ci, Co],
                                              initializer=w_init)
         self.b_1_1_1_Co = tf.get_variable(
             'b',
             shape=[1, 1, 1, Co],
             initializer=tf.constant_initializer(0.))
         strides = [1, Sh, Sw, 1]
         convolved = tf.nn.conv2d(input_B_Ih_Iw_Ci, self.W_Fh_Fw_Ci_Co,
                                  strides, padding)
         self.output_B_Oh_Ow_Co = convolved + self.b_1_1_1_Co
Example #8
0
def phase_train(spec, spec_file, git_hash):
    """Expand ``spec`` into one training job per configuration and run them.

    The Cartesian product of every list under ``spec['training']`` and
    ``spec['arguments']`` is enumerated (skipping combinations with
    ``n_coop > n_pursuers``). After interactive confirmation the jobs are
    handed to ``pipeline.run_jobs``, then the spec file and git hash are
    written into the checkpoint directory.

    Args:
        spec: parsed pipeline specification with ``'options'``,
            ``'training'`` and ``'arguments'`` sections.
        spec_file: path of the file ``spec`` came from; copied next to the
            results as ``pipeline.yaml`` and used to name the job.
        git_hash: string written to ``git_hash.txt`` in the checkpoint dir.

    Raises:
        AssertionError: if the checkpoint directory already has content.
        SystemExit: always; code 0 after the jobs ran, code 1 if the user
            declined at the prompt.
    """
    import itertools  # local import: only this function needs it

    util.header('=== Running {} ==='.format(spec_file))

    # Make checkpoint dir. All outputs go here
    options = spec['options']
    storagedir = options['storagedir']
    n_workers = options['n_workers']
    checkptdir = os.path.join(storagedir, options['checkpt_subdir'])
    util.mkdir_p(checkptdir)
    assert not os.listdir(
        checkptdir), 'Checkpoint directory {} is not empty!'.format(checkptdir)

    cmd_templates, output_filenames, argdicts = [], [], []
    train_spec = spec['training']
    arg_spec = spec['arguments']

    # itertools.product varies the last factor fastest, which is the same
    # order the equivalent nested for-loops would produce.
    grid = itertools.product(
        train_spec['algorithms'],
        train_spec['baselines'],
        train_spec['policy_archs'],
        train_spec['baseline_archs'],
        arg_spec['radius'],
        arg_spec['n_sensors'],
        arg_spec['sensor_ranges'],
        arg_spec['n_evaders'],
        arg_spec['n_pursuers'],
        arg_spec['n_coop'],
        arg_spec['n_poison'],
        arg_spec['food_reward'],
        arg_spec['poison_reward'],
        arg_spec['encounter_reward'],
        arg_spec['discounts'],
        arg_spec['gae_lambdas'],
        range(train_spec['runs']),
    )
    for (alg, bline, parch, barch, rad, n_se, srange, n_ev, n_pu, n_co, n_po,
         f_rew, p_rew, e_rew, disc, gae, run) in grid:
        if n_co > n_pu:
            # More cooperating agents than pursuers: skip the combination.
            continue

        # Unique identifier of this configuration; used for both the
        # stdout capture file and the .h5 log file.
        strid = (
            'alg={},bline={},parch={},barch={},'.format(
                alg['name'], bline, parch, barch) +
            'rad={},n_se={},srange={},n_ev={},n_pu={},n_co={},n_po={},'.format(
                rad, n_se, srange, n_ev, n_pu, n_co, n_po) +
            'f_rew={},p_rew={},e_rew={},'.format(f_rew, p_rew, e_rew) +
            'disc={},gae={},run={}'.format(disc, gae, run))

        # Command templates may span several lines in the spec file.
        cmd_templates.append(alg['cmd'].replace('\n', ' ').strip())
        output_filenames.append(strid + '.txt')
        argdicts.append({
            'baseline_type': bline,
            'radius': rad,
            'sensor_range': srange,
            'n_sensors': n_se,
            'n_pursuers': n_pu,
            'n_evaders': n_ev,
            'n_coop': n_co,
            'n_poison': n_po,
            'discount': disc,
            'food_reward': f_rew,
            'poison_reward': p_rew,
            'encounter_reward': e_rew,
            'gae_lambda': gae,
            'policy_arch': parch,
            'baseline_arch': barch,
            'log': os.path.join(checkptdir, strid + '.h5'),
        })

    util.ok('{} jobs to run...'.format(len(cmd_templates)))
    util.warn('Continue? y/n')
    if input() != 'y':
        util.failure('Canceled.')
        sys.exit(1)

    pipeline.run_jobs(cmd_templates,
                      output_filenames,
                      argdicts,
                      storagedir,
                      jobname=os.path.split(spec_file)[-1],
                      n_workers=n_workers)

    # Copy the pipeline yaml file and the git hash to the output dir too.
    # This used to sit after both sys.exit() calls and never executed; it
    # now runs once the jobs are done, before the successful exit.
    shutil.copyfile(spec_file, os.path.join(checkptdir, 'pipeline.yaml'))
    with open(os.path.join(checkptdir, 'git_hash.txt'), 'w') as f:
        f.write(git_hash + '\n')
    sys.exit(0)
Example #9
0
    def __init__(self, env, args):
        """Build baselines, sampler configuration and the TRPO optimizer.

        Args:
            env: environment handed to ``rltools_envpolicy_parser``, which
                returns the environment and policies actually used.
            args: namespace of settings. ``baseline_type`` selects
                'linear', 'mlp' or 'zero'; ``sampler`` selects 'simple' or
                'parallel'; ``control`` selects the optimizer and, for the
                simple sampler, the sampler class.

        Raises:
            NotImplementedError: for an unrecognised ``baseline_type``,
                ``sampler``, or (simple sampler only) ``control`` value.
        """
        self.args = args
        env, policies, policy = rltools_envpolicy_parser(env, args)

        # --- Baseline(s): one per agent in concurrent mode, else a single one.
        baseline_kind = args.baseline_type
        if baseline_kind == 'linear':
            if args.control == 'concurrent':
                baselines = []
                for agid in range(len(env.agents)):
                    baselines.append(
                        LinearFeatureBaseline(
                            env.agents[agid].observation_space,
                            enable_obsnorm=args.enable_obsnorm,
                            varscope_name='baseline_{}'.format(agid)))
            else:
                baseline = LinearFeatureBaseline(policy.observation_space,
                                                 enable_obsnorm=args.enable_obsnorm,
                                                 varscope_name='baseline')

        elif baseline_kind == 'mlp':

            def build_mlp_baseline(obs_space, scope_name):
                # Both control modes share every setting except the
                # observation space and the variable-scope name.
                return MLPBaseline(obs_space,
                                   hidden_spec=args.baseline_hidden_spec,
                                   enable_obsnorm=args.enable_obsnorm,
                                   enable_vnorm=args.enable_vnorm,
                                   max_kl=args.vf_max_kl,
                                   damping=args.vf_cg_damping,
                                   time_scale=1. / args.max_traj_len,
                                   varscope_name=scope_name)

            if args.control == 'concurrent':
                baselines = [
                    build_mlp_baseline(env.agents[agid].observation_space,
                                       '{}_baseline'.format(agid))
                    for agid in range(len(env.agents))
                ]
            else:
                baseline = build_mlp_baseline(policy.observation_space, 'baseline')

        elif baseline_kind == 'zero':
            if args.control == 'concurrent':
                baselines = []
                for agid in range(len(env.agents)):
                    baselines.append(ZeroBaseline(env.agents[agid].observation_space))
            else:
                baseline = ZeroBaseline(policy.observation_space)
        else:
            raise NotImplementedError()

        # --- Sampler class.
        sampler_kind = args.sampler
        if sampler_kind == 'parallel':
            sampler_cls = ParallelSampler
        elif sampler_kind == 'simple':
            if args.control == 'centralized':
                sampler_cls = SimpleSampler
            elif args.control == 'decentralized':
                sampler_cls = DecSampler
            elif args.control == 'concurrent':
                sampler_cls = ConcSampler
            else:
                raise NotImplementedError()
        else:
            raise NotImplementedError()

        # --- Sampler arguments: a common core, plus extras for 'parallel'.
        sampler_args = dict(max_traj_len=args.max_traj_len,
                            n_timesteps=args.n_timesteps,
                            n_timesteps_min=args.n_timesteps_min,
                            n_timesteps_max=args.n_timesteps_max,
                            timestep_rate=args.timestep_rate,
                            adaptive=args.adaptive_batch,
                            enable_rewnorm=args.enable_rewnorm)
        if sampler_kind == 'parallel':
            sampler_args.update(n_workers=args.sampler_workers,
                                mode=args.control,
                                discard_extra=False)

        # --- Optimizer.
        step_func = TRPO(max_kl=args.max_kl)
        shared_kwargs = dict(env=env,
                             step_func=step_func,
                             discount=args.discount,
                             gae_lambda=args.gae_lambda,
                             sampler_cls=sampler_cls,
                             sampler_args=sampler_args,
                             n_iter=args.n_iter)
        if args.control == 'concurrent':
            self.algo = ConcurrentPolicyOptimizer(policies=policies,
                                                  baselines=baselines,
                                                  target_policy=policy,
                                                  interp_alpha=args.interp_alpha,
                                                  **shared_kwargs)
        else:
            self.algo = SamplingPolicyOptimizer(policy=policy,
                                                baseline=baseline,
                                                **shared_kwargs)

        # --- Log the full argument set alongside the training log.
        argstr = json.dumps(vars(args), separators=(',', ':'), indent=2)
        util.header(argstr)
        self.log_f = log.TrainingLog(args.log, [('args', argstr)], debug=args.debug)
Example #10
0
    def __init__(
        self,
        input_B_T_Di,
        input_shape,
        output_dim,
        layer_specjson,  # JSON string; see the docstring for the keys read
        debug=False):
        """Build a GRU network with an optional feed-forward feature stage.

        Args:
            input_B_T_Di: input tensor; its first two dynamic dimensions are
                reused when reshaping the features and the output.
            input_shape: shape of one input element, at least one entry.
            output_dim: width of the final affine output.
            layer_specjson: JSON string providing ``gru_hidden_dim``,
                ``gru_hidden_nonlin`` ('relu', 'elu', 'tanh' or 'identity'),
                ``gru_hidden_init_trainable`` and optionally ``feature_net``.
            debug: when true, print the parsed specification.
        """
        layerspec = json.loads(layer_specjson)
        if debug:
            util.ok('Loading GRUNet specification')
            pretty_spec = json.dumps(layerspec, indent=2,
                                     separators=(',', ': '))
            util.header(pretty_spec)

        self._hidden_dim = layerspec['gru_hidden_dim']
        nonlinearities = {
            'relu': tf.nn.relu,
            'elu': tf.nn.elu,
            'tanh': tf.tanh,
            'identity': tf.identity
        }
        self._hidden_nonlin = nonlinearities[layerspec['gru_hidden_nonlin']]
        self._hidden_init_trainable = layerspec['gru_hidden_init_trainable']
        self._output_dim = output_dim
        assert len(input_shape) >= 1  # input_shape is Di
        self.input_B_T_Di = input_B_T_Di

        with tf.variable_scope(type(self).__name__) as self.varscope:
            # Feature stage: either the raw input or a feed-forward net
            # whose output is reshaped using the input's first two dims.
            if 'feature_net' not in layerspec:
                self._feature_shape = input_shape
                self._feature = input_B_T_Di
            else:
                feature_net = FeedforwardNet(input_B_T_Di, input_shape,
                                             layerspec['feature_net'])
                self._feature_shape = feature_net.output_shape
                feature_out = feature_net.output
                feature_dims = tf.pack([
                    tf.shape(self.input_B_T_Di)[0],
                    tf.shape(self.input_B_T_Di)[1], self._feature_shape[-1]
                ])
                self._feature = tf.reshape(feature_out, feature_dims)

            # Placeholders for running the recurrence one step at a time.
            step_input_shape = (None, ) + self._feature_shape
            self._step_input = tf.placeholder(tf.float32,
                                              shape=step_input_shape,
                                              name='step_input')
            prev_hidden_shape = (None, self._hidden_dim)
            self._step_prev_hidden = tf.placeholder(tf.float32,
                                                    shape=prev_hidden_shape,
                                                    name='step_prev_hidden')

            # Recurrent layer -> reshape to (hidden_dim,) rows -> affine.
            self._gru_layer = GRULayer(
                self._feature,
                self._feature_shape,
                hidden_units=self._hidden_dim,
                hidden_nonlin=self._hidden_nonlin,
                initializer=None,
                hidden_init_trainable=self._hidden_init_trainable)
            flat_hidden_shape = (self._hidden_dim, )  # (B*step, hidden_dim)
            self._gru_flat_layer = ReshapeLayer(self._gru_layer.output,
                                                flat_hidden_shape)
            self._output_flat_layer = AffineLayer(
                self._gru_flat_layer.output,
                self._gru_flat_layer.output_shape,
                output_shape=(self._output_dim, ),
                Winitializer=None,
                binitializer=None)

            # Restore the first two dims of the input on the flat output.
            flat_output = self._output_flat_layer.output
            output_dims = tf.pack((tf.shape(self.input_B_T_Di)[0],
                                   tf.shape(self.input_B_T_Di)[1], -1))
            self._output = tf.reshape(flat_output, output_dims)
            self._output_shape = (self._output_flat_layer.output_shape[-1], )

            # Single-step path, reusing the affine layer's weights and bias.
            self._step_hidden_layer = self._gru_layer.step_layer(
                self._step_input, self._step_prev_hidden)
            step_projection = tf.matmul(self._step_hidden_layer.output,
                                        self._output_flat_layer.W_Di_Do)
            self._step_output = step_projection + self._output_flat_layer.b_1_Do
            self._hid_init = self._gru_layer.h0
Example #11
0
    def __init__(self, input_B_Di, input_shape, layerspec_json, debug=False):
        """Build a stack of layers described by a JSON specification.

        Each spec entry is a dict whose ``'type'`` is one of ``'reshape'``,
        ``'flatten'``, ``'fc'``, ``'conv'`` or ``'nonlin'``. Layers are
        chained in order, each consuming the previous layer's output and
        output shape. An empty specification yields a network whose output
        is the input itself.

        Args:
            input_B_Di: input tensor fed to the first layer.
            input_shape: shape of the input; must have at least one entry.
            layerspec_json (string): JSON string describing layers.
            debug: when true, print the parsed specification and pass the
                flag on to the layers that accept it.

        Raises:
            NotImplementedError: if a spec entry has an unknown ``'type'``.
        """
        assert len(input_shape) >= 1
        self.input_B_Di = input_B_Di

        layerspec = json.loads(layerspec_json)
        if debug:
            util.ok('Loading feedforward net specification')
            util.header(json.dumps(layerspec, indent=2,
                                   separators=(',', ': ')))

        self.layers = []
        with tf.variable_scope(type(self).__name__) as self.varscope:

            prev_output, prev_output_shape = input_B_Di, input_shape

            for i_layer, ls in enumerate(layerspec):
                # Each layer gets its own numbered sub-scope.
                with tf.variable_scope('layer_%d' % i_layer):
                    if ls['type'] == 'reshape':
                        _check_keys(ls, ['type', 'new_shape'], [])
                        self.layers.append(
                            ReshapeLayer(prev_output,
                                         ls['new_shape'],
                                         debug=debug))

                    elif ls['type'] == 'flatten':
                        _check_keys(ls, ['type'], [])
                        self.layers.append(
                            FlattenLayer(prev_output, debug=debug))

                    elif ls['type'] == 'fc':
                        _check_keys(ls, ['type', 'n'], ['initializer'])
                        self.layers.append(
                            AffineLayer(prev_output,
                                        prev_output_shape,
                                        output_shape=(ls['n'], ),
                                        Winitializer=_parse_initializer(ls),
                                        binitializer=None,
                                        debug=debug))

                    elif ls['type'] == 'conv':
                        _check_keys(ls, [
                            'type', 'chanout', 'filtsize', 'stride', 'padding'
                        ], ['initializer'])
                        # Filters and strides are square: one value is used
                        # for both height and width.
                        self.layers.append(
                            ConvLayer(input_B_Ih_Iw_Ci=prev_output,
                                      input_shape=prev_output_shape,
                                      Co=ls['chanout'],
                                      Fh=ls['filtsize'],
                                      Fw=ls['filtsize'],
                                      Sh=ls['stride'],
                                      Sw=ls['stride'],
                                      padding=ls['padding'],
                                      initializer=_parse_initializer(ls)))

                    elif ls['type'] == 'nonlin':
                        _check_keys(ls, ['type', 'func'], [])
                        self.layers.append(
                            NonlinearityLayer(prev_output,
                                              prev_output_shape,
                                              ls['func'],
                                              debug=debug))

                    else:
                        raise NotImplementedError('Unknown layer type %s' %
                                                  ls['type'])

                prev_output, prev_output_shape = self.layers[
                    -1].output, self.layers[-1].output_shape

            # Assigned after the loop so the attributes exist even when the
            # specification is empty (the net then passes its input through).
            self._output, self._output_shape = prev_output, prev_output_shape
Example #12
0
 def __init__(self, input_, new_shape, debug=False):
     """Reshape ``input_`` to ``(-1,) + new_shape``.

     Args:
         input_: tensor to reshape.
         new_shape: target shape excluding the leading dimension; stored
             as a tuple in ``_output_shape``.
         debug: when true, print the target shape through ``util.header``.
     """
     target_shape = tuple(new_shape)
     self._output_shape = target_shape
     if debug:
         util.header('Reshape(new_shape=%s)' % (str(target_shape), ))
     with tf.variable_scope(type(self).__name__) as self.varscope:
         # The leading -1 leaves the first dimension for tf.reshape to infer.
         full_shape = (-1, ) + target_shape
         self._output = tf.reshape(input_, full_shape)
Example #13
0
def _enumerate_train_jobs(train_spec, arg_spec, checkptdir):
    """Expand the experiment grid into three parallel per-job lists.

    Every combination of the training options and environment arguments is
    visited in the same order as the equivalent nested loops (algorithms
    outermost, run index innermost). Combinations where ``n_coop`` exceeds
    ``n_pursuers`` are skipped.

    Args:
        train_spec: the ``'training'`` section of the pipeline spec.
        arg_spec: the ``'arguments'`` section of the pipeline spec.
        checkptdir: directory in which each job's ``.h5`` log path is placed.

    Returns:
        ``(cmd_templates, output_filenames, argdicts)``, index-aligned lists
        with one entry per job.
    """
    import itertools

    cmd_templates, output_filenames, argdicts = [], [], []
    grid = itertools.product(
        train_spec['algorithms'],
        train_spec['baselines'],
        train_spec['policy_archs'],
        train_spec['baseline_archs'],
        arg_spec['radius'],
        arg_spec['n_sensors'],
        arg_spec['sensor_ranges'],
        arg_spec['n_evaders'],
        arg_spec['n_pursuers'],
        arg_spec['n_coop'],
        arg_spec['n_poison'],
        arg_spec['food_reward'],
        arg_spec['poison_reward'],
        arg_spec['encounter_reward'],
        arg_spec['discounts'],
        arg_spec['gae_lambdas'],
        range(train_spec['runs']))

    for (alg, bline, parch, barch, rad, n_se, srange, n_ev, n_pu, n_co, n_po,
         f_rew, p_rew, e_rew, disc, gae, run) in grid:
        if n_co > n_pu:
            continue

        # Unique identifier for this job; used for both output file names.
        strid = (
            'alg={},bline={},parch={},barch={},'.format(
                alg['name'], bline, parch, barch) +
            'rad={},n_se={},srange={},n_ev={},n_pu={},n_co={},n_po={},'.format(
                rad, n_se, srange, n_ev, n_pu, n_co, n_po) +
            'f_rew={},p_rew={},e_rew={},'.format(f_rew, p_rew, e_rew) +
            'disc={},gae={},run={}'.format(disc, gae, run))

        # The command may span several lines in the spec; flatten it.
        cmd_templates.append(alg['cmd'].replace('\n', ' ').strip())
        output_filenames.append(strid + '.txt')
        argdicts.append({
            'baseline_type': bline,
            'radius': rad,
            'sensor_range': srange,
            'n_sensors': n_se,
            'n_pursuers': n_pu,
            'n_evaders': n_ev,
            'n_coop': n_co,
            'n_poison': n_po,
            'discount': disc,
            'food_reward': f_rew,
            'poison_reward': p_rew,
            'encounter_reward': e_rew,
            'gae_lambda': gae,
            'policy_arch': parch,
            'baseline_arch': barch,
            'log': os.path.join(checkptdir, strid + '.h5'),
        })

    return cmd_templates, output_filenames, argdicts


def phase_train(spec, spec_file, git_hash):
    """Run every training job described by a pipeline spec, then exit.

    Builds one job per point of the experiment grid, asks for confirmation on
    stdin, runs the jobs, and records the spec file and git hash next to the
    results.

    Args:
        spec: parsed pipeline spec with ``'options'``, ``'training'`` and
            ``'arguments'`` sections.
        spec_file: path of the spec file; copied into the checkpoint
            directory as ``pipeline.yaml`` and used to name the job.
        git_hash: string written to ``git_hash.txt`` in the checkpoint
            directory.

    Raises:
        AssertionError: if the checkpoint directory is not empty.
        SystemExit: always; status 0 after the jobs have run, status 1 if the
            user does not answer ``y``.
    """
    util.header('=== Running {} ==='.format(spec_file))

    # Make checkpoint dir. All outputs go here
    storagedir = spec['options']['storagedir']
    n_workers = spec['options']['n_workers']
    checkptdir = os.path.join(storagedir, spec['options']['checkpt_subdir'])
    util.mkdir_p(checkptdir)
    assert not os.listdir(checkptdir), 'Checkpoint directory {} is not empty!'.format(checkptdir)

    cmd_templates, output_filenames, argdicts = _enumerate_train_jobs(
        spec['training'], spec['arguments'], checkptdir)

    util.ok('{} jobs to run...'.format(len(cmd_templates)))
    util.warn('Continue? y/n')
    if input() != 'y':
        util.failure('Canceled.')
        sys.exit(1)

    pipeline.run_jobs(cmd_templates, output_filenames, argdicts, storagedir,
                      jobname=os.path.split(spec_file)[-1], n_workers=n_workers)

    # Copy the pipeline yaml file to the output dir too. This must happen
    # before sys.exit(), which raises SystemExit and ends the function.
    shutil.copyfile(spec_file, os.path.join(checkptdir, 'pipeline.yaml'))
    with open(os.path.join(checkptdir, 'git_hash.txt'), 'w') as f:
        f.write(git_hash + '\n')
    sys.exit(0)
Example #14
0
    def __init__(self, env, args):
        """Assemble the TRPO policy optimizer and training log from ``args``.

        Stores ``args`` on the instance, builds the baseline(s) selected by
        ``args.baseline_type``, picks the sampler class and its arguments
        from ``args.sampler`` / ``args.control``, and creates ``self.algo``
        and ``self.log_f``.

        Raises:
            NotImplementedError: for an unrecognised baseline type or
                sampler, or an unrecognised control mode with the 'simple'
                sampler.
        """
        self.args = args
        env, policies, policy = rltools_envpolicy_parser(env, args)

        # Pick a factory for the requested baseline type. ``agent_id`` is
        # None for a single shared baseline and the agent index otherwise;
        # it only affects the variable scope name.
        baseline_kind = args.baseline_type
        if baseline_kind == 'linear':

            def make_baseline(obs_space, agent_id=None):
                scope = ('baseline' if agent_id is None else
                         'baseline_{}'.format(agent_id))
                return LinearFeatureBaseline(obs_space,
                                             enable_obsnorm=args.enable_obsnorm,
                                             varscope_name=scope)
        elif baseline_kind == 'mlp':

            def make_baseline(obs_space, agent_id=None):
                scope = ('baseline' if agent_id is None else
                         '{}_baseline'.format(agent_id))
                return MLPBaseline(obs_space,
                                   hidden_spec=args.baseline_hidden_spec,
                                   enable_obsnorm=args.enable_obsnorm,
                                   enable_vnorm=args.enable_vnorm,
                                   max_kl=args.vf_max_kl,
                                   damping=args.vf_cg_damping,
                                   time_scale=1. / args.max_traj_len,
                                   varscope_name=scope)
        elif baseline_kind == 'zero':

            def make_baseline(obs_space, agent_id=None):
                return ZeroBaseline(obs_space)
        else:
            raise NotImplementedError()

        # Concurrent control gets one baseline per agent; every other mode
        # shares a single baseline built on the policy's observation space.
        is_concurrent = args.control == 'concurrent'
        if is_concurrent:
            baselines = [make_baseline(env.agents[agid].observation_space, agid)
                         for agid in range(len(env.agents))]
        else:
            baseline = make_baseline(policy.observation_space)

        # Resolve the sampler class plus any sampler-specific arguments.
        if args.sampler == 'simple':
            simple_samplers = {
                'centralized': SimpleSampler,
                'decentralized': DecSampler,
                'concurrent': ConcSampler,
            }
            if args.control not in simple_samplers:
                raise NotImplementedError()
            sampler_cls = simple_samplers[args.control]
            extra_sampler_args = {}
        elif args.sampler == 'parallel':
            sampler_cls = ParallelSampler
            extra_sampler_args = dict(n_workers=args.sampler_workers,
                                      mode=args.control,
                                      discard_extra=False)
        else:
            raise NotImplementedError()

        sampler_args = dict(max_traj_len=args.max_traj_len,
                            n_timesteps=args.n_timesteps,
                            n_timesteps_min=args.n_timesteps_min,
                            n_timesteps_max=args.n_timesteps_max,
                            timestep_rate=args.timestep_rate,
                            adaptive=args.adaptive_batch,
                            enable_rewnorm=args.enable_rewnorm)
        sampler_args.update(extra_sampler_args)

        # Keyword arguments common to both optimizer classes.
        step_func = TRPO(max_kl=args.max_kl)
        shared_kwargs = dict(env=env,
                             step_func=step_func,
                             discount=args.discount,
                             gae_lambda=args.gae_lambda,
                             sampler_cls=sampler_cls,
                             sampler_args=sampler_args,
                             n_iter=args.n_iter)
        if is_concurrent:
            self.algo = ConcurrentPolicyOptimizer(policies=policies,
                                                  baselines=baselines,
                                                  target_policy=policy,
                                                  interp_alpha=args.interp_alpha,
                                                  **shared_kwargs)
        else:
            self.algo = SamplingPolicyOptimizer(policy=policy,
                                                baseline=baseline,
                                                **shared_kwargs)

        # Record the full argument set both on screen and in the training log.
        argstr = json.dumps(vars(args), separators=(',', ':'), indent=2)
        util.header(argstr)
        self.log_f = log.TrainingLog(args.log, [('args', argstr)], debug=args.debug)