def __init__(self, input_B_Ih_Iw_Ci, input_shape, Co, Fh, Fw, Sh, Sw, padding, initializer):
    """Build a 2-D convolution with an additive per-output-channel bias.

    The spatial output size is derived from the input size, filter size,
    stride and padding mode, and stored (as plain ints) in
    ``self._output_shape``.

    Args:
        input_B_Ih_Iw_Ci: input tensor handed to ``tf.nn.conv2d``.
        input_shape: ``(Ih, Iw, Ci)`` for one example; must have length 3.
        Co: number of output channels.
        Fh, Fw: filter height and width.
        Sh, Sw: stride along height and width.
        padding: ``'SAME'`` or ``'VALID'``.
        initializer: initializer for the filter weights; ``None`` selects
            ``tf.contrib.layers.xavier_initializer()``.

    Raises:
        ValueError: if ``padding`` is neither ``'SAME'`` nor ``'VALID'``.
    """
    assert len(input_shape) == 3
    Ih, Iw, Ci = input_shape

    # np.ceil returns a float; cast so that the advertised output shape is
    # made of integers that downstream layers can use as dimensions.
    if padding == 'SAME':
        Oh = int(np.ceil(float(Ih) / float(Sh)))
        Ow = int(np.ceil(float(Iw) / float(Sw)))
    elif padding == 'VALID':
        Oh = int(np.ceil(float(Ih - Fh + 1) / float(Sh)))
        Ow = int(np.ceil(float(Iw - Fw + 1) / float(Sw)))
    else:
        # Previously this fell through and failed later with an unbound name.
        raise ValueError("Unsupported padding %r; expected 'SAME' or 'VALID'" % (padding, ))

    util.header(
        'Conv(chanin=%d, chanout=%d, filth=%d, filtw=%d, outh=%d, outw=%d, strideh=%d, stridew=%d, padding=%s)'
        % (Ci, Co, Fh, Fw, Oh, Ow, Sh, Sw, padding))
    self._output_shape = (Oh, Ow, Co)

    with tf.variable_scope(type(self).__name__) as self.varscope:
        if initializer is None:
            initializer = tf.contrib.layers.xavier_initializer()
        self.W_Fh_Fw_Ci_Co = tf.get_variable('W', shape=[Fh, Fw, Ci, Co], initializer=initializer)
        self.b_1_1_1_Co = tf.get_variable(
            'b', shape=[1, 1, 1, Co], initializer=tf.constant_initializer(0.))
        # The bias has shape [1, 1, 1, Co], so it broadcasts over the other axes.
        self.output_B_Oh_Ow_Co = tf.nn.conv2d(
            input_B_Ih_Iw_Ci, self.W_Fh_Fw_Ci_Co, [1, Sh, Sw, 1], padding) + self.b_1_1_1_Co
def __init__(self, input_B_Di, input_shape, output_shape, Winitializer, binitializer, debug=False):
    """Create a fully connected layer: ``matmul(input, W) + b``.

    Args:
        input_B_Di: input tensor multiplied by the weight matrix.
        input_shape: 1-tuple holding the input width.
        output_shape: 1-tuple holding the output width.
        Winitializer: weight initializer; ``None`` selects
            ``tf.contrib.layers.xavier_initializer()``.
        binitializer: bias initializer; ``None`` selects
            ``tf.zeros_initializer``.
        debug: when true, log a one-line summary of the layer.
    """
    assert len(input_shape) == len(output_shape) == 1
    n_in, n_out = input_shape[0], output_shape[0]

    if debug:
        util.header('Affine(in=%d, out=%d)' % (n_in, n_out))
    self._output_shape = (n_out, )

    with tf.variable_scope(type(self).__name__) as self.varscope:
        # Fall back to the default initializers when none are supplied.
        w_init = tf.contrib.layers.xavier_initializer() if Winitializer is None else Winitializer
        b_init = tf.zeros_initializer if binitializer is None else binitializer

        self.W_Di_Do = tf.get_variable('W', shape=[n_in, n_out], initializer=w_init)
        self.b_1_Do = tf.get_variable('b', shape=[1, n_out], initializer=b_init)
        product_B_Do = tf.matmul(input_B_Di, self.W_Di_Do)
        self.output_B_Do = product_B_Do + self.b_1_Do
def __init__(self, input_B_Di, output_shape, func, debug=True):
    """Apply an elementwise nonlinearity selected by name.

    Args:
        input_B_Di: input tensor the nonlinearity is applied to.
        output_shape: stored unchanged as this layer's output shape.
        func: one of ``'relu'``, ``'elu'`` or ``'tanh'``.
        debug: when true, log a one-line summary of the layer. Defaults to
            ``True`` so callers that do not pass it keep the previous
            always-log behaviour, while callers that pass ``debug=...`` as
            a keyword (as the other layer constructors accept) no longer
            fail with a ``TypeError``.

    Raises:
        KeyError: if ``func`` is not one of the supported names.
    """
    if debug:
        util.header('Nonlinearity(func=%s)' % func)
    self._output_shape = output_shape

    with tf.variable_scope(type(self).__name__) as self.varscope:
        nonlinearities = {
            'relu': tf.nn.relu,
            'elu': tf.nn.elu,
            'tanh': tf.tanh,
        }
        self.output_B_Do = nonlinearities[func](input_B_Di)
def __init__(self, input_, outdim=2, debug=False):
    """Collapse the trailing axes of ``input_`` into a single axis.

    The first ``outdim - 1`` axes are kept and everything after them is
    merged into one axis.

    Args:
        input_: tensor to flatten.
        outdim: number of axes of the result; must be at least 1.
        debug: when true, log the resulting static shape.
    """
    assert outdim >= 1
    self._outdim = outdim
    n_kept = self._outdim - 1

    # Static bookkeeping: size of the merged axis, or None when any of the
    # merged dimensions is not known statically.
    static_shape = tuple(input_.get_shape().as_list())
    merged_dims = static_shape[n_kept:]
    if all(dim is not None for dim in merged_dims):
        merged_size = int(np.prod(merged_dims))
    else:
        merged_size = None
    self._output_shape = static_shape[1:n_kept] + (merged_size, )

    if debug:
        util.header('Flatten(new_shape=%s)' % str(self._output_shape))

    # Dynamic reshape: the target shape is assembled from runtime shape ops.
    kept_dyn = tf.shape(input_)[:n_kept:]
    merged_dyn = tf.reduce_prod(tf.shape(input_)[n_kept:])
    target_shape = tf.concat(0, [kept_dyn, tf.pack([merged_dyn])])
    self._output = tf.reshape(input_, target_shape)
def load_h5(self, sess, h5file, key):
    """Load this object's variables from an HDF5 file and verify their hash.

    Args:
        sess: session used to run the assignment ops and compute the hash.
        h5file: path of the HDF5 file, opened read-only.
        key: name of the entry inside the file that holds the variables.

    Raises:
        RuntimeError: if one of the variables has no entry under ``key``.
        AssertionError: if the hash computed after loading differs from
            the ``hash`` attribute stored in the file.
    """
    with h5py.File(h5file, 'r') as f:
        dset = f[key]

        assign_ops = []
        for var in self.get_variables():
            util.header('Reading {}'.format(var.name))
            if var.name not in dset:
                raise RuntimeError('Variable {} not found in {}'.format(var.name, dset))
            assign_ops.append(var.assign(dset[var.name][...]))

        # Run all assignments together, then compare hashes.
        sess.run(assign_ops)

        loaded_hash = self.savehash(sess)
        stored_hash = dset[self.varscope.name].attrs['hash']
        assert loaded_hash == stored_hash, \
            'Checkpoint hash {} does not match loaded hash {}'.format(stored_hash, loaded_hash)
def __init__(self, input_B_Di, input_shape, output_shape, initializer):
    """Create a fully connected layer with a zero-initialised bias.

    Args:
        input_B_Di: input tensor multiplied by the weight matrix.
        input_shape: 1-tuple holding the input width.
        output_shape: 1-tuple holding the output width.
        initializer: weight initializer; ``None`` selects
            ``tf.contrib.layers.xavier_initializer()``.
    """
    assert len(input_shape) == len(output_shape) == 1
    fan_in, fan_out = input_shape[0], output_shape[0]

    util.header('Affine(in=%d, out=%d)' % (fan_in, fan_out))
    self._output_shape = (fan_out, )

    with tf.variable_scope(type(self).__name__) as self.varscope:
        weight_init = initializer
        if weight_init is None:
            # A truncated normal with stddev sqrt(2 / fan_in) was tried here
            # previously; Xavier is the current default.
            weight_init = tf.contrib.layers.xavier_initializer()

        self.W_Di_Do = tf.get_variable('W', shape=[fan_in, fan_out], initializer=weight_init)
        self.b_1_Do = tf.get_variable(
            'b', shape=[1, fan_out], initializer=tf.constant_initializer(0.))
        product_B_Do = tf.matmul(input_B_Di, self.W_Di_Do)
        self.output_B_Do = product_B_Do + self.b_1_Do
def __init__(self, input_B_Ih_Iw_Ci, input_shape, Co, Fh, Fw, Oh, Ow, Sh, Sw, padding, initializer):
    """Build a 2-D convolution with an additive per-output-channel bias.

    Args:
        input_B_Ih_Iw_Ci: input tensor handed to ``tf.nn.conv2d``.
        input_shape: shape of one example; must have length 3, with the
            channel count in the last position.
        Co: number of output channels.
        Fh, Fw: filter height and width.
        Oh, Ow: output height and width, supplied by the caller and only
            used for logging and for ``self._output_shape``.
        Sh, Sw: stride along height and width.
        padding: padding mode forwarded to ``tf.nn.conv2d``.
        initializer: initializer for the filter weights; ``None`` selects
            ``tf.contrib.layers.xavier_initializer()``.
    """
    # TODO: calculate Oh and Ow from the other stuff.
    assert len(input_shape) == 3
    n_chan_in = input_shape[2]

    summary = (
        'Conv(chanin=%d, chanout=%d, filth=%d, filtw=%d, outh=%d, outw=%d, strideh=%d, stridew=%d, padding=%s)'
        % (n_chan_in, Co, Fh, Fw, Oh, Ow, Sh, Sw, padding))
    util.header(summary)
    self._output_shape = (Oh, Ow, Co)

    with tf.variable_scope(type(self).__name__) as self.varscope:
        filter_init = initializer
        if filter_init is None:
            # A truncated normal with stddev sqrt(2 / (Fh*Fw*Ci)) was tried
            # here previously; Xavier is the current default.
            filter_init = tf.contrib.layers.xavier_initializer()

        self.W_Fh_Fw_Ci_Co = tf.get_variable(
            'W', shape=[Fh, Fw, n_chan_in, Co], initializer=filter_init)
        self.b_1_1_1_Co = tf.get_variable(
            'b', shape=[1, 1, 1, Co], initializer=tf.constant_initializer(0.))

        strides = [1, Sh, Sw, 1]
        conv_out = tf.nn.conv2d(input_B_Ih_Iw_Ci, self.W_Fh_Fw_Ci_Co, strides, padding)
        self.output_B_Oh_Ow_Co = conv_out + self.b_1_1_1_Co
def phase_train(spec, spec_file, git_hash):
    """Expand a pipeline spec into one training job per setting and run them.

    Every combination of the training options (algorithm, baseline, policy
    and baseline architecture, run index) and the environment arguments is
    turned into a command template, an output file name and an argument
    dict. Combinations with ``n_coop > n_pursuers`` are skipped. After
    interactive confirmation the jobs are handed to ``pipeline.run_jobs``;
    the spec file and git hash are then recorded in the checkpoint
    directory.

    Args:
        spec: parsed pipeline specification with ``options``, ``training``
            and ``arguments`` sections.
        spec_file: path of the spec file; copied next to the checkpoints
            and used to derive the job name.
        git_hash: string written to ``git_hash.txt`` in the checkpoint dir.

    Raises:
        AssertionError: if the checkpoint directory is not empty.
        SystemExit: always; status 0 after the jobs ran, 1 when the user
            declines to continue.
    """
    # Imported locally so this block does not rely on the module's import list.
    import itertools

    util.header('=== Running {} ==='.format(spec_file))

    # Make checkpoint dir. All outputs go here
    storagedir = spec['options']['storagedir']
    n_workers = spec['options']['n_workers']
    checkptdir = os.path.join(storagedir, spec['options']['checkpt_subdir'])
    util.mkdir_p(checkptdir)
    assert not os.listdir(checkptdir), 'Checkpoint directory {} is not empty!'.format(checkptdir)

    train_spec = spec['training']
    arg_spec = spec['arguments']

    # itertools.product varies the last factor fastest, i.e. it yields the
    # same order as the equivalent nest of for-loops.
    sweep = itertools.product(
        train_spec['algorithms'],
        train_spec['baselines'],
        train_spec['policy_archs'],
        train_spec['baseline_archs'],
        arg_spec['radius'],
        arg_spec['n_sensors'],
        arg_spec['sensor_ranges'],
        arg_spec['n_evaders'],
        arg_spec['n_pursuers'],
        arg_spec['n_coop'],
        arg_spec['n_poison'],
        arg_spec['food_reward'],
        arg_spec['poison_reward'],
        arg_spec['encounter_reward'],
        arg_spec['discounts'],
        arg_spec['gae_lambdas'],
        range(train_spec['runs']),
    )

    cmd_templates, output_filenames, argdicts = [], [], []
    for (alg, bline, parch, barch, rad, n_se, srange, n_ev, n_pu, n_co, n_po,
         f_rew, p_rew, e_rew, disc, gae, run) in sweep:
        # Skip settings that ask for more cooperating agents than pursuers.
        if n_co > n_pu:
            continue

        strid = (
            'alg={},bline={},parch={},barch={},'.format(alg['name'], bline, parch, barch) +
            'rad={},n_se={},srange={},n_ev={},n_pu={},n_co={},n_po={},'.format(
                rad, n_se, srange, n_ev, n_pu, n_co, n_po) +
            'f_rew={},p_rew={},e_rew={},'.format(f_rew, p_rew, e_rew) +
            'disc={},gae={},run={}'.format(disc, gae, run))

        cmd_templates.append(alg['cmd'].replace('\n', ' ').strip())
        output_filenames.append(strid + '.txt')
        argdicts.append({
            'baseline_type': bline,
            'radius': rad,
            'sensor_range': srange,
            'n_sensors': n_se,
            'n_pursuers': n_pu,
            'n_evaders': n_ev,
            'n_coop': n_co,
            'n_poison': n_po,
            'discount': disc,
            'food_reward': f_rew,
            'poison_reward': p_rew,
            'encounter_reward': e_rew,
            'gae_lambda': gae,
            'policy_arch': parch,
            'baseline_arch': barch,
            'log': os.path.join(checkptdir, strid + '.h5'),
        })

    util.ok('{} jobs to run...'.format(len(cmd_templates)))
    util.warn('Continue? y/n')
    if input() != 'y':
        util.failure('Canceled.')
        sys.exit(1)

    pipeline.run_jobs(cmd_templates, output_filenames, argdicts, storagedir,
                      jobname=os.path.split(spec_file)[-1], n_workers=n_workers)

    # Copy the pipeline yaml file to the output dir too. This used to sit
    # after both sys.exit() calls and therefore never ran.
    shutil.copyfile(spec_file, os.path.join(checkptdir, 'pipeline.yaml'))
    with open(os.path.join(checkptdir, 'git_hash.txt'), 'w') as f:
        f.write(git_hash + '\n')

    sys.exit(0)
def __init__(self, env, args):
    """Assemble baselines, sampler, TRPO step and optimizer from ``args``.

    Args:
        env: environment, passed through ``rltools_envpolicy_parser``
            together with ``args`` to obtain the environment and policies.
        args: namespace of options; also dumped as JSON into the training
            log.

    Raises:
        NotImplementedError: for an unknown ``args.baseline_type`` or
            ``args.sampler``, or an ``args.control`` mode the simple
            sampler does not support.
    """
    self.args = args
    env, policies, policy = rltools_envpolicy_parser(env, args)

    # --- Baseline(s) -------------------------------------------------------
    # Per-agent scope names differ between the linear ('baseline_<id>') and
    # MLP ('<id>_baseline') baselines; both conventions are kept as they are.
    baseline_type = args.baseline_type
    if baseline_type == 'linear':
        agent_scope_fmt = 'baseline_{}'

        def make_baseline(obs_space, scope):
            return LinearFeatureBaseline(obs_space, enable_obsnorm=args.enable_obsnorm,
                                         varscope_name=scope)
    elif baseline_type == 'mlp':
        agent_scope_fmt = '{}_baseline'

        def make_baseline(obs_space, scope):
            return MLPBaseline(obs_space, hidden_spec=args.baseline_hidden_spec,
                               enable_obsnorm=args.enable_obsnorm,
                               enable_vnorm=args.enable_vnorm, max_kl=args.vf_max_kl,
                               damping=args.vf_cg_damping,
                               time_scale=1. / args.max_traj_len, varscope_name=scope)
    elif baseline_type == 'zero':
        agent_scope_fmt = '{}'  # ZeroBaseline takes no scope name

        def make_baseline(obs_space, scope):
            return ZeroBaseline(obs_space)
    else:
        raise NotImplementedError()

    if args.control == 'concurrent':
        # One baseline per agent.
        baselines = [
            make_baseline(agent.observation_space, agent_scope_fmt.format(agid))
            for agid, agent in enumerate(env.agents)
        ]
    else:
        baseline = make_baseline(policy.observation_space, 'baseline')

    # --- Sampler -----------------------------------------------------------
    if args.sampler == 'simple':
        if args.control == 'centralized':
            sampler_cls = SimpleSampler
        elif args.control == 'decentralized':
            sampler_cls = DecSampler
        elif args.control == 'concurrent':
            sampler_cls = ConcSampler
        else:
            raise NotImplementedError()
        uses_workers = False
    elif args.sampler == 'parallel':
        sampler_cls = ParallelSampler
        uses_workers = True
    else:
        raise NotImplementedError()

    sampler_args = dict(max_traj_len=args.max_traj_len, n_timesteps=args.n_timesteps,
                        n_timesteps_min=args.n_timesteps_min,
                        n_timesteps_max=args.n_timesteps_max,
                        timestep_rate=args.timestep_rate, adaptive=args.adaptive_batch,
                        enable_rewnorm=args.enable_rewnorm)
    if uses_workers:
        # Extra settings understood only by the parallel sampler.
        sampler_args.update(n_workers=args.sampler_workers, mode=args.control,
                            discard_extra=False)

    # --- Optimizer ---------------------------------------------------------
    step_func = TRPO(max_kl=args.max_kl)
    shared_kwargs = dict(env=env, step_func=step_func, discount=args.discount,
                         gae_lambda=args.gae_lambda, sampler_cls=sampler_cls,
                         sampler_args=sampler_args, n_iter=args.n_iter)
    if args.control == 'concurrent':
        self.algo = ConcurrentPolicyOptimizer(policies=policies, baselines=baselines,
                                              target_policy=policy,
                                              interp_alpha=args.interp_alpha,
                                              **shared_kwargs)
    else:
        self.algo = SamplingPolicyOptimizer(policy=policy, baseline=baseline,
                                            **shared_kwargs)

    # --- Logging -----------------------------------------------------------
    argstr = json.dumps(vars(args), separators=(',', ':'), indent=2)
    util.header(argstr)
    self.log_f = log.TrainingLog(args.log, [('args', argstr)], debug=args.debug)
def __init__(
        self,
        input_B_T_Di,
        input_shape,
        output_dim,
        layer_specjson,
        # hidden_dim, output_dim, hidden_nonlin=tf.nn.relu,
        # hidden_init_trainable=False
        debug=False):
    """Build a GRU network: optional feature net, GRU layer, affine output.

    Args:
        input_B_T_Di: input tensor; its first two dynamic dimensions are
            reused when reshaping the features and the final output.
        input_shape: shape of a single input; must have length >= 1.
        output_dim: width of the affine output layer.
        layer_specjson: JSON string. Keys read here: ``gru_hidden_dim``,
            ``gru_hidden_nonlin`` (``'relu'``, ``'elu'``, ``'tanh'`` or
            ``'identity'``), ``gru_hidden_init_trainable`` and, optionally,
            ``feature_net``.
        debug: when true, log the parsed specification. It is not forwarded
            to the sub-layers built here.

    Raises:
        KeyError: if a required key is missing from the specification or
            ``gru_hidden_nonlin`` names an unsupported nonlinearity.
    """
    layerspec = json.loads(layer_specjson)
    if debug:
        util.ok('Loading GRUNet specification')
        util.header(json.dumps(layerspec, indent=2, separators=(',', ': ')))
    self._hidden_dim = layerspec['gru_hidden_dim']
    # Map the nonlinearity name from the spec onto the TensorFlow function.
    self._hidden_nonlin = {
        'relu': tf.nn.relu,
        'elu': tf.nn.elu,
        'tanh': tf.tanh,
        'identity': tf.identity
    }[layerspec['gru_hidden_nonlin']]
    self._hidden_init_trainable = layerspec['gru_hidden_init_trainable']
    self._output_dim = output_dim
    assert len(input_shape) >= 1  # input_shape is Di
    self.input_B_T_Di = input_B_T_Di
    with tf.variable_scope(type(self).__name__) as self.varscope:
        if 'feature_net' in layerspec:
            # NOTE(review): the value is handed to FeedforwardNet as-is; if
            # FeedforwardNet expects a JSON string (it calls json.loads on
            # its third argument in this file), 'feature_net' must itself
            # be a JSON-encoded string -- confirm against the specs in use.
            _feature_net = FeedforwardNet(input_B_T_Di, input_shape, layerspec['feature_net'])
            self._feature_shape = _feature_net.output_shape
            # Reshape the feature net output using the first two dynamic
            # dimensions of the input and the feature net's last dimension.
            self._feature = tf.reshape(
                _feature_net.output,
                tf.pack([
                    tf.shape(self.input_B_T_Di)[0],
                    tf.shape(self.input_B_T_Di)[1], self._feature_shape[-1]
                ]))
        else:
            # No feature net: the GRU consumes the raw input.
            self._feature_shape = input_shape
            self._feature = input_B_T_Di
        # Placeholders for single-step evaluation: one step of features and
        # the previous hidden state.
        # NOTE(review): `(None, ) + self._feature_shape` requires the shape
        # to be a tuple -- confirm callers never pass a list.
        self._step_input = tf.placeholder(tf.float32, shape=(None, ) + self._feature_shape,
                                          name='step_input')
        self._step_prev_hidden = tf.placeholder(tf.float32, shape=(None, self._hidden_dim),
                                                name='step_prev_hidden')
        self._gru_layer = GRULayer(
            self._feature, self._feature_shape, hidden_units=self._hidden_dim,
            hidden_nonlin=self._hidden_nonlin, initializer=None,
            hidden_init_trainable=self._hidden_init_trainable)
        self._gru_flat_layer = ReshapeLayer(
            self._gru_layer.output, (self._hidden_dim, ))  # (B*step, hidden_dim)
        self._output_flat_layer = AffineLayer(
            self._gru_flat_layer.output, self._gru_flat_layer.output_shape,
            output_shape=(self._output_dim, ), Winitializer=None, binitializer=None)
        # Reshape the flat affine output using the input's first two
        # dynamic dimensions; -1 lets TensorFlow infer the last one.
        self._output = tf.reshape(
            self._output_flat_layer.output,
            tf.pack((tf.shape(self.input_B_T_Di)[0], tf.shape(self.input_B_T_Di)[1], -1)))
        self._output_shape = (self._output_flat_layer.output_shape[-1], )
        # Single-step path: reuses the affine layer's W and b on the hidden
        # state produced by the GRU step layer.
        self._step_hidden_layer = self._gru_layer.step_layer(
            self._step_input, self._step_prev_hidden)
        self._step_output = tf.matmul(self._step_hidden_layer.output,
                                      self._output_flat_layer.W_Di_Do
                                      ) + self._output_flat_layer.b_1_Do
        self._hid_init = self._gru_layer.h0
def __init__(self, input_B_Di, input_shape, layerspec_json, debug=False):
    """Stack layers described by a JSON list on top of ``input_B_Di``.

    Args:
        input_B_Di: input tensor fed to the first layer.
        input_shape: shape of a single input; must have length >= 1.
        layerspec_json (string): JSON list of layer descriptions. Each
            entry has a ``type`` of ``reshape``, ``flatten``, ``fc``,
            ``conv`` or ``nonlin`` plus that type's own keys.
        debug: when true, log the parsed specification; also forwarded to
            every layer type except ``conv``.

    Raises:
        NotImplementedError: if an entry has an unknown ``type``.
    """
    assert len(input_shape) >= 1
    self.input_B_Di = input_B_Di

    layerspec = json.loads(layerspec_json)
    if debug:
        util.ok('Loading feedforward net specification')
        util.header(json.dumps(layerspec, indent=2, separators=(',', ': ')))

    self.layers = []
    with tf.variable_scope(type(self).__name__) as self.varscope:
        cur_output, cur_shape = input_B_Di, input_shape
        for index, spec in enumerate(layerspec):
            with tf.variable_scope('layer_%d' % index):
                layer_type = spec['type']
                if layer_type == 'reshape':
                    _check_keys(spec, ['type', 'new_shape'], [])
                    layer = ReshapeLayer(cur_output, spec['new_shape'], debug=debug)
                elif layer_type == 'flatten':
                    _check_keys(spec, ['type'], [])
                    layer = FlattenLayer(cur_output, debug=debug)
                elif layer_type == 'fc':
                    _check_keys(spec, ['type', 'n'], ['initializer'])
                    layer = AffineLayer(cur_output, cur_shape, output_shape=(spec['n'], ),
                                        Winitializer=_parse_initializer(spec),
                                        binitializer=None, debug=debug)
                elif layer_type == 'conv':
                    _check_keys(spec, ['type', 'chanout', 'filtsize', 'stride', 'padding'],
                                ['initializer'])
                    # Square filters and strides: one spec value serves both axes.
                    layer = ConvLayer(input_B_Ih_Iw_Ci=cur_output, input_shape=cur_shape,
                                      Co=spec['chanout'], Fh=spec['filtsize'],
                                      Fw=spec['filtsize'], Sh=spec['stride'],
                                      Sw=spec['stride'], padding=spec['padding'],
                                      initializer=_parse_initializer(spec))
                elif layer_type == 'nonlin':
                    _check_keys(spec, ['type', 'func'], [])
                    layer = NonlinearityLayer(cur_output, cur_shape, spec['func'], debug=debug)
                else:
                    raise NotImplementedError('Unknown layer type %s' % spec['type'])
                self.layers.append(layer)
            # The new layer's output feeds the next entry in the spec.
            cur_output, cur_shape = layer.output, layer.output_shape
        self._output, self._output_shape = cur_output, cur_shape
def __init__(self, input_, new_shape, debug=False):
    """Reshape ``input_`` to ``new_shape`` behind an inferred leading axis.

    Args:
        input_: tensor to reshape.
        new_shape: target shape without the leading axis; converted to a
            tuple and stored as the layer's output shape.
        debug: when true, log the target shape.
    """
    target_shape = tuple(new_shape)
    self._output_shape = target_shape

    if debug:
        util.header('Reshape(new_shape=%s)' % (str(target_shape), ))

    with tf.variable_scope(type(self).__name__) as self.varscope:
        # -1 lets TensorFlow infer the size of the leading axis.
        full_shape = (-1, ) + target_shape
        self._output = tf.reshape(input_, full_shape)
def phase_train(spec, spec_file, git_hash):
    """Expand a pipeline spec into one training job per setting and run them.

    Every combination of the training options (algorithm, baseline, policy
    and baseline architecture, run index) and the environment arguments is
    turned into a command template, an output file name and an argument
    dict. Combinations with ``n_coop > n_pursuers`` are skipped. After
    interactive confirmation the jobs are handed to ``pipeline.run_jobs``;
    the spec file and git hash are then recorded in the checkpoint
    directory.

    Args:
        spec: parsed pipeline specification with ``options``, ``training``
            and ``arguments`` sections.
        spec_file: path of the spec file; copied next to the checkpoints
            and used to derive the job name.
        git_hash: string written to ``git_hash.txt`` in the checkpoint dir.

    Raises:
        AssertionError: if the checkpoint directory is not empty.
        SystemExit: always; status 0 after the jobs ran, 1 when the user
            declines to continue.
    """
    # Imported locally so this block does not rely on the module's import list.
    import itertools

    util.header('=== Running {} ==='.format(spec_file))

    # Make checkpoint dir. All outputs go here
    storagedir = spec['options']['storagedir']
    n_workers = spec['options']['n_workers']
    checkptdir = os.path.join(storagedir, spec['options']['checkpt_subdir'])
    util.mkdir_p(checkptdir)
    assert not os.listdir(checkptdir), 'Checkpoint directory {} is not empty!'.format(checkptdir)

    train_spec = spec['training']
    arg_spec = spec['arguments']

    # itertools.product varies the last factor fastest, i.e. it yields the
    # same order as the equivalent nest of for-loops.
    sweep = itertools.product(
        train_spec['algorithms'],
        train_spec['baselines'],
        train_spec['policy_archs'],
        train_spec['baseline_archs'],
        arg_spec['radius'],
        arg_spec['n_sensors'],
        arg_spec['sensor_ranges'],
        arg_spec['n_evaders'],
        arg_spec['n_pursuers'],
        arg_spec['n_coop'],
        arg_spec['n_poison'],
        arg_spec['food_reward'],
        arg_spec['poison_reward'],
        arg_spec['encounter_reward'],
        arg_spec['discounts'],
        arg_spec['gae_lambdas'],
        range(train_spec['runs']),
    )

    cmd_templates, output_filenames, argdicts = [], [], []
    for (alg, bline, parch, barch, rad, n_se, srange, n_ev, n_pu, n_co, n_po,
         f_rew, p_rew, e_rew, disc, gae, run) in sweep:
        # Skip settings that ask for more cooperating agents than pursuers.
        if n_co > n_pu:
            continue

        strid = (
            'alg={},bline={},parch={},barch={},'.format(alg['name'], bline, parch, barch) +
            'rad={},n_se={},srange={},n_ev={},n_pu={},n_co={},n_po={},'.format(
                rad, n_se, srange, n_ev, n_pu, n_co, n_po) +
            'f_rew={},p_rew={},e_rew={},'.format(f_rew, p_rew, e_rew) +
            'disc={},gae={},run={}'.format(disc, gae, run))

        cmd_templates.append(alg['cmd'].replace('\n', ' ').strip())
        output_filenames.append(strid + '.txt')
        argdicts.append({
            'baseline_type': bline,
            'radius': rad,
            'sensor_range': srange,
            'n_sensors': n_se,
            'n_pursuers': n_pu,
            'n_evaders': n_ev,
            'n_coop': n_co,
            'n_poison': n_po,
            'discount': disc,
            'food_reward': f_rew,
            'poison_reward': p_rew,
            'encounter_reward': e_rew,
            'gae_lambda': gae,
            'policy_arch': parch,
            'baseline_arch': barch,
            'log': os.path.join(checkptdir, strid + '.h5'),
        })

    util.ok('{} jobs to run...'.format(len(cmd_templates)))
    util.warn('Continue? y/n')
    if input() != 'y':
        util.failure('Canceled.')
        sys.exit(1)

    pipeline.run_jobs(cmd_templates, output_filenames, argdicts, storagedir,
                      jobname=os.path.split(spec_file)[-1], n_workers=n_workers)

    # Copy the pipeline yaml file to the output dir too. This used to sit
    # after both sys.exit() calls and therefore never ran.
    shutil.copyfile(spec_file, os.path.join(checkptdir, 'pipeline.yaml'))
    with open(os.path.join(checkptdir, 'git_hash.txt'), 'w') as f:
        f.write(git_hash + '\n')

    sys.exit(0)
def __init__(self, env, args):
    """Assemble baselines, sampler, TRPO step and optimizer from ``args``.

    Args:
        env: environment, passed through ``rltools_envpolicy_parser``
            together with ``args`` to obtain the environment and policies.
        args: namespace of options; also dumped as JSON into the training
            log.

    Raises:
        NotImplementedError: for an unknown ``args.baseline_type`` or
            ``args.sampler``, or an ``args.control`` mode the simple
            sampler does not support.
    """
    self.args = args
    env, policies, policy = rltools_envpolicy_parser(env, args)

    # --- Baseline(s) -------------------------------------------------------
    # Per-agent scope names differ between the linear ('baseline_<id>') and
    # MLP ('<id>_baseline') baselines; both conventions are kept as they are.
    baseline_type = args.baseline_type
    if baseline_type == 'linear':
        agent_scope_fmt = 'baseline_{}'

        def make_baseline(obs_space, scope):
            return LinearFeatureBaseline(obs_space, enable_obsnorm=args.enable_obsnorm,
                                         varscope_name=scope)
    elif baseline_type == 'mlp':
        agent_scope_fmt = '{}_baseline'

        def make_baseline(obs_space, scope):
            return MLPBaseline(obs_space, hidden_spec=args.baseline_hidden_spec,
                               enable_obsnorm=args.enable_obsnorm,
                               enable_vnorm=args.enable_vnorm, max_kl=args.vf_max_kl,
                               damping=args.vf_cg_damping,
                               time_scale=1. / args.max_traj_len, varscope_name=scope)
    elif baseline_type == 'zero':
        agent_scope_fmt = '{}'  # ZeroBaseline takes no scope name

        def make_baseline(obs_space, scope):
            return ZeroBaseline(obs_space)
    else:
        raise NotImplementedError()

    if args.control == 'concurrent':
        # One baseline per agent.
        baselines = [
            make_baseline(agent.observation_space, agent_scope_fmt.format(agid))
            for agid, agent in enumerate(env.agents)
        ]
    else:
        baseline = make_baseline(policy.observation_space, 'baseline')

    # --- Sampler -----------------------------------------------------------
    if args.sampler == 'simple':
        if args.control == 'centralized':
            sampler_cls = SimpleSampler
        elif args.control == 'decentralized':
            sampler_cls = DecSampler
        elif args.control == 'concurrent':
            sampler_cls = ConcSampler
        else:
            raise NotImplementedError()
        uses_workers = False
    elif args.sampler == 'parallel':
        sampler_cls = ParallelSampler
        uses_workers = True
    else:
        raise NotImplementedError()

    sampler_args = dict(max_traj_len=args.max_traj_len, n_timesteps=args.n_timesteps,
                        n_timesteps_min=args.n_timesteps_min,
                        n_timesteps_max=args.n_timesteps_max,
                        timestep_rate=args.timestep_rate, adaptive=args.adaptive_batch,
                        enable_rewnorm=args.enable_rewnorm)
    if uses_workers:
        # Extra settings understood only by the parallel sampler.
        sampler_args.update(n_workers=args.sampler_workers, mode=args.control,
                            discard_extra=False)

    # --- Optimizer ---------------------------------------------------------
    step_func = TRPO(max_kl=args.max_kl)
    shared_kwargs = dict(env=env, step_func=step_func, discount=args.discount,
                         gae_lambda=args.gae_lambda, sampler_cls=sampler_cls,
                         sampler_args=sampler_args, n_iter=args.n_iter)
    if args.control == 'concurrent':
        self.algo = ConcurrentPolicyOptimizer(policies=policies, baselines=baselines,
                                              target_policy=policy,
                                              interp_alpha=args.interp_alpha,
                                              **shared_kwargs)
    else:
        self.algo = SamplingPolicyOptimizer(policy=policy, baseline=baseline,
                                            **shared_kwargs)

    # --- Logging -----------------------------------------------------------
    argstr = json.dumps(vars(args), separators=(',', ':'), indent=2)
    util.header(argstr)
    self.log_f = log.TrainingLog(args.log, [('args', argstr)], debug=args.debug)