def __init__(self, env_spec, o_ph, a_ph, name="q_function",
             hidden_sizes=(32, 32), activation=tf.nn.relu,
             output_activation=None):
    self._env_spec = env_spec
    self._name = name
    # TODO: Dynamically support discrete or continuous action spaces
    with tf.variable_scope(self._name):
        # Q(s, a): concatenate observation and action, map to a scalar
        x = tf.concat([o_ph, a_ph], axis=1)
        x = mlp(x, list(hidden_sizes) + [1], activation=activation,
                output_activation=output_activation)
        # Squeeze only the trailing unit dimension; a bare tf.squeeze
        # would also collapse a batch dimension of size 1
        x = tf.squeeze(x, axis=1)
        self._qf = x
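# Hedged sketch: the `mlp` helper used above (and in the value function
# below) is not shown in this snapshot. Assuming it stacks dense layers in
# the usual TF1 style, a minimal version might look like this:
def mlp(x, hidden_sizes=(32,), activation=tf.nn.relu, output_activation=None):
    # Hidden layers share one activation function
    for size in hidden_sizes[:-1]:
        x = tf.layers.dense(x, units=size, activation=activation)
    # Output layer; output_activation=None yields a linear output
    return tf.layers.dense(x, units=hidden_sizes[-1],
                           activation=output_activation)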
def test_mlp(devs, kv_type):
    # Guarantee the same weight init for each run
    mx.random.seed(0)
    logging.basicConfig(level=logging.DEBUG)
    (train, val) = common.mnist(batch_size=100, input_shape=(784,))
    # Train
    model = mx.model.FeedForward.create(
        symbol=common.mlp(),
        ctx=devs,
        X=train,
        num_epoch=4,
        learning_rate=0.1,
        wd=0.0004,
        momentum=0.9,
        kvstore=kv_type)
    return common.accuracy(model, val)
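# Hedged sketch: `common.mlp()` is defined elsewhere in the repo. A typical
# MNIST multilayer perceptron built with the old mx.symbol API might look
# like this (the 128/64/10 layer sizes are an assumption, not from the
# original code):
def mlp():
    data = mx.symbol.Variable('data')
    fc1 = mx.symbol.FullyConnected(data=data, name='fc1', num_hidden=128)
    act1 = mx.symbol.Activation(data=fc1, name='relu1', act_type='relu')
    fc2 = mx.symbol.FullyConnected(data=act1, name='fc2', num_hidden=64)
    act2 = mx.symbol.Activation(data=fc2, name='relu2', act_type='relu')
    fc3 = mx.symbol.FullyConnected(data=act2, name='fc3', num_hidden=10)
    return mx.symbol.SoftmaxOutput(data=fc3, name='softmax')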
def __init__(self, env_spec, o_ph, hidden_sizes=(32, 32),
             activation=tf.nn.relu, output_activation=None,
             name='value_function'):
    self._env_spec = env_spec
    self._name = name
    # Note: Do we need to create internal references for hidden_sizes,
    # activation, output_activation, and such?
    # Build the value-function graph
    with tf.variable_scope(self._name):
        # V(s): map the observation to a scalar value estimate
        x = o_ph
        x = mlp(x, list(hidden_sizes) + [1], activation=activation,
                output_activation=output_activation)
        x = tf.squeeze(x, axis=1)  # TODO: inelegant, clean up
        self._vf = x
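# Hedged usage sketch: the two constructors above belong to classes whose
# names are not shown; `QFunction` and `ValueFunction` below are hypothetical
# stand-ins, and obs_dim/act_dim/env_spec are illustrative placeholders.
obs_dim, act_dim = 8, 2  # example dimensions, not from the original code
o_ph = tf.placeholder(tf.float32, shape=(None, obs_dim), name='obs')
a_ph = tf.placeholder(tf.float32, shape=(None, act_dim), name='act')
qf = QFunction(env_spec, o_ph, a_ph)  # builds the Q(s, a) graph
vf = ValueFunction(env_spec, o_ph)    # builds the V(s) graph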
#!/usr/bin/env python
import mxnet as mx
import logging
import common

mx.random.seed(0)
logging.basicConfig(level=logging.DEBUG)

kv = mx.kvstore.create('dist_async')

# Each worker trains on its own partition of the data
(train, val) = common.mnist(num_parts=kv.num_workers,
                            part_index=kv.rank,
                            batch_size=100,
                            input_shape=(784,))

# Train (num_epoch, matching the other scripts, rather than the
# deprecated num_round)
model = mx.model.FeedForward.create(
    symbol=common.mlp(),
    ctx=mx.cpu(),
    X=train,
    num_epoch=4,
    learning_rate=0.05,
    wd=0.0004,
    momentum=0.9,
    kvstore=kv)

common.accuracy(model, val)
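# Hedged sketch: `common.mnist` is not shown. It plausibly wraps
# mx.io.MNISTIter, whose num_parts/part_index arguments let each distributed
# worker read a disjoint shard of the data; the file paths below are
# assumptions.
def mnist(batch_size, input_shape, num_parts=1, part_index=0):
    flat = len(input_shape) == 1  # (784,) means flattened images
    train = mx.io.MNISTIter(
        image='data/train-images-idx3-ubyte',
        label='data/train-labels-idx1-ubyte',
        batch_size=batch_size, flat=flat, shuffle=True,
        num_parts=num_parts, part_index=part_index)
    val = mx.io.MNISTIter(
        image='data/t10k-images-idx3-ubyte',
        label='data/t10k-labels-idx1-ubyte',
        batch_size=batch_size, flat=flat)
    return (train, val)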
#!/usr/bin/env python
import mxnet as mx
import logging
import common

mx.random.seed(0)
logging.basicConfig(level=logging.DEBUG)

kv = mx.kvstore.create('dist_sync')

# Feed each machine the whole data set
(train, val) = common.mnist(batch_size=100, input_shape=(784,))

# Train
model = mx.model.FeedForward.create(
    symbol=common.mlp(),
    ctx=mx.cpu(),
    X=train,
    num_epoch=4,
    learning_rate=0.1,
    wd=0.0004,
    momentum=0.9,
    kvstore=kv)

common.accuracy(model, val)
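# Hedged usage note: a 'dist_sync' kvstore needs scheduler, server, and
# worker processes, which MXNet's tools/launch.py starts for you. Assuming
# this script is saved as dist_sync_mlp.py (a hypothetical name), a
# two-worker run would look roughly like:
#   python tools/launch.py -n 2 python dist_sync_mlp.py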
#!/usr/bin/env python
import common
import mxnet as mx
import logging

mx.random.seed(0)
logging.basicConfig(level=logging.DEBUG)

kv = mx.kvstore.create('dist_sync')

# Feed each machine the whole data set
(train, val) = common.mnist(batch_size=100, input_shape=(784,))

# Train; epoch_size fixes the number of batches per epoch
model = mx.model.FeedForward.create(
    symbol=common.mlp(),
    ctx=mx.cpu(),
    X=train,
    num_epoch=4,
    epoch_size=60000 // 100,  # integer division so epoch_size is an int
    learning_rate=0.1,
    wd=0.0004,
    momentum=0.9,
    kvstore=kv)

common.accuracy(model, val)
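# Hedged sketch: `common.accuracy` is also not shown. Since the old
# FeedForward model exposes a score() method, a minimal version might be:
def accuracy(model, val_iter):
    acc = model.score(val_iter)  # mean accuracy over the validation iterator
    logging.info('validation accuracy: %f', acc)
    return acc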