def _get_DQN_prediction(self, image):
    """Build the Q-value output for the given image tensor.

    image: documented as being in [0, 255]; it is fed to the first
    convolution as-is (no division by 255 is applied here).

    Four convolutions (the first three each followed by a 2x max-pool) feed
    a 512-unit fully-connected layer. When ``DUELING`` is falsy a single
    linear layer produces the Q-values; otherwise separate value and
    advantage layers are combined as ``A + (V - mean(A, axis=1))``.

    Returns the result wrapped in ``tf.identity`` under the name 'Qvalue'.
    """
    # (out_channel, kernel_shape, followed by a 2x max-pool?)
    conv_specs = (
        (32, 5, True),
        (32, 5, True),
        (64, 4, True),
        (64, 3, False),
    )
    # Reference ("original") architecture, kept for comparison:
    #   conv0: out_channel=32, kernel_shape=8, stride=4
    #   conv1: out_channel=64, kernel_shape=4, stride=2
    #   conv2: out_channel=64, kernel_shape=3

    features = image
    with argscope(Conv2D, nl=PReLU.f, use_bias=True):
        for idx, (channels, kernel, pooled) in enumerate(conv_specs):
            features = Conv2D('conv%d' % idx, features,
                              out_channel=channels, kernel_shape=kernel)
            if pooled:
                features = MaxPooling('pool%d' % idx, features, 2)

    hidden = FullyConnected(
        'fc0', features, 512,
        nl=lambda x, name: LeakyReLU.f(x, 0.01, name))

    if DUELING:
        # Value layer first, then advantage layer (same creation order as
        # the non-restyled code).
        value = FullyConnected('fctV', hidden, 1, nl=tf.identity)
        advantage = FullyConnected('fctA', hidden, NUM_ACTIONS, nl=tf.identity)
        centered_value = value - tf.reduce_mean(advantage, 1, keep_dims=True)
        q_values = tf.add(advantage, centered_value)
    else:
        q_values = FullyConnected('fct', hidden, NUM_ACTIONS, nl=tf.identity)

    return tf.identity(q_values, name='Qvalue')
def _get_DQN_prediction(self, image):
    """Map an image tensor to the output of the final 'fct' layer.

    image: expected in [0, 255]; it is divided by 255.0 before the first
    convolution.

    The stack is four convolutions (the first three each followed by a 2x
    max-pool), a 512-unit fully-connected layer, and a linear layer with
    ``NUM_ACTIONS`` outputs.
    """
    def leaky_relu(x, name):
        # Nonlinearity for 'fc0': LeakyReLU called with 0.01.
        return LeakyReLU.f(x, 0.01, name)

    scaled = image / 255.0
    with argscope(Conv2D, nl=PReLU.f, use_bias=True):
        net = LinearWrap(scaled)
        net = net.Conv2D('conv0', out_channel=32, kernel_shape=5)
        net = net.MaxPooling('pool0', 2)
        net = net.Conv2D('conv1', out_channel=32, kernel_shape=5)
        net = net.MaxPooling('pool1', 2)
        net = net.Conv2D('conv2', out_channel=64, kernel_shape=4)
        net = net.MaxPooling('pool2', 2)
        net = net.Conv2D('conv3', out_channel=64, kernel_shape=3)
        # Reference ("original") architecture, kept for comparison:
        #   conv0: out_channel=32, kernel_shape=8, stride=4
        #   conv1: out_channel=64, kernel_shape=4, stride=2
        #   conv2: out_channel=64, kernel_shape=3
        net = net.FullyConnected('fc0', 512, nl=leaky_relu)
        net = net.FullyConnected('fct', NUM_ACTIONS, nl=tf.identity)
        # Calling the wrapper yields the underlying tensor.
        return net()
def _get_DQN_prediction(self, image):
    """Build the network that maps an image tensor to per-action outputs.

    Args:
        image: input image tensor. It is divided by 255.0 before the first
            convolution (presumably pixel values in [0, 255] -- confirm
            against the caller).

    Returns:
        The output of the final linear layer 'fct', which has
        ``NUM_ACTIONS`` units.
    """
    # TODO: Do we need to add other pre-processing? e.g., subtract mean
    image = image / 255.0

    # TODO: The network structure can be improved?
    # ReLU is the activation for every Conv2D created inside this scope.
    with argscope(Conv2D, nl=tf.nn.relu, use_bias=True):
        l = Conv2D('conv0', image, out_channel=32, kernel_shape=5)
        l = MaxPooling('pool0', l, 2)
        l = Conv2D('conv1', l, out_channel=32, kernel_shape=5)
        l = MaxPooling('pool1', l, 2)
        l = Conv2D('conv2', l, out_channel=64, kernel_shape=4)
        l = MaxPooling('pool2', l, 2)
        # Each layer needs its own name: this one was previously also called
        # 'conv2', clashing with the layer above (which has a different
        # kernel shape).
        l = Conv2D('conv3', l, out_channel=64, kernel_shape=3)

    # the original arch
    # .Conv2D('conv0', image, out_channel=32, kernel_shape=8, stride=4)
    # .Conv2D('conv1', out_channel=64, kernel_shape=4, stride=2)
    # .Conv2D('conv2', out_channel=64, kernel_shape=3)

    l = FullyConnected('fc0', l, 512,
                       nl=lambda x, name: LeakyReLU.f(x, 0.01, name))
    # Pass the identity function itself as the nonlinearity; calling
    # tf.identity() with no arguments raises a TypeError.
    l = FullyConnected('fct', l, NUM_ACTIONS, nl=tf.identity)
    # Hand the prediction back to the caller instead of returning None.
    return l