def conv_block(
        x,
        filters,
        kernel_size=3,
        strides=(1, 1),
        padding='same',
        kernel_initializer=tf.initializers.variance_scaling,
        data_format='channels_last',
        activation=tf.nn.relu,
        name=None,
        training=True,
        reuse=False,
        batchnorm=True,
        pool=True):
    """VGG conv block: conv -> optional batch norm -> optional max pool."""
    assert name is not None, 'Give the conv block a name.'
    activity = tf.layers.conv2d(
        inputs=x,
        filters=filters,
        kernel_size=kernel_size,
        strides=strides,
        padding=padding,
        data_format=data_format,
        activation=activation,
        kernel_initializer=kernel_initializer,
        name='%s_conv' % name,
        reuse=reuse)
    if batchnorm:
        activity = normalization.batch(
            bottom=activity,
            name='%s_bn' % name,
            training=training,
            reuse=reuse)
    if pool:
        return pooling.max_pool(bottom=activity, name='%s_pool' % name)
    return activity
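# Hedged usage sketch (added for illustration; not from the original repo):
# composing conv_block into a VGG-style group. Assumes this module's
# `normalization` and `pooling` imports are available; names are hypothetical.
def _demo_conv_block_group():
    x = tf.placeholder(tf.float32, [None, 224, 224, 3])
    net = conv_block(x=x, filters=64, name='d1_1', training=True, pool=False)
    # The final block in a group pools by default (pool=True).
    net = conv_block(x=net, filters=64, name='d1_2', training=True)
    return net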
def just_ff(self, x, l0_h1, l0_h2):
    """Feedforward pass only: hGRU followed by the conv tower, no top-down."""
    # HGRU
    l0_h1, l0_h2 = self.hgru0.run(x, l0_h1, l0_h2)
    ff = tf.contrib.layers.batch_norm(
        inputs=l0_h2,
        scale=True, center=True, fused=True, renorm=False,
        reuse=False,
        scope=None,
        param_initializers=self.bn_param_initializer,
        updates_collections=None,
        is_training=self.train)
    ff = tf.nn.relu(ff)
    ff_list = [ff]

    # FEEDFORWARD
    for idx, (conv_fsiz, conv_k, conv_str, pool_fsiz, pool_str) in enumerate(
            zip(self.ff_conv_fsiz,
                self.ff_conv_k,
                self.ff_conv_strides,
                self.ff_pool_fsiz,
                self.ff_pool_strides)):
        with tf.variable_scope(
                self.var_scope + '/ff_%s' % idx, reuse=tf.AUTO_REUSE):
            weights = tf.get_variable('weights')
        # POOL
        ff = max_pool(
            bottom=ff,
            k=[1] + pool_fsiz + [1],
            s=[1] + pool_str + [1],
            name='ff_pool_hgru')
        # CONV
        ff = tf.nn.conv2d(
            input=ff, filter=weights, strides=conv_str, padding='SAME')
        ff = tf.contrib.layers.batch_norm(
            inputs=ff,
            scale=True, center=True, fused=True, renorm=False,
            reuse=False,
            scope=None,
            param_initializers=self.bn_param_initializer,
            updates_collections=None,
            is_training=self.train)
        ff = tf.nn.relu(ff)
        ff_list.append(ff)

    # GLOBAL POOL
    if self.use_global_pool:
        ff = global_pool(bottom=ff, name='global_pool', aux={})
    return ff
def conv_tower(self, activity, pre_pool, i0):
    """Build the intermediate conv tower to expand RF size."""
    conv_list = [pre_pool]
    for idx, (filters, reps) in enumerate(
            zip(self.intermediate_ff, self.intermediate_repeats)):
        # Build the tower
        for il in range(reps):
            activity = tf.nn.conv2d(
                input=activity,
                filter=getattr(
                    self, 'intermediate_kernel_%s_%s' % (idx, il)),
                strides=self.strides,
                padding=self.padding)
            activity = tf.nn.bias_add(
                activity,
                getattr(self, 'intermediate_bias_%s_%s' % (idx, il)))
            # activity = self.ff_nl(activity)
            if self.residual and il == 0:
                skip_path = tf.identity(activity)
            elif self.residual and il == (reps - 1):
                activity += skip_path
            activity = self.ff_nl(activity)
            if 1:  # idx == (...)  Use with the resid cond below
                ff_scope = 'bn_ff_%s_%s' % (idx, il)
                if not self.while_loop:
                    ff_scope = '%s_t%s' % (ff_scope, i0)
                if self.batch_norm:
                    with tf.variable_scope(
                            ff_scope, reuse=self.scope_reuse) as scope:
                        activity = tf.contrib.layers.batch_norm(
                            inputs=activity,
                            scale=False, center=False, fused=True,
                            renorm=False,
                            param_initializers=self.param_initializer,
                            updates_collections=None,
                            scope=scope,
                            reuse=self.reuse_conv_bn,
                            is_training=self.train)
                    # Apply the affine params that BN skipped above
                    activity = (
                        activity * getattr(
                            self, 'intermediate_gamma_%s_%s' % (idx, il)) +
                        getattr(
                            self, 'intermediate_beta_%s_%s' % (idx, il)))
        if idx < (len(self.intermediate_ff) - 1):
            # Gather in a list for upsample
            conv_list += [activity]
            # Add pools for encoding path
            activity = max_pool(
                bottom=activity,
                k=[1] + self.pool_kernel + [1],
                s=[1] + self.pool_strides + [1],
                name='ff_pool_%s' % idx)
    return activity, conv_list
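# Illustrative sketch (added) of the residual wiring used inside conv_tower:
# the skip is captured after the first rep's conv and added back before the
# nonlinearity of the last rep. Inputs are hypothetical tensors/kernels.
def _demo_residual_reps(activity, kernels, biases, ff_nl=tf.nn.relu):
    reps = len(kernels)
    for il in range(reps):
        activity = tf.nn.conv2d(
            input=activity, filter=kernels[il],
            strides=[1, 1, 1, 1], padding='SAME')
        activity = tf.nn.bias_add(activity, biases[il])
        if il == 0:
            skip_path = tf.identity(activity)
        elif il == (reps - 1):
            activity += skip_path
        activity = ff_nl(activity)
    return activity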
def ff_drive(self, bottom, name):
    """Compute filter responses for bottom."""
    if self.batch_norm:
        with tf.variable_scope(
                '%s_bn' % name, reuse=self.scope_reuse) as scope:
            bottom = tf.contrib.layers.batch_norm(
                inputs=bottom,
                scale=True, center=False, fused=True, renorm=False,
                param_initializers=self.param_initializer,
                updates_collections=None,
                scope=scope,
                reuse=self.reuse,
                is_training=self.train)

    # Pool the preceding layer's drive
    if self.include_pooling:
        bottom = max_pool(
            bottom=bottom,
            k=[1] + self.pool_kernel + [1],
            s=[1] + self.pool_strides + [1],
            name='ff_pool_%s' % name)
    for idx, (filters, s) in enumerate(
            zip(self.intermediate_ff, self.intermediate_ks)):
        bottom = tf.nn.conv2d(
            input=bottom,
            filter=getattr(
                self, 'intermediate_kernel_%s_%s' % (name, idx)),
            strides=self.strides,
            padding=self.padding)
        bottom = tf.nn.bias_add(
            bottom,
            getattr(self, 'intermediate_bias_%s_%s' % (name, idx)))
        bottom = self.ff_nl(bottom)
        if self.batch_norm:
            with tf.variable_scope(
                    'l1_h2_bn_ff_%s_%s' % (name, idx),
                    reuse=self.scope_reuse) as scope:
                bottom = tf.contrib.layers.batch_norm(
                    inputs=bottom,
                    scale=True, center=True, fused=True, renorm=False,
                    param_initializers=self.param_initializer,
                    updates_collections=None,
                    scope=scope,
                    reuse=self.reuse,
                    is_training=self.train)
    return bottom
def skinny_input_layer(
        X,
        reuse,
        training,
        features,
        conv_kernel_size,
        pool_kernel_size=False,
        pool_kernel_strides=False,
        name='l0',
        conv_strides=(1, 1),
        conv_padding='same',
        conv_activation=tf.nn.relu,
        var_scope='input_1',
        data_format='NHWC',
        pool=False,
        pool_type='max'):
    """Input layer for recurrent experiments in Kim et al., 2019."""
    if not pool_kernel_size or not pool_kernel_strides:
        pool = False
    # tf.layers uses channels_last/channels_first naming
    if data_format == 'NHWC':
        conv_data_format = 'channels_last'
    elif data_format == 'NCHW':
        conv_data_format = 'channels_first'
    else:
        raise NotImplementedError(data_format)
    with tf.variable_scope(var_scope, reuse=reuse):
        in_emb = tf.layers.conv2d(
            inputs=X,
            filters=features,
            kernel_size=conv_kernel_size,
            name='conv_0_%s' % name,
            strides=conv_strides,
            padding=conv_padding,
            activation=conv_activation,
            data_format=conv_data_format,
            trainable=training,
            use_bias=True)
        if pool:
            if pool_type == 'max':
                in_emb = pooling.max_pool(
                    bottom=in_emb,
                    name='pool_%s' % name,
                    data_format=data_format,
                    k=pool_kernel_size,
                    s=pool_kernel_strides)
            else:
                raise NotImplementedError(pool_type)
    return in_emb
def input_layer_v2(
        X,
        reuse,
        training,
        features,
        conv_kernel_size,
        pool_kernel_size=False,
        pool_kernel_strides=False,
        name='l0',
        conv_strides=(1, 1),
        conv_padding='same',
        conv_activation=tf.nn.relu,
        var_scope='input_1',
        pool=False,
        pool_type='max'):
    """Input layer for recurrent experiments in Kim et al., 2019."""
    if not pool_kernel_size or not pool_kernel_strides:
        pool = False
    with tf.variable_scope(var_scope, reuse=reuse):
        assert not isinstance(conv_activation, list), \
            'Pass a single activation fun.'
        in_emb = tf.layers.conv2d(
            inputs=X,
            filters=features,
            kernel_size=conv_kernel_size,
            name='conv_0_%s' % name,
            strides=conv_strides,
            padding=conv_padding,
            activation=conv_activation,
            trainable=training,
            use_bias=True)
        if pool:
            if pool_type == 'max':
                in_emb = pooling.max_pool(
                    bottom=in_emb,
                    name='pool_%s' % name,
                    k=pool_kernel_size,
                    s=pool_kernel_strides)
            else:
                raise NotImplementedError(pool_type)
    return in_emb
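# Hedged usage sketch (added): building the input embedding with pooling
# enabled. Kernel/stride lists follow the NHWC convention used by the other
# max_pool calls in this file; the values are hypothetical.
def _demo_input_layer_v2(data_tensor):
    return input_layer_v2(
        X=data_tensor,
        reuse=False,
        training=True,
        features=24,
        conv_kernel_size=7,
        pool=True,
        pool_kernel_size=[1, 2, 2, 1],
        pool_kernel_strides=[1, 2, 2, 1])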
def full(self, i0, x, l1_h2, l2_h2, fb_act_1):
    """hGRU body.

    Take the recurrent h2 from a low level and imbue it with information
    from a high layer. This means to treat the lower layer h2 as the X and
    the higher layer h2 as the recurrent state. This will serve as I/E from
    the high layer along with feedback kernels.
    """
    # LAYER 1 hGRU: FF drive comes from outside the recurrent loop
    l1_h2 = self.hgru_ops(i0=i0, x=x, h2=fb_act_1, layer='h1')
    l1_h2_scope = 'l1_h2_bn'
    if not self.while_loop:
        l1_h2_scope = '%s_t%s' % (l1_h2_scope, i0)
    if self.batch_norm:
        with tf.variable_scope(
                l1_h2_scope, reuse=self.scope_reuse) as scope:
            l1_h2 = tf.contrib.layers.batch_norm(
                inputs=l1_h2,
                scale=True, center=False, fused=True, renorm=False,
                param_initializers=self.param_initializer,
                updates_collections=None,
                scope=scope,
                reuse=self.reuse,
                is_training=self.train)

    # Pool the preceding layer's drive
    if self.include_pooling:
        l1_h2_pool = max_pool(
            bottom=l1_h2,
            k=[1] + self.pool_kernel + [1],
            s=[1] + self.pool_strides + [1],
            name='pool_h1')
    else:
        l1_h2_pool = l1_h2

    # Conv hierarchy for high-level representation
    activity, conv_list = self.conv_tower(
        activity=l1_h2_pool, pre_pool=l1_h2, i0=i0)

    # LAYER 2 hGRU
    l2_h2 = self.hgru_ops(i0=i0, x=activity, h2=l2_h2, layer='h2')
    l2_h2_scope = 'l2_h2_bn'
    if not self.while_loop:
        l2_h2_scope = '%s_t%s' % (l2_h2_scope, i0)
    if self.batch_norm:
        with tf.variable_scope(
                l2_h2_scope, reuse=self.scope_reuse) as scope:
            l2_h2 = tf.contrib.layers.batch_norm(
                inputs=l2_h2,
                scale=True, center=False, fused=True, renorm=False,
                param_initializers=self.param_initializer,
                updates_collections=None,
                scope=scope,
                reuse=self.reuse,
                is_training=self.train)
    activity = l2_h2

    # Add upsamples
    activity = self.upsample_router(
        activity=activity, conv_list=conv_list, i0=i0)

    # LAYER 1 tdGRU: feedback from hgru-2 to hgru-1
    fb_act_1 = self.td_router(activity=activity, l1_h2=l1_h2, i0=i0)

    # Iterate loop
    i0 += 1
    return i0, x, l1_h2, l2_h2, fb_act_1
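# Hedged sketch (added for illustration): how a `full` body with this
# signature is typically driven by tf.while_loop when self.while_loop is
# True. `model` and the initial state tensors are assumptions, not repo code.
def _demo_run_full(model, x, l1_h2, l2_h2, fb_act_1, timesteps=8):
    i0 = tf.constant(0)
    loop_vars = [i0, x, l1_h2, l2_h2, fb_act_1]
    returned = tf.while_loop(
        cond=lambda i0, *args: i0 < timesteps,
        body=model.full,
        loop_vars=loop_vars,
        back_prop=True,
        swap_memory=False)
    # Unpack the final recurrent states after `timesteps` iterations
    _, _, l1_h2, l2_h2, fb_act_1 = returned
    return l1_h2, l2_h2, fb_act_1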
def full(self, i0, x, l0_h1, l0_h2, td0_h1):
    """One timestep: hGRU, feedforward tower, global pool, then top-down."""
    def batch_norm_layer(act):
        # Shared BN configuration used throughout this body
        return tf.contrib.layers.batch_norm(
            inputs=act,
            scale=True, center=True, fused=True, renorm=False,
            reuse=False,
            scope=None,
            param_initializers=self.bn_param_initializer,
            updates_collections=None,
            is_training=self.train)

    # HGRU
    l0_h1, l0_h2 = self.hgru0.run(x, l0_h1, l0_h2)
    ff = tf.nn.relu(batch_norm_layer(l0_h2))
    ff_list = [ff]

    # FEEDFORWARD
    for idx, (conv_fsiz, conv_k, conv_str, pool_fsiz, pool_str) in enumerate(
            zip(self.ff_conv_fsiz,
                self.ff_conv_k,
                self.ff_conv_strides,
                self.ff_pool_fsiz,
                self.ff_pool_strides)):
        with tf.variable_scope(
                self.var_scope + '/ff_%s' % idx, reuse=tf.AUTO_REUSE):
            weights = tf.get_variable('weights')
        # POOL
        ff = max_pool(
            bottom=ff,
            k=[1] + pool_fsiz + [1],
            s=[1] + pool_str + [1],
            name='ff_pool_hgru')
        # CONV
        ff = tf.nn.conv2d(
            input=ff, filter=weights, strides=conv_str, padding='SAME')
        ff = tf.nn.relu(batch_norm_layer(ff))
        ff_list.append(ff)

    # GLOBAL POOL and then TILE
    if self.use_global_pool:
        top_map_shape = ff_list[-1].get_shape().as_list()
        ff = global_pool(bottom=ff, name='global_pool', aux={})
        ff = tf.tile(
            tf.expand_dims(tf.expand_dims(ff, 1), 1),
            [1] + top_map_shape[1:3] + [1])

    # TOPDOWN
    fb = ff
    if not self.share_ff_td_kernels:
        scp = 'fb_'
    else:
        scp = 'ff_'
    for idx in range(len(ff_list))[::-1]:
        if idx != 0:
            with tf.variable_scope(
                    self.var_scope + '/' + scp + '%s' % (idx - 1),
                    reuse=True):
                weights = tf.get_variable('weights')
            fb = self.resize_x_to_y(
                x=fb,
                y=ff_list[idx - 1],
                kernel=weights,
                mode='transpose',
                strides=self.ff_pool_strides[idx - 1])
        else:
            with tf.variable_scope(self.var_scope + '/fb_0', reuse=True):
                weights = tf.get_variable('weights')
            fb = self.resize_x_to_y(
                x=fb,
                y=x,
                kernel=weights,
                mode='transpose',
                strides=[1, 1])
        fb = tf.nn.relu(batch_norm_layer(fb))

    # HGRU_TD
    td0_h1, l0_h2 = self.hgru_td0.run(fb, td0_h1, l0_h2)

    # Iterate loop
    i0 += 1
    return i0, x, l0_h1, l0_h2, td0_h1
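# Illustrative sketch (added) of the GLOBAL POOL -> TILE step above: a
# globally pooled [N, C] vector is broadcast back to the top feature map's
# spatial size so it can seed the top-down pathway. Shapes in the comments
# are hypothetical.
def _demo_global_pool_tile(ff, top_map_shape):
    # ff: [N, C]; top_map_shape: [N, H, W, C] as a Python list
    ff = tf.expand_dims(tf.expand_dims(ff, 1), 1)       # -> [N, 1, 1, C]
    return tf.tile(ff, [1] + top_map_shape[1:3] + [1])  # -> [N, H, W, C]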
def build_model(data_tensor, reuse, training, output_shape):
    """Create the hgru from Learning long-range..."""
    use_aux = False
    if isinstance(output_shape, list):
        output_shape = output_shape[0]
    elif isinstance(output_shape, dict):
        nhot_shape = output_shape['aux']
        output_shape = output_shape['output']
        use_aux = True
    with tf.variable_scope('cnn', reuse=reuse):
        # Unclear if we should include l0 in the down/upsample cascade
        with tf.variable_scope('g1', reuse=reuse):
            # Downsample
            act11 = conv_block(
                x=data_tensor, name='l1_1', filters=64,
                training=training, reuse=reuse, pool=False)
            act12 = conv_block(
                x=act11, name='l1_2', filters=64,
                training=training, reuse=reuse, pool=False)
            poolact12 = pooling.max_pool(bottom=act12, name='l1_2_pool')
        with tf.variable_scope('g2', reuse=reuse):
            # Downsample
            act21 = conv_block(
                x=poolact12, name='l2_1', filters=128,
                training=training, reuse=reuse, pool=False)
            act22 = conv_block(
                x=act21, filters=128, name='l2_2',
                training=training, reuse=reuse, pool=False)
            poolact22 = pooling.max_pool(bottom=act22, name='l2_2_pool')
        with tf.variable_scope('g3', reuse=reuse):
            # Downsample
            act31 = conv_block(
                x=poolact22, name='l3_1', filters=256,
                training=training, reuse=reuse, pool=False)
            act32 = conv_block(
                x=act31, filters=256, name='l3_2',
                training=training, reuse=reuse, pool=False)
            act33 = conv_block(
                x=act32, filters=256, name='l3_3',
                training=training, reuse=reuse, pool=False)
            poolact33 = pooling.max_pool(bottom=act33, name='l3_3_pool')
        with tf.variable_scope('g4', reuse=reuse):
            # Downsample
            act41 = conv_block(
                x=poolact33, name='l4_1', filters=512,
                training=training, reuse=reuse, pool=False)
            act42 = conv_block(
                x=act41, filters=512, name='l4_2',
                training=training, reuse=reuse, pool=False)
            act43 = conv_block(
                x=act42, filters=512, name='l4_3',
                training=training, reuse=reuse, pool=False)
            poolact43 = pooling.max_pool(bottom=act43, name='l4_3_pool')
        with tf.variable_scope('g5', reuse=reuse):
            # Downsample
            act51 = conv_block(
                x=poolact43, name='l5_1', filters=512,
                training=training, reuse=reuse, pool=False)
            act52 = conv_block(
                x=act51, filters=512, name='l5_2',
                training=training, reuse=reuse, pool=False)
            act53 = conv_block(
                x=act52, filters=512, name='l5_3',
                training=training, reuse=reuse, pool=False)
            poolact53 = pooling.max_pool(bottom=act53, name='l5_3_pool')
        with tf.variable_scope('g5_skip', reuse=reuse):
            upact5 = up_block(
                inputs=poolact53, skip=act53, up_filters=512,
                name='ul5', training=training, reuse=reuse)
        with tf.variable_scope('g4_skip', reuse=reuse):
            upact4 = up_block(
                inputs=upact5, skip=act43, up_filters=512,
                name='ul4', training=training, reuse=reuse)
        with tf.variable_scope('g3_skip', reuse=reuse):
            upact3 = up_block(
                inputs=upact4, skip=act33, up_filters=256,
                name='ul3', training=training, reuse=reuse)
        with tf.variable_scope('g2_skip', reuse=reuse):
            upact2 = up_block(
                inputs=upact3, skip=act22, up_filters=128,
                name='ul2', training=training, reuse=reuse)
        with tf.variable_scope('g1_skip', reuse=reuse):
            upact1 = up_block(
                inputs=upact2, skip=act12, up_filters=64,
                name='ul1', training=training, reuse=reuse)
        with tf.variable_scope('readout_1', reuse=reuse):
            activity = conv.conv_layer(
                bottom=upact1,
                name='pre_readout_conv',
                num_filters=2,
                kernel_size=1,
                trainable=training,
                use_bias=False)
            pool_aux = {'pool_type': 'max'}
            activity = pooling.global_pool(
                bottom=activity,
                name='pre_readout_pool',
                aux=pool_aux)
            activity = normalization.batch(
                bottom=activity,
                renorm=True,
                name='readout_1_bn',
                training=training)
        with tf.variable_scope('readout_2', reuse=reuse):
            pre_activity = tf.layers.flatten(activity, name='flat_readout')
            activity = tf.layers.dense(
                inputs=pre_activity, units=output_shape)
            if use_aux:
                nhot = tf.layers.dense(
                    inputs=pre_activity, units=nhot_shape)
            else:
                nhot = tf.constant(0.)
    extra_activities = {
        'activity': activity,
        'nhot': nhot
    }
    return activity, extra_activities
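# Hedged usage sketch (added): wiring build_model into a graph. The input
# shape and the two-class output below are assumptions for illustration.
def _demo_build_model():
    data = tf.placeholder(tf.float32, [None, 224, 224, 3])
    logits, extras = build_model(
        data_tensor=data, reuse=False, training=True, output_shape=2)
    return logits, extras['nhot']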
def full(self, i0, x, l1_h1, l1_h2, l2_h1, l2_h2, fb_act):
    """hGRU body.

    Take the recurrent h2 from a low level and imbue it with information
    from a high layer. This means to treat the lower layer h2 as the X and
    the higher layer h2 as the recurrent state. This will serve as I/E from
    the high layer along with feedback kernels.
    """
    # LAYER 1
    l1_h1, l1_h2 = self.hgru_ops(
        i0=i0, x=x, h1=l1_h1, h2=fb_act, layer='h1', layer_idx=0)

    # Intermediate FF
    if self.batch_norm:
        with tf.variable_scope(
                'l1_h2_bn', reuse=self.scope_reuse) as scope:
            l1_h2 = tf.contrib.layers.batch_norm(
                inputs=l1_h2,
                scale=True, center=False, fused=True, renorm=False,
                param_initializers=self.param_initializer,
                updates_collections=None,
                scope=scope,
                reuse=self.reuse,
                is_training=self.train)

    # Pool the preceding layer's drive
    if self.include_pooling:
        processed_l1_h2 = max_pool(
            bottom=l1_h2,
            k=[1] + self.pool_kernel + [1],
            s=[1] + self.pool_strides + [1],
            name='ff_pool_0')
    else:
        processed_l1_h2 = l1_h2
    for idx, (filters, s) in enumerate(
            zip(self.intermediate_ff, self.intermediate_ks)):
        processed_l1_h2 = tf.nn.conv2d(
            input=processed_l1_h2,
            filter=getattr(self, 'intermediate_kernel_%s' % idx),
            strides=self.strides,
            padding=self.padding)
        processed_l1_h2 = tf.nn.bias_add(
            processed_l1_h2, getattr(self, 'intermediate_bias_%s' % idx))
        processed_l1_h2 = self.ff_nl(processed_l1_h2)
        if self.batch_norm:
            with tf.variable_scope(
                    'l1_h2_bn_ff_%s' % idx,
                    reuse=self.scope_reuse) as scope:
                processed_l1_h2 = tf.contrib.layers.batch_norm(
                    inputs=processed_l1_h2,
                    scale=True, center=True, fused=True, renorm=False,
                    param_initializers=self.param_initializer,
                    updates_collections=None,
                    scope=scope,
                    reuse=self.reuse,
                    is_training=self.train)

    # LAYER 2
    l2_h1, l2_h2 = self.hgru_ops(
        i0=i0, x=processed_l1_h2, h1=l2_h1, h2=l2_h2,
        layer='h2', layer_idx=1)
    if self.batch_norm:
        with tf.variable_scope(
                'l2_h2_bn', reuse=self.scope_reuse) as scope:
            l2_h2 = tf.contrib.layers.batch_norm(
                inputs=l2_h2,
                scale=True, center=False, fused=True, renorm=False,
                param_initializers=self.param_initializer,
                updates_collections=None,
                scope=scope,
                reuse=self.reuse,
                is_training=self.train)

    # Incorporate feedback (FEEDBACK KERNEL is 2x channels)
    fb_inh, fb_act = self.hgru_ops(
        i0=i0,
        x=l1_h2,
        h1=self.resize_x_to_y(x=l2_h2, y=l1_h2),
        h2=fb_act,
        layer='fb',
        layer_idx=2)

    # Peephole
    fb_act += l1_h2

    # Iterate loop
    i0 += 1
    return i0, x, l1_h1, l1_h2, l2_h1, l2_h2, fb_act
def full(self, i0, x, l1_h2, l2_h2, fb_act_2, fb_act_1):
    """hGRU body.

    Take the recurrent h2 from a low level and imbue it with information
    from a high layer. This means to treat the lower layer h2 as the X and
    the higher layer h2 as the recurrent state. This will serve as I/E from
    the high layer along with feedback kernels.
    """
    # LAYER 1 hGRU: FF drive comes from outside the recurrent loop
    l1_h1, l1_h2 = self.hgru_ops(i0=i0, x=x, h2=fb_act_1, layer='h1')
    l1_h2_scope = 'l1_h2_bn'
    if not self.while_loop:
        l1_h2_scope = '%s_t%s' % (l1_h2_scope, i0)
    if self.batch_norm:
        with tf.variable_scope(
                l1_h2_scope, reuse=self.scope_reuse) as scope:
            l1_h2 = tf.contrib.layers.batch_norm(
                inputs=l1_h2,
                scale=True, center=False, fused=True, renorm=False,
                param_initializers=self.param_initializer,
                updates_collections=None,
                scope=scope,
                reuse=self.reuse,
                is_training=self.train)

    # Pool the preceding layer's drive
    if self.include_pooling:
        l1_h2_pool = max_pool(
            bottom=l1_h2,
            k=[1] + self.pool_kernel + [1],
            s=[1] + self.pool_strides + [1],
            name='pool_h1')
    else:
        l1_h2_pool = l1_h2

    # LAYER 2 hGRU: FF drive
    l2_ff = tf.nn.conv2d(
        input=l1_h2_pool,
        filter=getattr(self, 'hgru_2_ff_kernel'),
        strides=self.strides,
        padding=self.padding)
    l2_ff = tf.nn.bias_add(l2_ff, getattr(self, 'hgru_2_ff_bias'))
    # activity = self.ff_nl(activity)
    # hGRU
    l2_h1, l2_h2 = self.hgru_ops(i0=i0, x=l2_ff, h2=fb_act_2, layer='h2')
    l2_h2_scope = 'l2_h2_bn'
    if not self.while_loop:
        l2_h2_scope = '%s_t%s' % (l2_h2_scope, i0)
    if self.batch_norm:
        with tf.variable_scope(
                l2_h2_scope, reuse=self.scope_reuse) as scope:
            l2_h2 = tf.contrib.layers.batch_norm(
                inputs=l2_h2,
                scale=True, center=False, fused=True, renorm=False,
                param_initializers=self.param_initializer,
                updates_collections=None,
                scope=scope,
                reuse=self.reuse,
                is_training=self.train)

    # Pool the preceding layer's drive
    if self.include_pooling:
        activity = max_pool(
            bottom=l2_h2,
            k=[1] + self.pool_kernel + [1],
            s=[1] + self.pool_strides + [1],
            name='pool_h2')
    else:
        activity = l2_h2

    # Conv hierarchy for high-level representation
    conv_list = []
    for idx, (filters, s) in enumerate(
            zip(self.intermediate_ff, self.intermediate_ks)):
        activity = tf.nn.conv2d(
            input=activity,
            filter=getattr(self, 'intermediate_kernel_%s' % idx),
            strides=self.strides,
            padding=self.padding)
        activity = tf.nn.bias_add(
            activity, getattr(self, 'intermediate_bias_%s' % idx))
        activity = self.ff_nl(activity)
        ff_scope = 'bn_ff_%s' % idx
        if not self.while_loop:
            ff_scope = '%s_t%s' % (ff_scope, i0)
        if self.batch_norm:
            with tf.variable_scope(
                    ff_scope, reuse=self.scope_reuse) as scope:
                activity = tf.contrib.layers.batch_norm(
                    inputs=activity,
                    scale=True, center=True, fused=True, renorm=False,
                    param_initializers=self.param_initializer,
                    updates_collections=None,
                    scope=scope,
                    reuse=self.reuse_conv_bn,
                    is_training=self.train)
        if idx < (len(self.intermediate_ff) - 1) and self.include_pooling:
            # Gather in a list for upsample
            conv_list += [activity]
            # Add pools for encoding path
            activity = max_pool(
                bottom=activity,
                k=[1] + self.pool_kernel + [1],
                s=[1] + self.pool_strides + [1],
                name='ff_pool_%s' % idx)

    # Add upsamples
    for idx, target in reversed(list(enumerate(conv_list))):
        activity = self.resize_x_to_y(x=activity, y=target, name=idx + 2)
        if self.skip:
            activity += target

    # LAYER 2 tdGRU: feedback from conv to hgru-2
    resized_td = self.resize_x_to_y(x=activity, y=l2_h2, name=1)
    ff_drive = tf.concat([resized_td, l2_ff], axis=-1)
    ff_drive = tf.nn.conv2d(
        input=ff_drive,
        filter=getattr(self, 'ff_cat_kernel_2'),
        strides=self.strides,
        padding=self.padding)
    ff_drive = tf.nn.bias_add(ff_drive, getattr(self, 'ff_cat_bias_2'))
    fb_inh_2, fb_act_2 = self.hgru_ops(
        i0=i0, x=ff_drive, h2=l2_h2, layer='fb2')

    # TD 2 batchnorm
    td_h2_scope = 'td_h2_bn'
    if not self.while_loop:
        td_h2_scope = '%s_t%s' % (td_h2_scope, i0)
    if self.batch_norm:
        with tf.variable_scope(
                td_h2_scope, reuse=self.scope_reuse) as scope:
            fb_act_2 = tf.contrib.layers.batch_norm(
                inputs=fb_act_2,
                scale=True, center=False, fused=True, renorm=False,
                param_initializers=self.param_initializer,
                updates_collections=None,
                scope=scope,
                reuse=self.reuse,
                is_training=self.train)

    # Peephole z-scored activities
    fb_act_2 += l2_h2

    # LAYER 1 tdGRU: feedback from hgru-2 to hgru-1
    resized_td = self.resize_x_to_y(x=fb_act_2, y=x, name=0)
    ff_drive = tf.concat([resized_td, x], axis=-1)
    ff_drive = tf.nn.conv2d(
        input=ff_drive,
        filter=getattr(self, 'ff_cat_kernel_1'),
        strides=self.strides,
        padding=self.padding)
    ff_drive = tf.nn.bias_add(ff_drive, getattr(self, 'ff_cat_bias_1'))
    fb_inh_1, fb_act_1 = self.hgru_ops(
        i0=i0, x=ff_drive, h2=l1_h2, layer='fb1')

    # TD 1 batchnorm
    td_h1_scope = 'td_h1_bn'
    if not self.while_loop:
        td_h1_scope = '%s_t%s' % (td_h1_scope, i0)
    if self.batch_norm:
        with tf.variable_scope(
                td_h1_scope, reuse=self.scope_reuse) as scope:
            fb_act_1 = tf.contrib.layers.batch_norm(
                inputs=fb_act_1,
                scale=True, center=False, fused=True, renorm=False,
                param_initializers=self.param_initializer,
                updates_collections=None,
                scope=scope,
                reuse=self.reuse,
                is_training=self.train)

    # Peephole z-scored activities
    fb_act_1 += l1_h2

    # Iterate loop
    i0 += 1
    return i0, x, l1_h2, l2_h2, fb_act_2, fb_act_1
def build_model(data_tensor, reuse, training, output_shape):
    """Create the hgru from Learning long-range..."""
    if isinstance(output_shape, list):
        output_shape = output_shape[0]
    with tf.variable_scope('cnn', reuse=reuse):
        # Unclear if we should include l0 in the down/upsample cascade
        with tf.variable_scope('g1', reuse=reuse):
            # Downsample
            act11 = conv_block(
                x=data_tensor, name='l1_1', filters=64,
                training=training, reuse=reuse, pool=False)
            act12 = conv_block(
                x=act11, name='l1_2', filters=64,
                training=training, reuse=reuse, pool=False)
            poolact12 = pooling.max_pool(bottom=act12, name='l1_2_pool')
        with tf.variable_scope('g2', reuse=reuse):
            # Downsample
            act21 = conv_block(
                x=poolact12, name='l2_1', filters=128,
                training=training, reuse=reuse, pool=False)
            act22 = conv_block(
                x=act21, filters=128, name='l2_2',
                training=training, reuse=reuse, pool=False)
            poolact22 = pooling.max_pool(bottom=act22, name='l2_2_pool')
        with tf.variable_scope('g3', reuse=reuse):
            # Downsample
            act31 = conv_block(
                x=poolact22, name='l3_1', filters=256,
                training=training, reuse=reuse, pool=False)
            act32 = conv_block(
                x=act31, filters=256, name='l3_2',
                training=training, reuse=reuse, pool=False)
            act33 = conv_block(
                x=act32, filters=256, name='l3_3',
                training=training, reuse=reuse, pool=False)
            poolact33 = pooling.max_pool(bottom=act33, name='l3_3_pool')
        with tf.variable_scope('g4', reuse=reuse):
            # Downsample
            act41 = conv_block(
                x=poolact33, name='l4_1', filters=512,
                training=training, reuse=reuse, pool=False)
            act42 = conv_block(
                x=act41, filters=512, name='l4_2',
                training=training, reuse=reuse, pool=False)
            act43 = conv_block(
                x=act42, filters=512, name='l4_3',
                training=training, reuse=reuse, pool=False)
            poolact43 = pooling.max_pool(bottom=act43, name='l4_3_pool')
        with tf.variable_scope('g5', reuse=reuse):
            # Downsample
            act51 = conv_block(
                x=poolact43, name='l5_1', filters=512,
                training=training, reuse=reuse, pool=False)
            act52 = conv_block(
                x=act51, filters=512, name='l5_2',
                training=training, reuse=reuse, pool=False)
            act53 = conv_block(
                x=act52, filters=512, name='l5_3',
                training=training, reuse=reuse, pool=False)
            poolact53 = pooling.max_pool(bottom=act53, name='l5_3_pool')
        with tf.variable_scope('g5_skip', reuse=reuse):
            upact5 = up_block(
                inputs=poolact53, skip=act53, up_filters=512,
                name='ul5', training=training, reuse=reuse)
        with tf.variable_scope('g4_skip', reuse=reuse):
            upact4 = up_block(
                inputs=upact5, skip=act43, up_filters=512,
                name='ul4', training=training, reuse=reuse)
        with tf.variable_scope('g3_skip', reuse=reuse):
            upact3 = up_block(
                inputs=upact4, skip=act33, up_filters=256,
                name='ul3', training=training, reuse=reuse)
        with tf.variable_scope('g2_skip', reuse=reuse):
            upact2 = up_block(
                inputs=upact3, skip=act22, up_filters=128,
                name='ul2', training=training, reuse=reuse)
        with tf.variable_scope('g1_skip', reuse=reuse):
            upact1 = up_block(
                inputs=upact2, skip=act12, up_filters=64,
                name='ul1', training=training, reuse=reuse)
        activity = conv.readout_layer(
            activity=upact1,
            reuse=reuse,
            training=training,
            output_shape=output_shape)
    extra_activities = {'activity': activity}
    return activity, extra_activities
def full(self, i0, x, l1_h2, l2_h2, fb_act_1):
    """hGRU body.

    Take the recurrent h2 from a low level and imbue it with information
    from a high layer. This means to treat the lower layer h2 as the X and
    the higher layer h2 as the recurrent state. This will serve as I/E from
    the high layer along with feedback kernels.
    """
    for idx, layer in enumerate(self.down_layers):
        layer_name, specs = list(layer.items())[0]
        print('Building layer %s' % layer_name)
        # Add FF drive if requested
        if 'ff' in specs.keys():
            ff_specs = specs['ff']
            strides = ff_specs['strides']
            dilations = ff_specs['dilations']
            # Assumed keys, by analogy with the 'recurrent' branch below;
            # the original left repeats/kernels/features undefined here.
            repeats = ff_specs['repeats']
            kernels = ff_specs['kernels']
            features = ff_specs['features']
            check_params(
                layer_name='Layer %s_%s' % (layer_name, idx),
                strides=strides,
                dilations=dilations)
            for rep in range(repeats):
                self.create_ff_filters(
                    idx=idx,
                    rep=rep,
                    kernels=kernels,
                    bottom_features=all_features[idx],
                    top_features=features)
            all_features += [features]
        # Create an hgru layer if requested
        if 'recurrent' in specs.keys():
            rnn_specs = specs['recurrent']
            features = rnn_specs['features']
            h_kernel = rnn_specs['h_kernel']
            g_kernel = rnn_specs['g_kernel']
            check_params(features=features, kernels=h_kernel)
            self.create_rnn_filters(
                idx=idx,
                layer=layer,
                h_kernel=h_kernel,
                g_kernel=g_kernel,
                bottom_features=all_features[idx],
                top_features=features)
            all_features += [features]

    # LAYER 1 hGRU: FF drive comes from outside the recurrent loop
    if self.force_horizontal:
        fb_act_1 = l1_h2
    l1_h1, l1_h2 = self.hgru_ops(i0=i0, x=x, h2=fb_act_1, layer='h1')
    l1_h2_scope = 'l1_h2_bn'
    if not self.while_loop:
        l1_h2_scope = '%s_t%s' % (l1_h2_scope, i0)
    if self.batch_norm:
        with tf.variable_scope(
                l1_h2_scope, reuse=self.scope_reuse) as scope:
            l1_h2 = tf.contrib.layers.batch_norm(
                inputs=l1_h2,
                scale=True, center=False, fused=True, renorm=False,
                param_initializers=self.param_initializer,
                updates_collections=None,
                scope=scope,
                reuse=self.reuse,
                is_training=self.train)

    # Pool the preceding layer's drive
    if self.include_pooling:
        l1_h2_pool = max_pool(
            bottom=l1_h2,
            k=[1] + self.pool_kernel + [1],
            s=[1] + self.pool_strides + [1],
            name='pool_h1')
    else:
        l1_h2_pool = l1_h2

    # Conv hierarchy for high-level representation
    activity_1, conv_list_1 = self.conv_tower(
        activity=l1_h2_pool, pre_pool=l1_h2, tower_name='1', i0=i0)

    # LAYER 2 hGRU
    l2_h1, l2_h2 = self.hgru_ops(i0=i0, x=activity_1, h2=l2_h2, layer='h2')
    l2_h2_scope = 'l2_h2_bn'
    if not self.while_loop:
        l2_h2_scope = '%s_t%s' % (l2_h2_scope, i0)
    if self.batch_norm:
        with tf.variable_scope(
                l2_h2_scope, reuse=self.scope_reuse) as scope:
            l2_h2 = tf.contrib.layers.batch_norm(
                inputs=l2_h2,
                scale=True, center=False, fused=True, renorm=False,
                param_initializers=self.param_initializer,
                updates_collections=None,
                scope=scope,
                reuse=self.reuse,
                is_training=self.train)
    activity = l2_h2

    # Add upsamples
    activity = self.upsample_router(
        activity=activity, conv_list=conv_list_1, i0=i0)

    # LAYER 1 tdGRU: feedback from hgru-2 to hgru-1
    fb_act_1 = self.td_router(activity=activity, l1_h2=l1_h2, i0=i0)

    # Iterate loop
    i0 += 1
    return i0, x, l1_h2, l2_h2, fb_act_1
def input_layer(
        X,
        reuse,
        training,
        features,
        conv_kernel_size,
        pool_kernel_size=False,
        pool_kernel_strides=False,
        name='l0',
        conv_strides=(1, 1),
        conv_padding='same',
        conv_activation=tf.nn.relu,
        var_scope='input_1',
        pool=False,
        renorm=False,
        pool_type='max'):
    """Input layer for recurrent experiments in Kim et al., 2019."""
    if not pool_kernel_size or not pool_kernel_strides:
        pool = False
    with tf.variable_scope(var_scope, reuse=reuse):
        if isinstance(conv_activation, list):
            act_0 = conv_activation[0]
        else:
            act_0 = conv_activation
        if not isinstance(features, list):
            features = [features, features]
        in_emb = tf.layers.conv2d(
            inputs=X,
            filters=features[0],
            kernel_size=conv_kernel_size,
            name='conv_0_%s' % name,
            strides=conv_strides,
            padding=conv_padding,
            activation=act_0,
            trainable=training,
            use_bias=True)
        # in_emb = normalization.batch(
        #     bottom=in_emb,
        #     name='input_layer_bn_0',
        #     renorm=renorm,
        #     training=training)
        if pool:
            if pool_type == 'max':
                in_emb = pooling.max_pool(
                    bottom=in_emb,
                    name='pool_%s' % name,
                    k=pool_kernel_size,
                    s=pool_kernel_strides)
            else:
                raise NotImplementedError(pool_type)
        if isinstance(conv_activation, list):
            act_1 = conv_activation[1]
        else:
            act_1 = conv_activation
        in_emb = tf.layers.conv2d(
            inputs=in_emb,
            filters=features[1],
            kernel_size=conv_kernel_size,
            name='conv_1_%s' % name,
            strides=conv_strides,
            padding=conv_padding,
            activation=act_1,
            trainable=training,
            use_bias=False)
        in_emb = normalization.batch(
            bottom=in_emb,
            name='input_layer_bn_1',
            renorm=renorm,
            training=training)
    return in_emb
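# Hedged usage sketch (added): input_layer accepts per-conv feature counts
# and activations as lists; a scalar `features` is broadcast to both convs.
# The values below are hypothetical.
def _demo_input_layer(data_tensor):
    return input_layer(
        X=data_tensor,
        reuse=False,
        training=True,
        features=[16, 24],
        conv_kernel_size=7,
        conv_activation=[tf.nn.relu, None])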
def build_model(data_tensor, reuse, training, output_shape):
    """Create the hgru from Learning long-range..."""
    use_aux = False
    if isinstance(output_shape, list):
        output_shape = output_shape[0]
    elif isinstance(output_shape, dict):
        nhot_shape = output_shape['aux']
        output_shape = output_shape['output']
        use_aux = True
    with tf.variable_scope('cnn', reuse=reuse):
        with tf.variable_scope('input', reuse=reuse):
            in_emb = tf.layers.conv2d(
                inputs=data_tensor, filters=8, kernel_size=11, name='l0',
                strides=(1, 1), padding='same', activation=tf.nn.elu,
                trainable=training, use_bias=True)
            in_emb = pooling.max_pool(
                bottom=in_emb, name='p1', k=[1, 2, 2, 1], s=[1, 2, 2, 1])
            in_emb = tf.layers.conv2d(
                inputs=in_emb, filters=8, kernel_size=7, name='l1',
                strides=(1, 1), padding='same', activation=tf.nn.elu,
                trainable=training, use_bias=True)
            layer_hgru = hgru.hGRU(
                'hgru_1',
                x_shape=in_emb.get_shape().as_list(),
                timesteps=8,
                h_ext=11,
                strides=[1, 1, 1, 1],
                padding='SAME',
                aux={'reuse': False, 'constrain': False},
                train=training)
            h2 = layer_hgru.build(in_emb)
            h2 = normalization.batch(
                bottom=h2, renorm=True, name='hgru_bn', training=training)
        with tf.variable_scope('readout_1', reuse=reuse):
            activity = conv.conv_layer(
                bottom=h2,
                name='pre_readout_conv',
                num_filters=2,
                kernel_size=1,
                trainable=training,
                use_bias=False)
            pool_aux = {'pool_type': 'max'}
            activity = pooling.global_pool(
                bottom=activity, name='pre_readout_pool', aux=pool_aux)
            activity = normalization.batch(
                bottom=activity,
                renorm=True,
                name='readout_1_bn',
                training=training)
        with tf.variable_scope('readout_2', reuse=reuse):
            pre_activity = tf.layers.flatten(activity, name='flat_readout')
            activity = tf.layers.dense(
                inputs=pre_activity, units=output_shape)
            if use_aux:
                nhot = tf.layers.dense(
                    inputs=pre_activity, units=nhot_shape)
            else:
                nhot = tf.constant(0.)
    extra_activities = {'activity': activity, 'nhot': nhot}
    return activity, extra_activities
def build_model(data_tensor, reuse, training, output_shape):
    """Create the hgru from Learning long-range..."""
    if isinstance(output_shape, list):
        output_shape = output_shape[0]
    with tf.variable_scope('cnn', reuse=reuse):
        with tf.variable_scope('input', reuse=reuse):
            in_emb = tf.layers.conv2d(
                inputs=data_tensor, filters=4, kernel_size=7, name='l0',
                strides=(1, 1), padding='same', activation=tf.nn.elu,
                trainable=training, use_bias=True)
            in_emb = normalization.batch(
                bottom=in_emb, name='l0_bn', training=training)
            in_emb = tf.nn.relu(in_emb)
            in_emb = pooling.max_pool(
                bottom=in_emb, name='p1', k=[1, 2, 2, 1], s=[1, 2, 2, 1])
            in_emb = tf.layers.conv2d(
                inputs=in_emb, filters=8, kernel_size=7, name='l1',
                strides=(1, 1), padding='same', activation=tf.nn.elu,
                trainable=training, use_bias=True)
            in_emb = normalization.batch(
                bottom=in_emb, name='l1_bn', training=training)
            in_emb = tf.nn.relu(in_emb)
            in_emb = pooling.max_pool(
                bottom=in_emb, name='p2', k=[1, 2, 2, 1], s=[1, 2, 2, 1])
        with tf.variable_scope('v6', reuse=reuse):
            from layers.recurrent import v6_net as fgru_net
            from layers.feedforward import v6_ln_mk1 as fgru_layer
            in_shape = in_emb.get_shape().as_list()
            fgru_layer_optional_args = {
                'swap_mix_sources': False,
                'swap_gate_sources': False,
                'turn_off_gates': False,
                'featurewise_control': False,
                'no_relu_h1': False
            }
            layer_hgru = fgru_net.hGRU(
                var_scope='fgru_net',
                timesteps=6,
                in_k=in_shape[-1],
                use_global_pool=True,
                share_ff_td_kernels=True,
                fgru_module_class=fgru_layer,
                hgru_fsiz=[5, 5],
                hgru_fanout_factor=3,
                hgru_h2_k=12,
                # Feedforward tower, from low to high
                ff_conv_fsiz=[[5, 5], [3, 3], [3, 3]],
                ff_conv_k=[16, 20, 28],
                ff_conv_strides=[[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]],
                ff_pool_fsiz=[[2, 2], [2, 2], [2, 2]],
                ff_pool_strides=[[2, 2], [2, 2], [2, 2]],
                # Feedback path, from low to high (higher layers are called
                # first during the TD phase)
                fb_conv_fsiz=[[6, 6], [6, 6], [4, 4], [4, 4]],
                fb_conv_k=[12, 16, 20, 28],
                train=True,
                dtype=tf.float32,
                **fgru_layer_optional_args)
            # NOTE ABOUT SWAPPING AN fGRU LAYER:
            # You can define an fgru module class and feed it to the
            # fgru_net constructor, which internally constructs two fGRU
            # layers from it. An fgru module class should take:
            #   (<Str: layer name>,
            #    <Int: input # channels>,
            #    <Int: h1 channel fan-out factor>,
            #    <Int: h2 # channels>,
            #    <List: filter size>,
            #    <Bool: use 3d data>,
            #    <Bool: use symmetric kernel>,
            #    <Bool: reuse BN params over timesteps>,
            #    <Bool: train mode>,
            #    <tf.dtype: data type>,
            #    **fgru_layer_optional_args)
            # All args marked by <...> are defined automatically from
            # fgru_net arguments; all you define at this level are the
            # class-specific optional args as a dict.
            bottom, top = layer_hgru.build(in_emb)
            top = normalization.batch(
                bottom=top, name='hgru_bn', fused=True, training=training)
        with tf.variable_scope('readout', reuse=reuse):
            pre_activity = tf.layers.dense(inputs=top, units=28)
            activity = tf.layers.dense(
                inputs=pre_activity, units=output_shape)
    extra_activities = {
        'activity': activity,
    }
    return activity, extra_activities
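# Hedged sketch (added) of the fGRU module interface described in the NOTE
# above: a minimal stand-in whose constructor matches the positional
# signature fgru_net is said to use. All parameter names are assumptions and
# the body is a placeholder, not a working fGRU.
class _DemoFGRUModule(object):
    def __init__(self, name, in_k, fanout_factor, h2_k, fsiz,
                 use_3d, symmetric_weights, bn_reuse, train, dtype,
                 **optional_args):
        self.name = name                      # <Str: layer name>
        self.in_k = in_k                      # <Int: input # channels>
        self.fanout_factor = fanout_factor    # <Int: h1 channel fan-out>
        self.h2_k = h2_k                      # <Int: h2 # channels>
        self.fsiz = fsiz                      # <List: filter size>
        self.use_3d = use_3d                  # <Bool: use 3d data>
        self.symmetric_weights = symmetric_weights
        self.bn_reuse = bn_reuse              # <Bool: reuse BN over timesteps>
        self.train = train
        self.dtype = dtype
        self.optional_args = optional_args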