def build_model(t_params, n_dim_img, n_dim_txt, n_dim_enc, n_dim_dec, n_dim_vocab, optimizer):
    '''
    Build the whole model for training
    '''
    x = tensor.tensor3('x', config.floatX)
    mask_x = tensor.matrix('mask_x', 'int8')
    # Encoder(s) and initialization of hidden layer
    enc = gru(mask_x, dropout(x), t_params, n_dim_img, n_dim_enc, 'enc')[-1]
    init_h = tensor.tanh(dense(enc, t_params, n_dim_enc, n_dim_dec, 'init_h'))
    y = tensor.matrix('y', 'int32')
    mask_y = tensor.matrix('mask_y', 'int8')
    n_steps, n_samples = y.shape
    # Word embedding
    emb = embedding(y, t_params, n_dim_vocab, n_dim_txt, 'emb').reshape(
        (n_steps, n_samples, n_dim_txt))[:-1]
    emb = tensor.concatenate([tensor.zeros((1, n_samples, n_dim_txt), config.floatX), emb])
    # Decoder(s)
    dec = gru(mask_y, emb, t_params, n_dim_txt, n_dim_dec, 'dec', init_h=init_h)
    # Fully-connected layer
    fc = dense(dropout(dec), t_params, n_dim_dec, n_dim_vocab, 'fc')
    # Classifier
    prob = tensor.nnet.softmax(fc.reshape((n_steps * n_samples, n_dim_vocab)))
    # Cost function
    cost = prob[tensor.arange(n_steps * n_samples), y.flatten()].reshape((n_steps, n_samples))
    cost = ((-tensor.log(cost + 1e-6) * mask_y).sum(0) / mask_y.astype(config.floatX).sum(0)).mean()
    grads = tensor.grad(cost, list(t_params.values()))
    f_cost, f_update = optimizer(tensor.scalar('lr'), t_params, grads, [x, mask_x, y, mask_y], cost)
    return f_cost, f_update
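For reference, a minimal NumPy sketch (illustrative only, not part of the snippet above) of the masked negative log-likelihood that build_model uses as its cost: the per-token -log p(correct word) is masked, averaged over the valid steps of each sample, then averaged over the batch.

import numpy as np

def masked_nll(prob, y, mask_y, eps=1e-6):
    # prob: (n_steps * n_samples, n_vocab) softmax output
    # y, mask_y: (n_steps, n_samples); mask_y is 0/1 marking real tokens vs. padding
    n_steps, n_samples = y.shape
    p_correct = prob[np.arange(n_steps * n_samples), y.flatten()]
    p_correct = p_correct.reshape((n_steps, n_samples))
    per_sample = (-np.log(p_correct + eps) * mask_y).sum(0) / mask_y.sum(0)
    return per_sample.mean()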
def __init__(self, layer_nums=0, activation='relu', dropout=False):
    self.layers = []
    if layer_nums == 0:
        return
    for i in range(len(layer_nums) - 2):
        self.add(layers.linear(layer_nums[i], layer_nums[i + 1]))
        self.add(self.str_to_layer(activation)())
        if dropout:
            self.add(layers.dropout())
    self.add(layers.linear(layer_nums[-2], layer_nums[-1]))
    self.add(layers.softmax())
def __init__(self, numpy_rng, theano_rng=None, n_ins=40 * 3,
             layers_types=[Linear, ReLU, ReLU, ReLU, LogisticRegression],
             layers_sizes=[1024, 1024, 1024, 1024],
             dropout_rates=[0.2, 0.5, 0.5, 0.5, 0.5],
             n_outs=62 * 3,
             rho=0.9, eps=1.E-6,  # TODO refine
             debugprint=False):
    super(DropoutNet, self).__init__(numpy_rng, theano_rng, n_ins, layers_types,
                                     layers_sizes, n_outs, rho, eps, debugprint)
    self.dropout_rates = dropout_rates
    dropout_layer_input = dropout(numpy_rng, self.x, p=dropout_rates[0])
    self.dropout_layers = []

    for layer, layer_type, n_in, n_out, dr in zip(
            self.layers, layers_types, self.layers_ins, self.layers_outs,
            dropout_rates[1:] + [0]):  # !!! we do not dropout anything from the last layer !!!
        this_layer = layer_type(rng=numpy_rng,
                                input=dropout_layer_input, n_in=n_in, n_out=n_out,
                                W=layer.W * 1. / (1. - dr),  # experimental
                                b=layer.b * 1. / (1. - dr))  # TODO check
        assert hasattr(this_layer, 'output')
        # N.B. dropout with dr=1 does not drop anything!!
        this_layer.output = dropout(numpy_rng, this_layer.output, dr)
        self.dropout_layers.append(this_layer)
        dropout_layer_input = this_layer.output

    assert hasattr(self.layers[-1], 'training_cost')
    assert hasattr(self.layers[-1], 'errors')
    # TODO standardize cost
    # these are the dropout costs
    self.mean_cost = self.dropout_layers[-1].negative_log_likelihood(self.y)
    self.cost = self.dropout_layers[-1].training_cost(self.y)
    # these are the non-dropout errors
    self.errors = self.layers[-1].errors(self.y)
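The W = layer.W / (1 - dr) rescaling above (marked "experimental") is the weight-scaling counterpart of dropout. For comparison, here is a minimal NumPy sketch of the more common inverted-dropout formulation, where kept activations are scaled by 1/keep_prob at training time so inference needs no rescaling (illustrative only, not code from this class):

import numpy as np

def inverted_dropout(x, keep_prob, rng=None, train=True):
    # During training, zero units with probability (1 - keep_prob) and scale
    # the survivors by 1/keep_prob; at test time, pass the input through.
    if not train or keep_prob >= 1.0:
        return x
    rng = rng or np.random.default_rng()
    mask = rng.binomial(1, keep_prob, size=x.shape)
    return x * mask / keep_prob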
def char_model(self, is_training, hparams, chars, embedding_char_size, tags,
               inputs_char, indexs_start, indexs_end, targets_w):
    """Character model."""
    with tf.variable_scope('chars'):
        if is_training:
            embed_dims = [chars, embedding_char_size]
            np.random.seed(seed=1)
            embeddings_char = np.random.randn(*embed_dims).astype(np.float32)
            cembed = tf.get_variable('char_embeddings', dtype=tf.float32,
                                     initializer=embeddings_char)
        else:
            cembed = tf.get_variable('char_embeddings')

        # joint for both
        embed_nd = tf.nn.embedding_lookup(cembed, inputs_char[:, :])
        embed = layers.dropout(is_training, hparams.embed_keep_prob_ch, embed_nd)
        output_fw, output_bw, _ = layers.lstm_layers(
            is_training, embed, hparams.num_layers_chars,
            hparams.hidden_char_size, hparams.recur_keep_prob)

        # Gather forward start and end of word of char LSTM output.
        output_fw_fst = tf.gather_nd(output_fw, indexs_start)
        output_fw_lst = tf.gather_nd(output_fw, indexs_end)

        # Gather backward start and end of word of char LSTM output.
        output_bw_fst = tf.gather_nd(output_bw, indexs_start)
        output_bw_lst = tf.gather_nd(output_bw, indexs_end)

        # Bring the gathered LSTM outputs into the right shape and concatenate them.
        outputs = tf.concat(
            [output_fw_fst, output_fw_lst, output_bw_fst, output_bw_lst], axis=2)
        outputs = layers.mlp(is_training, outputs, output_size=hparams.mlp_size,
                             keep_prob=hparams.keep_prob)

        targets = targets_w[:, :]
        tok_keep = tf.to_float(tf.greater(targets, PAD))
        linear = layers.linear_with_dropout(is_training, outputs, tags,
                                            keep_prob=hparams.keep_prob)
        preds = tf.to_int32(tf.argmax(linear, axis=-1))

        if is_training:
            int_tok_keep = tf.to_int32(tok_keep)
            t_correct = tf.to_int32(tf.equal(preds, targets)) * int_tok_keep
            accuracy = tf.reduce_sum(t_correct) / tf.reduce_sum(int_tok_keep)
            loss = tf.losses.sparse_softmax_cross_entropy(targets, linear, tok_keep)
            return loss, accuracy
        else:
            return preds, outputs
def build_30s(color_inputs, num_classes, is_training):
    """
    Build unet network:
    ----------
    Args:
        color_inputs: Tensor, [batch_size, length, 3]
        num_classes: Integer, number of segmentation (annotation) labels
        is_training: Boolean, in training mode or not (for dropout & bn)
    Returns:
        logits: Tensor, predicted annotated image flattened
                [batch_size * length, num_classes]
    """
    dropout_keep_prob = tf.where(is_training, 0.2, 1.0)

    # Encoder Section
    # Block 1
    # color_conv1_1 = layers.conv_btn(color_inputs, [3, 3], 64, 'conv1_1', is_training=is_training)
    color_conv1_1 = layers.conv_btn1(color_inputs, 3, 32, 'conv1_1', is_training=is_training)
    # layers.conv1(current_layer, c, ksize, stride=2, scope='conv{}'.format(i + 1), padding='SAME')
    color_conv1_2 = layers.conv_btn1(color_conv1_1, 3, 32, 'conv1_2', is_training=is_training)
    color_pool1 = layers.maxpool(color_conv1_2, 4, 'pool1')
    # Block 2
    color_conv2_1 = layers.conv_btn1(color_pool1, 3, 32, 'conv2_1', is_training=is_training)
    color_conv2_2 = layers.conv_btn1(color_conv2_1, 3, 32, 'conv2_2', is_training=is_training)
    color_pool2 = layers.maxpool(color_conv2_2, 4, 'pool2')
    # Block 3
    color_conv3_1 = layers.conv_btn1(color_pool2, 3, 64, 'conv3_1', is_training=is_training)
    color_conv3_2 = layers.conv_btn1(color_conv3_1, 3, 64, 'conv3_2', is_training=is_training)
    color_pool3 = layers.maxpool(color_conv3_2, 4, 'pool3')
    color_drop3 = layers.dropout(color_pool3, dropout_keep_prob, 'drop3')
    # Block 4
    color_conv4_1 = layers.conv_btn1(color_drop3, 3, 64, 'conv4_1', is_training=is_training)
    color_conv4_2 = layers.conv_btn1(color_conv4_1, 3, 64, 'conv4_2', is_training=is_training)
    color_pool4 = layers.maxpool(color_conv4_2, 4, 'pool4')
    color_drop4 = layers.dropout(color_pool4, dropout_keep_prob, 'drop4')
    # Block 5
    color_conv5_1 = layers.conv_btn1(color_drop4, 3, 128, 'conv5_1', is_training=is_training)
    color_conv5_2 = layers.conv_btn1(color_conv5_1, 3, 128, 'conv5_2', is_training=is_training)
    color_drop5 = layers.dropout(color_conv5_2, dropout_keep_prob, 'drop5')

    # Decoder Section
    # Block 1
    upsample61 = layers.deconv_upsample(color_drop5, 4, 'upsample6')
    upsample61 = Cropping1D(cropping=((0, 1)))(upsample61)
    concat6 = layers.concat(upsample61, color_conv4_2, 'concat6')
    color_conv6_1 = layers.conv_btn1(concat6, 3, 128, 'conv6_1', is_training=is_training)
    # color_conv6_2 = layers.conv_btn1(color_conv6_1, 6, 128, 'conv6_2', is_training=is_training)
    color_drop6 = layers.dropout(color_conv6_1, dropout_keep_prob, 'drop6')
    # Block 2
    upsample7 = layers.deconv_upsample(color_drop6, 4, 'upsample7')
    # upsample7 = Cropping1D(cropping=((0, 1)))(upsample7)
    concat7 = layers.concat(upsample7, color_conv3_2, 'concat7')
    color_conv7_1 = layers.conv_btn1(concat7, 3, 64, 'conv7_1', is_training=is_training)
    # color_conv7_2 = layers.conv_btn1(color_conv7_1, 6, 64, 'conv7_1', is_training=is_training)
    color_drop7 = layers.dropout(color_conv7_1, dropout_keep_prob, 'drop7')
    # Block 3
    upsample81 = layers.deconv_upsample(color_drop7, 4, 'upsample8')
    upsample81 = Cropping1D(cropping=((0, 1)))(upsample81)
    concat8 = layers.concat(upsample81, color_conv2_2, 'concat8')
    color_conv8_1 = layers.conv_btn1(concat8, 3, 32, 'conv8_1', is_training=is_training)
    # color_conv8_2 = layers.conv_btn1(color_conv8_1, 3, 32, 'conv8_1', is_training=is_training)
    # Block 4
    upsample91 = layers.deconv_upsample(color_conv8_1, 4, 'upsample9')
    upsample91 = Cropping1D(cropping=((1, 2)))(upsample91)
    concat9 = layers.concat(upsample91, color_conv1_2, 'concat9')
    color_conv9_1 = layers.conv_btn1(concat9, 3, 32, 'conv9_1', is_training=is_training)
    # color_conv9_2 = layers.conv_btn1(color_conv9_1, 3, 32, 'conv9_1', is_training=is_training)
    # Block 5
    score = layers.conv(color_conv9_1, 1, num_classes, 'score', activation_fn=None)
    logits = tf.reshape(score, (-1, num_classes))

    return logits
def scaled_dot_product_attention(queries, keys, values, num_heads=1, dropout_rate=0.): """ The dot-product attention. Attention mechanism can be seen as mapping a query and a set of key-value pairs to an output. The output is computed as a weighted sum of the values, where the weight assigned to each value is computed by a compatibility function (dot-product here) of the query with the corresponding key. The dot-product attention can be implemented through (batch) matrix multipication as follows: .. math:: Attention(Q, K, V)= softmax(QK^\mathrm{T})V Refer to `Attention Is All You Need <https://arxiv.org/pdf/1706.03762.pdf>`_. Args: queries (Variable): The input variable which should be a 3-D Tensor. keys (Variable): The input variable which should be a 3-D Tensor. values (Variable): The input variable which should be a 3-D Tensor. num_heads (int): Head number to compute the scaled dot product attention. Default: 1. dropout_rate (float): The dropout rate to drop the attention weight. Default: 0.0. Returns: Variable: A 3-D Tensor computed by multi-head scaled dot product\ attention. Raises: ValueError: If input queries, keys, values are not 3-D Tensors. NOTES: 1. When num_heads > 1, three linear projections are learned respectively to map input queries, keys and values into queries', keys' and values'. queries', keys' and values' have the same shapes with queries, keys and values. 2. When num_heads == 1, scaled_dot_product_attention has no learnable parameters. Examples: .. code-block:: python queries = fluid.layers.data(name="queries", shape=[3, 5, 9], dtype="float32", append_batch_size=False) queries.stop_gradient = False keys = fluid.layers.data(name="keys", shape=[3, 6, 9], dtype="float32", append_batch_size=False) keys.stop_gradient = False values = fluid.layers.data(name="values", shape=[3, 6, 10], dtype="float32", append_batch_size=False) values.stop_gradient = False contexts = fluid.nets.scaled_dot_product_attention(queries, keys, values) contexts.shape # [3, 5, 10] """ if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3): raise ValueError( "Inputs quries, keys and values should all be 3-D tensors.") if queries.shape[-1] != keys.shape[-1]: raise ValueError( "The hidden size of queries and keys should be the same.") if keys.shape[-2] != values.shape[-2]: raise ValueError( "The max sequence length in query batch and in key batch " "should be the same.") if keys.shape[-1] % num_heads != 0: raise ValueError("The hidden size of keys (%d) must be divisible " "by the number of attention heads (%d)." % (keys.shape[-1], num_heads)) if values.shape[-1] % num_heads != 0: raise ValueError("The hidden size of values (%d) must be divisible " "by the number of attention heads (%d)." % (values.shape[-1], num_heads)) def __compute_qkv(queries, keys, values, num_heads): """ Add linear projection to queries, keys, and values. Args: queries(Tensor): a 3-D input Tensor. keys(Tensor): a 3-D input Tensor. values(Tensor): a 3-D input Tensor. num_heads(int): The number of heads. Linearly project the inputs ONLY when num_heads > 1. Returns: Tensor: linearly projected output Tensors: queries', keys' and values'. They have the same shapes with queries, keys and values. 
""" if num_heads == 1: return queries, keys, values q = layers.fc(input=queries, size=queries.shape[-1], num_flatten_dims=2) k = layers.fc(input=keys, size=keys.shape[-1], num_flatten_dims=2) v = layers.fc(input=values, size=values.shape[-1], num_flatten_dims=2) return q, k, v def __split_heads(x, num_heads): """ Reshape the last dimension of inpunt tensor x so that it becomes two dimensions. Args: x(Tensor): a 3-D input Tensor. num_heads(int): The number of heads. Returns: Tensor: a Tensor with shape [..., n, m/num_heads], where m is size of the last dimension of x. """ if num_heads == 1: return x hidden_size = x.shape[-1] # reshape the 3-D input: [batch_size, max_sequence_length, hidden_dim] # into a 4-D output: # [batch_size, max_sequence_length, num_heads, hidden_size_per_head]. reshaped = layers.reshape(x=x, shape=list(x.shape[:-1]) + [num_heads, hidden_size // num_heads]) # permuate the dimensions into: # [batch_size, num_heads, max_sequence_len, hidden_size_per_head] return layers.transpose(x=reshaped, perm=[0, 2, 1, 3]) def __combine_heads(x): """ Reshape the last two dimensions of inpunt tensor x so that it becomes one dimension. Args: x(Tensor): a 4-D input Tensor with shape [bs, num_heads, max_sequence_length, hidden_dim]. Returns: Tensor: a Tensor with shape [bs, max_sequence_length, num_heads * hidden_dim]. """ if len(x.shape) == 3: return x if len(x.shape) != 4: raise ValueError("Input(x) should be a 4-D Tensor.") trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) return layers.reshape(x=trans_x, shape=map(int, [ trans_x.shape[0], trans_x.shape[1], trans_x.shape[2] * trans_x.shape[3] ])) q, k, v = __compute_qkv(queries, keys, values, num_heads) q = __split_heads(q, num_heads) k = __split_heads(k, num_heads) v = __split_heads(v, num_heads) key_dim_per_head = keys.shape[-1] // num_heads scaled_q = layers.scale(x=q, scale=key_dim_per_head**-0.5) product = layers.matmul(x=k, y=scaled_q, transpose_y=True) weights = layers.reshape(x=layers.reshape(x=product, shape=[-1, product.shape[-1]], act="softmax"), shape=product.shape) if dropout_rate: weights = layers.dropout(weights, dropout_prob=dropout_rate, is_test=False) ctx_multiheads = layers.matmul(weights, v) return __combine_heads(ctx_multiheads)
def __init__(self, dim_z, x_train, x_test, diff=None, magic=5000): ####################################### SETTINGS ################################### self.x_train = x_train self.x_test = x_test self.diff = diff self.batch_size = 100.0 self.learning_rate = theano.shared(np.float32(0.0008)) self.momentum = 0.3 self.performance = {"train": [], "test": []} self.inpt = T.ftensor4(name="input") self.df = T.fmatrix(name="differential") self.dim_z = dim_z self.generative_z = theano.shared(np.float32(np.zeros([1, dim_z]))) self.activation = relu self.generative = False self.out_distribution = False # self.y = T.matrix(name="y") self.in_filters = [64, 64, 64] self.filter_lengths = [10.0, 10.0, 10.0] self.params = [] # magic = 73888. self.magic = magic self.dropout_symbolic = T.fscalar() self.dropout_prob = theano.shared(np.float32(0.0)) ####################################### LAYERS ###################################### # LAYER 1 ############################## self.conv1 = one_d_conv_layer( self.inpt, self.in_filters[0], 1, self.filter_lengths[0], param_names=["W1", "b1"] ) self.params += self.conv1.params self.bn1 = batchnorm(self.conv1.output) self.nl1 = self.activation(self.bn1.X) self.maxpool1 = pool_2d(self.nl1, [3, 1], stride=[2, 1], mode="average_exc_pad").astype(theano.config.floatX) self.layer1_out = dropout(self.maxpool1, self.dropout_symbolic) # self.layer1_out = self.maxpool1 # LAYER2 ################################ self.flattened = T.flatten(self.layer1_out, outdim=2) # Variational Layer ##################### self.latent_layer = variational_gauss_layer(self.flattened, self.magic, dim_z) self.params += self.latent_layer.params self.latent_out = self.latent_layer.output # Hidden Layer ######################### self.hidden_layer = hidden_layer(self.latent_out, dim_z, self.magic) self.params += self.hidden_layer.params self.hid_out = dropout( self.activation(self.hidden_layer.output).reshape( (self.inpt.shape[0], self.in_filters[-1], int(self.magic / self.in_filters[-1]), 1) ), self.dropout_symbolic, ) # Devonvolutional 1 ###################### self.deconv1 = one_d_deconv_layer( self.hid_out, 1, self.in_filters[2], self.filter_lengths[2], pool=2.0, param_names=["W3", "b3"], distribution=False, ) self.params += self.deconv1.params # self.nl_deconv1 = dropout(self.activation(self.deconv1.output),self.dropout_symbolic) self.tanh_out = self.deconv1.output self.last_layer = self.deconv1 if self.out_distribution == True: self.trunk_sigma = self.last_layer.log_sigma[:, :, : self.inpt.shape[2], :] self.trunc_output = self.tanh_out[:, :, : self.inpt.shape[2], :] ################################### FUNCTIONS ###################################################### self.get_latent_states = theano.function( [self.inpt], self.latent_out, givens=[[self.dropout_symbolic, self.dropout_prob]] ) # self.prior_debug = theano.function([self.inpt],[self.latent_out,self.latent_layer.mu_encoder,self.latent_layer.log_sigma_encoder,self.latent_layer.prior]) # self.get_prior = theano.function([self.inpt],self.latent_layer.prior) # self.convolve1 = theano.function([self.inpt],self.layer1_out) # self.convolve2 = theano.function([self.inpt],self.layer2_out) self.output = theano.function( [self.inpt], self.trunc_output, givens=[[self.dropout_symbolic, self.dropout_prob]] ) self.get_flattened = theano.function( [self.inpt], self.flattened, givens=[[self.dropout_symbolic, self.dropout_prob]] ) # self.deconvolve1 = theano.function([self.inpt],self.deconv1.output) # self.deconvolve2 = 
theano.function([self.inpt],self.deconv2.output) # self.sig_out = theano.function([self.inpt],T.flatten(self.trunk_sigma,outdim=2)) self.output = theano.function( [self.inpt], self.trunc_output, givens=[[self.dropout_symbolic, self.dropout_prob]] ) # self.generate_from_z = theano.function([self.inpt],self.trunc_output,givens = [[self.latent_out,self.generative_z]]) self.generate_from_z = theano.function( [self.inpt], self.trunc_output, givens=[[self.dropout_symbolic, self.dropout_prob], [self.latent_out, self.generative_z]], ) self.cost = self.MSE() self.mse = self.MSE() # self.likelihood = self.log_px_z() # self.get_cost = theano.function([self.inpt],[self.cost,self.mse]) # self.get_likelihood = theano.function([self.layer1.inpt],[self.likelihood]) self.derivatives = T.grad(self.cost, self.params) # self.get_gradients = theano.function([self.inpt],self.derivatives) self.updates = adam(self.params, self.derivatives, self.learning_rate) # self.updates =momentum_update(self.params,self.derivatives,self.learning_rate,self.momentum) self.train_model = theano.function( inputs=[self.inpt, self.df], outputs=self.cost, updates=self.updates, givens=[[self.dropout_symbolic, self.dropout_prob]], )
def forward(self): # in: c, q, c_mask, q_mask, ch, qh, y1, y2 # out: yp1, yp2, loss config = self.config N, PL, QL, CL, d, dc, dg = config.batch_size, self.c_maxlen, self.q_maxlen, config.char_limit, config.hidden, config.char_dim, config.char_hidden gru = cudnn_gru if config.use_cudnn else native_gru with tf.variable_scope('emb'): with tf.variable_scope('char'): ch_emb = tf.reshape( tf.nn.embedding_lookup(self.char_mat, self.ch), [N * PL, CL, dc]) qh_emb = tf.reshape( tf.nn.embedding_lookup(self.char_mat, self.qh), [N * QL, CL, dc]) ch_emb = dropout(ch_emb, keep_prob=config.keep_prob, is_train=self.is_train) qh_emb = dropout(qh_emb, keep_prob=config.keep_prob, is_train=self.is_train) cell_fw = tf.contrib.rnn.GRUCell(dg) cell_bw = tf.contrib.rnn.GRUCell(dg) _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn( cell_fw, cell_bw, ch_emb, self.ch_len, dtype=tf.float32) ch_emb = tf.concat([state_fw, state_bw], axis=1) _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn( cell_fw, cell_bw, qh_emb, self.qh_len, dtype=tf.float32) qh_emb = tf.concat([state_fw, state_bw], axis=1) qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg]) ch_emb = tf.reshape(ch_emb, [N, PL, 2 * dg]) with tf.variable_scope('word'): c_emb = tf.nn.embedding_lookup(self.word_mat, self.c) q_emb = tf.nn.embedding_lookup(self.word_mat, self.q) c_emb_ori = tf.concat([c_emb, ch_emb], axis=2) q_emb_ori = tf.concat([q_emb, qh_emb], axis=2) # spatial dropout if config.use_spatial_dp: print("Using spatial dropout\n") if self.is_train: q_emb_shape = tf.shape(q_emb_ori) c_emb_shape = tf.shape(c_emb_ori) q_emb = tf.nn.dropout(q_emb_ori, keep_prob=0.5 + config.keep_prob / 2, noise_shape=(q_emb_shape[0], 1, q_emb_shape[2])) c_emb = tf.nn.dropout(c_emb_ori, keep_prob=0.5 + config.keep_prob / 2, noise_shape=(c_emb_shape[0], 1, c_emb_shape[2])) else: q_emb = q_emb_ori c_emb = c_emb_ori else: c_emb = c_emb_ori q_emb = q_emb_ori # context encoding: method1 with tf.variable_scope('encoding'): rnn = gru(num_layers=3, num_units=d, batch_size=N, input_size=c_emb.get_shape().as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train) c = rnn(c_emb, seq_len=self.c_len, concat=True, keep_origin_input=True) q = rnn(q_emb, seq_len=self.q_len, concat=True, keep_origin_input=True) with tf.variable_scope('attention'): qc_att = dot_attention(inputs=c, memory=q, hidden_size=d, mask=self.q_mask, keep_prob=config.keep_prob, is_train=self.is_train, scope='qc_dot_att') rnn = gru(num_layers=1, num_units=d, batch_size=N, input_size=qc_att.get_shape().as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train, scope='qc') cq_att = dot_attention(inputs=q, memory=c, hidden_size=d, mask=self.c_mask, keep_prob=config.keep_prob, is_train=self.is_train, scope='cq_dot_att') rnn = gru(num_layers=1, num_units=d, batch_size=N, input_size=cq_att.get_shape().as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train, scope='cq') c = rnn(qc_att, seq_len=self.c_len, keep_origin_input=False) q = rnn(cq_att, seq_len=self.q_len, keep_origin_input=False) # seq_length = self.q_len # idx = tf.concat( # [tf.expand_dims(tf.range(tf.shape(q)[0]), axis=1), # tf.expand_dims(seq_length - 1, axis=1)], axis=1) # # (B, 2h) # q_state = tf.gather_nd(q, idx) with tf.variable_scope('hybrid'): # B * N * Q doc_qry_mask = tf.keras.backend.batch_dot( tf.expand_dims(tf.cast(self.c_mask, tf.float32), 2), tf.expand_dims(tf.cast(self.q_mask, tf.float32), 1), axes=[2, 1]) # (B, D, Q, 2h) doc_expand_embed = tf.tile(tf.expand_dims(c, 2), [1, 1, self.q_maxlen, 1]) # (B, D, Q, 2h) 
qry_expand_embed = tf.tile(tf.expand_dims(q, 1), [1, self.c_maxlen, 1, 1]) doc_qry_dot_embed = doc_expand_embed * qry_expand_embed # (B, D, Q, 6h) doc_qry_embed = tf.concat( [doc_expand_embed, qry_expand_embed, doc_qry_dot_embed], axis=3) # attention way num_units = doc_qry_embed.shape[-1] with tf.variable_scope('bi_attention'): w = tf.get_variable('W_att', shape=(num_units, 1), dtype=tf.float32, initializer=tf.random_uniform_initializer( -0.01, 0.01)) # (B, D, Q) S = tf.matmul( tf.reshape(doc_qry_embed, (-1, doc_qry_embed.shape[-1])), w) S = tf.reshape(S, (N, self.c_maxlen, self.q_maxlen)) # context2query, (B, D, 2h) c2q = tf.keras.backend.batch_dot( tf.nn.softmax(softmax_mask(S, doc_qry_mask), dim=2), q) c2q_gated = c2q * c with tf.variable_scope('gated_attention'): # Gated Attention g_doc_qry_att = tf.keras.backend.batch_dot( c, tf.transpose(q, (0, 2, 1))) # B * N * Q alphas = tf.nn.softmax(softmax_mask(g_doc_qry_att, doc_qry_mask), dim=2) q_rep = tf.keras.backend.batch_dot(alphas, q) # B x N x 2D d_gated = c * q_rep G = tf.concat([c, c2q, q_rep, c2q_gated, d_gated], axis=-1) # G = tf.nn.relu(dense(G, d * 2)) with tf.variable_scope('match'): G = dot_attention(inputs=G, memory=G, hidden_size=d, mask=self.c_mask, keep_prob=config.keep_prob, is_train=self.is_train) rnn = gru(num_layers=1, num_units=d, batch_size=N, input_size=G.get_shape().as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train) doc_encoding = rnn(G, seq_len=self.c_len, concat=False) with tf.variable_scope('pointer'): # Use self-attention or bilinear attention init = summ(q, d, mask=self.q_mask, keep_prob=config.keep_prob, is_train=self.is_train) # init = self.bilinear_attention_layer(c, q_state, self.c_mask) pointer = ptr_layer(batch_size=N, hidden_size=init.get_shape().as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train) logits1, logits2 = pointer(init, doc_encoding, d, self.c_mask) with tf.variable_scope('predict'): outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2), tf.expand_dims(tf.nn.softmax(logits2), axis=1)) outer = tf.matrix_band_part(outer, 0, 15) self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1) self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1) # loss1 = tf.nn.softmax_cross_entropy_with_logits_v2( # logits=logits1, labels=tf.stop_gradient(self.y1)) # loss2 = tf.nn.softmax_cross_entropy_with_logits_v2( # logits=logits2, labels=tf.stop_gradient(self.y2)) if config.use_ghmc_or_ghmr == 'ghmc': print('Using GHMC Loss\n') ghmc_loss_func = GHMC_loss(momentum=0.) loss1 = ghmc_loss_func(logits1, tf.stop_gradient(self.y1)) loss2 = ghmc_loss_func(logits2, tf.stop_gradient(self.y2)) elif config.use_ghmc_or_ghmr == 'ghmr': print('Using GHMR Loss\n') ghmr_loss_func = GHMR_loss() loss1 = ghmr_loss_func(logits1, tf.stop_gradient(self.y1)) loss2 = ghmr_loss_func(logits2, tf.stop_gradient(self.y2)) else: loss1 = tf.nn.softmax_cross_entropy_with_logits( logits=logits1, labels=tf.stop_gradient(self.y1)) loss2 = tf.nn.softmax_cross_entropy_with_logits( logits=logits2, labels=tf.stop_gradient(self.y2)) self.loss = tf.reduce_mean(loss1 + loss2)
def get_model(X, batch_size, image_dimension):
    input_shape = (batch_size, 3, image_dimension, image_dimension)
    all_parameters = []

    #############################################
    # a first convolution with 32 (3, 3) filters
    output, output_test, params, output_shape = convolutional(
        X, X, input_shape, 32, (3, 3))
    all_parameters += params
    # maxpool with size=(2, 2)
    output, output_test, params, output_shape = maxpool(
        output, output_test, output_shape, (2, 2))
    # relu activation
    output, output_test, params, output_shape = activation(
        output, output_test, output_shape, 'relu')
    # dropout
    output, output_test, params, output_shape = dropout(
        output, output_test, output_shape)

    #############################################
    # a second convolution with 32 (3, 3) filters
    output, output_test, params, output_shape = convolutional(
        output, output_test, output_shape, 32, (3, 3))
    all_parameters += params
    # maxpool with size=(2, 2)
    output, output_test, params, output_shape = maxpool(
        output, output_test, output_shape, (2, 2))
    # relu activation
    output, output_test, params, output_shape = activation(
        output, output_test, output_shape, 'relu')
    # dropout
    output, output_test, params, output_shape = dropout(
        output, output_test, output_shape)

    #############################################
    # a third convolution with 32 (3, 3) filters
    output, output_test, params, output_shape = convolutional(
        output, output_test, output_shape, 32, (3, 3))
    all_parameters += params
    # maxpool with size=(2, 2)
    output, output_test, params, output_shape = maxpool(
        output, output_test, output_shape, (2, 2))
    # relu activation
    output, output_test, params, output_shape = activation(
        output, output_test, output_shape, 'relu')
    # dropout
    output, output_test, params, output_shape = dropout(
        output, output_test, output_shape)

    #############################################
    # MLP first layer
    output = output.flatten(2)
    output_test = output_test.flatten(2)
    output, output_test, params, output_shape = linear(
        output, output_test,
        (output_shape[0], output_shape[1] * output_shape[2] * output_shape[3]),
        500)
    all_parameters += params
    output, output_test, params, output_shape = activation(
        output, output_test, output_shape, 'relu')

    #############################################
    # MLP second layer
    output, output_test, params, output_shape = linear(
        output, output_test, output_shape, 1)
    all_parameters += params
    output, output_test, params, output_shape = activation(
        output, output_test, output_shape, 'sigmoid')

    #############################################
    return output, output_test, all_parameters
def forward(self): # in: c, q, c_mask, q_mask, ch, qh, y1, y2 # out: yp1, yp2, loss config = self.config N, PL, QL, CL, d, dc, dg = config.batch_size, self.c_maxlen, self.q_maxlen, config.char_limit, config.hidden, config.char_dim, config.char_hidden gru = cudnn_gru if config.use_cudnn else native_gru gru = native_sru if config.use_sru else gru with tf.variable_scope('emb'): with tf.variable_scope('char'): ch_emb = tf.reshape( tf.nn.embedding_lookup(self.char_mat, self.ch), [N * PL, CL, dc]) qh_emb = tf.reshape( tf.nn.embedding_lookup(self.char_mat, self.qh), [N * QL, CL, dc]) ch_emb = dropout(ch_emb, keep_prob=config.keep_prob, is_train=self.is_train) qh_emb = dropout(qh_emb, keep_prob=config.keep_prob, is_train=self.is_train) cell_fw = tf.contrib.rnn.GRUCell(dg) cell_bw = tf.contrib.rnn.GRUCell(dg) _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn( cell_fw, cell_bw, ch_emb, self.ch_len, dtype=tf.float32) ch_emb = tf.concat([state_fw, state_bw], axis=1) _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn( cell_fw, cell_bw, qh_emb, self.qh_len, dtype=tf.float32) qh_emb = tf.concat([state_fw, state_bw], axis=1) qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg]) ch_emb = tf.reshape(ch_emb, [N, PL, 2 * dg]) with tf.variable_scope('word'): c_emb = tf.nn.embedding_lookup(self.word_mat, self.c) q_emb = tf.nn.embedding_lookup(self.word_mat, self.q) c_emb = tf.concat([c_emb, ch_emb], axis=2) q_emb = tf.concat([q_emb, qh_emb], axis=2) with tf.variable_scope('encoding'): rnn = gru(num_layers=3, num_units=d, batch_size=N, input_size=c_emb.get_shape().as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train) c = rnn(c_emb, seq_len=self.c_len) tf.get_variable_scope().reuse_variables() q = rnn(q_emb, seq_len=self.q_len) with tf.variable_scope('attention'): qc_att = dot_attention(inputs=c, memory=q, hidden_size=d, mask=self.q_mask, keep_prob=config.keep_prob, is_train=self.is_train) rnn = gru(num_layers=1, num_units=d, batch_size=N, input_size=qc_att.get_shape().as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train) att = rnn(qc_att, seq_len=self.c_len) with tf.variable_scope('match'): self_att = dot_attention(inputs=att, memory=att, hidden_size=d, mask=self.c_mask, keep_prob=config.keep_prob, is_train=self.is_train) rnn = gru(num_layers=1, num_units=d, batch_size=N, input_size=self_att.get_shape().as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train) match = rnn(self_att, seq_len=self.c_len) with tf.variable_scope('pointer'): init = summ(q[:,:,-2 * d:], d, mask=self.q_mask, keep_prob=config.keep_prob, is_train=self.is_train) pointer = ptr_layer(batch_size=N, hidden_size=init.get_shape().as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train) logits1, logits2 = pointer(init, match, d, self.c_mask) with tf.variable_scope('predict'): outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2), tf.expand_dims(tf.nn.softmax(logits2), axis=1)) outer = tf.matrix_band_part(outer, 0, 15) self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1) self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1) # loss1 = tf.nn.softmax_cross_entropy_with_logits_v2( # logits=logits1, labels=tf.stop_gradient(self.y1)) loss1 = tf.nn.softmax_cross_entropy_with_logits( logits=logits1, labels=tf.stop_gradient(self.y1)) # loss2 = tf.nn.softmax_cross_entropy_with_logits_v2( # logits=logits2, labels=tf.stop_gradient(self.y2)) loss2 = tf.nn.softmax_cross_entropy_with_logits( logits=logits2, labels=tf.stop_gradient(self.y2)) self.loss = tf.reduce_mean(loss1 + loss2)
def parameter_efficient(in_channels=1, out_channels=2, start_filters=64, input_side_length=256, depth=4, res_blocks=2, filter_size=3, sparse_labels=True, batch_size=1, activation="cReLU", batch_norm=True): """ Creates the graph for the parameter efficient variant of the U-Net and sets up the appropriate input and output placeholder. Parameters ---------- in_channels: int The depth of the input. out_channels: int The depth of number of classes of the output. start_filters : int The number of filters in the first convolution. input_side_length: int The side length of the square input. depth: int The depth of the U-part of the network. This is equal to the number of max-pooling layers. res_blocks: int The number of residual blocks in between max-pooling layers on the down-path and in between up-convolutions on the up-path. filter_size: int The width and height of the filter. The receptive field. sparse_labels: bool If true, the labels are integers, one integer per pixel, denoting the class that that pixel belongs to. If false, labels are one-hot encoded. batch_size: int The training batch size. activation: string Either "ReLU" for the standard ReLU activation or "cReLU" for the concatenated ReLU activation function. batch_norm: bool Whether to use batch normalization or not. Returns ------- inputs : TF tensor The network input. logits: TF tensor The network output before SoftMax. ground_truth: TF tensor The desired output from the ground truth. keep_prob: TF float The TF variable holding the keep probability for drop out layers. training_bool: TF bool The TF variable holding the boolean value, which switches batch normalization to training or inference mode. """ activation = str.lower(activation) if activation not in ["relu", "crelu"]: raise ValueError("activation must be \"ReLU\" or \"cReLU\".") pool_size = 2 # Define inputs and helper functions # with tf.variable_scope('inputs'): inputs = tf.placeholder(tf.float32, shape=(batch_size, input_side_length, input_side_length, in_channels), name='inputs') if sparse_labels: ground_truth = tf.placeholder(tf.int32, shape=(batch_size, input_side_length, input_side_length), name='labels') else: ground_truth = tf.placeholder(tf.float32, shape=(batch_size, input_side_length, input_side_length, out_channels), name='labels') keep_prob = tf.placeholder(tf.float32, shape=[], name='keep_prob') training = tf.placeholder(tf.bool, shape=[], name="training") network_input = tf.transpose(inputs, perm=[0, 3, 1, 2]) # [conv -> conv -> max pool -> drop out] + parameter updates def step_down(name, input_, filter_size=3, res_blocks=2, keep_prob=1., training=False): with tf.variable_scope(name): with tf.variable_scope("res_block_0"): conv_out, tiled_input = layers.res_block( input_, filter_size, channel_multiplier=2, depthwise_multiplier=2, convolutions=2, training=training, activation=activation, batch_norm=batch_norm, data_format="NCHW") for i in xrange(1, res_blocks): with tf.variable_scope("res_block_" + str(i)): conv_out = layers.res_block(conv_out, filter_size, channel_multiplier=1, depthwise_multiplier=2, convolutions=2, training=training, activation=activation, batch_norm=batch_norm, data_format="NCHW") conv_out = conv_out + tiled_input pool_out = layers.max_pool(conv_out, pool_size, data_format="NCHW") bottom_out = layers.dropout(pool_out, keep_prob) side_out = layers.dropout(conv_out, keep_prob) return bottom_out, side_out # parameter updates + [upconv and concat -> drop out -> conv -> conv] def step_up(name, bottom_input, side_input, filter_size=3, 
res_blocks=2, keep_prob=1., training=False): with tf.variable_scope(name): added_input = layers.upconv_add_block(bottom_input, side_input, data_format="NCHW") conv_out = added_input for i in xrange(res_blocks): with tf.variable_scope("res_block_" + str(i)): conv_out = layers.res_block(conv_out, filter_size, channel_multiplier=1, depthwise_multiplier=2, convolutions=2, training=training, activation=activation, batch_norm=batch_norm, data_format="NCHW") result = layers.dropout(conv_out, keep_prob) return result # Build the network # with tf.variable_scope('contracting'): outputs = [] with tf.variable_scope("step_0"): # Conv 1 in_filters = in_channels out_filters = start_filters stddev = np.sqrt(2. / (filter_size**2 * in_filters)) w = layers.weight_variable( [filter_size, filter_size, in_filters, out_filters], stddev=stddev, name="weights") out_ = tf.nn.conv2d(network_input, w, [1, 1, 1, 1], padding="SAME", data_format="NCHW") out_ = out_ + layers.bias_variable([out_filters, 1, 1], name='biases') # Batch Norm 1 if batch_norm: out_ = tf.layers.batch_normalization(out_, axis=1, momentum=0.999, center=True, scale=True, training=training, trainable=True, name="batch_norm", fused=True) in_filters = out_filters # concatenated ReLU if activation == "crelu": out_ = tf.concat([out_, -out_], axis=1) in_filters = 2 * in_filters out_ = tf.nn.relu(out_) # Conv 2 stddev = np.sqrt(2. / (filter_size**2 * in_filters)) w = layers.weight_variable( [filter_size, filter_size, in_filters, out_filters], stddev=stddev, name="weights") out_ = tf.nn.conv2d(out_, w, [1, 1, 1, 1], padding="SAME", data_format="NCHW") out_ = out_ + layers.bias_variable([out_filters, 1, 1], name='biases') # Res Block 1 conv_out = layers.res_block(out_, filter_size, channel_multiplier=1, depthwise_multiplier=2, convolutions=2, training=training, activation=activation, batch_norm=batch_norm, data_format="NCHW") pool_out = layers.max_pool(conv_out, pool_size, data_format="NCHW") bottom_out = layers.dropout(pool_out, keep_prob) side_out = layers.dropout(conv_out, keep_prob) outputs.append(side_out) # Build contracting path for i in xrange(1, depth): bottom_out, side_out = step_down('step_' + str(i), bottom_out, filter_size=filter_size, res_blocks=res_blocks, keep_prob=keep_prob, training=training) outputs.append(side_out) # Bottom [conv -> conv] with tf.variable_scope('step_' + str(depth)): with tf.variable_scope("res_block_0"): conv_out, tiled_input = layers.res_block(bottom_out, filter_size, channel_multiplier=2, depthwise_multiplier=2, convolutions=2, training=training, activation=activation, batch_norm=batch_norm, data_format="NCHW") for i in xrange(1, res_blocks): with tf.variable_scope("res_block_" + str(i)): conv_out = layers.res_block(conv_out, filter_size, channel_multiplier=1, depthwise_multiplier=2, convolutions=2, training=training, activation=activation, batch_norm=batch_norm, data_format="NCHW") conv_out = conv_out + tiled_input current_tensor = layers.dropout(conv_out, keep_prob) with tf.variable_scope('expanding'): # Set initial parameter outputs.reverse() # Build expanding path for i in xrange(depth): current_tensor = step_up('step_' + str(depth + i + 1), current_tensor, outputs[i], filter_size=filter_size, res_blocks=res_blocks, keep_prob=keep_prob, training=training) # Last layer is a 1x1 convolution to get the predictions # We don't want an activation function for this one (softmax will be applied later), so we're doing it manually in_filters = current_tensor.shape.as_list()[1] stddev = np.sqrt(2. 
/ in_filters) with tf.variable_scope('classification'): w = layers.weight_variable([1, 1, in_filters, out_channels], stddev, name='weights') b = layers.bias_variable([out_channels, 1, 1], name='biases') conv = tf.nn.conv2d(current_tensor, w, strides=[1, 1, 1, 1], padding="SAME", data_format="NCHW", name='conv') logits = conv + b logits = tf.transpose(logits, perm=[0, 2, 3, 1]) return inputs, logits, ground_truth, keep_prob, training
def unet(in_channels=1, out_channels=2, start_filters=64, side_length=572, depth=4, convolutions=2, filter_size=3, sparse_labels=True, batch_size=1): """ Creates the graph for the standard U-Net and sets up the appropriate input and output placeholder. Parameters ---------- in_channels: int The depth of the input. out_channels: int The depth of number of classes of the output. start_filters : int The number of filters in the first convolution. side_length: int The side length of the square input. depth: int The depth of the U-part of the network. This is equal to the number of max-pooling layers. convolutions: int The number of convolutions in between max-pooling layers on the down-path and in between up-convolutions on the up-path. filter_size: int The width and height of the filter. The receptive field. sparse_labels: bool If true, the labels are integers, one integer per pixel, denoting the class that that pixel belongs to. If false, labels are one-hot encoded. batch_size: int The training batch size. Returns ------- inputs : TF tensor The network input. logits: TF tensor The network output before SoftMax. ground_truth: TF tensor The desired output from the ground truth. keep_prob: TF float The TF variable holding the keep probability for drop out layers. """ pool_size = 2 padding = "SAME" # Define inputs and helper functions # with tf.variable_scope('inputs'): inputs = tf.placeholder(tf.float32, shape=(batch_size, side_length, side_length, in_channels), name='inputs') if sparse_labels: ground_truth = tf.placeholder(tf.int32, shape=(batch_size, side_length, side_length), name='labels') else: ground_truth = tf.placeholder(tf.float32, shape=(batch_size, side_length, side_length, out_channels), name='labels') keep_prob = tf.placeholder(tf.float32, shape=[], name='keep_prob') network_input = tf.transpose(inputs, perm=[0, 3, 1, 2]) # [conv -> conv -> max pool -> drop out] + parameter updates def step_down(name, _input): with tf.variable_scope(name): conv_out = layers.conv_block(_input, filter_size, channel_multiplier=2, convolutions=convolutions, padding=padding, data_format="NCHW") pool_out = layers.max_pool(conv_out, pool_size, data_format="NCHW") result = layers.dropout(pool_out, keep_prob) return result, conv_out # parameter updates + [upconv and concat -> drop out -> conv -> conv] def step_up(name, bottom_input, side_input): with tf.variable_scope(name): concat_out = layers.upconv_concat_block(bottom_input, side_input, data_format="NCHW") drop_out = layers.dropout(concat_out, keep_prob) result = layers.conv_block(drop_out, filter_size, channel_multiplier=0.5, convolutions=convolutions, padding=padding, data_format="NCHW") return result # Build the network # with tf.variable_scope('contracting'): # Set initial parameters outputs = [] # Build contracting path with tf.variable_scope("step_0"): conv_out = layers.conv_block(network_input, filter_size, out_filters=start_filters, convolutions=convolutions, padding=padding, data_format="NCHW") pool_out = layers.max_pool(conv_out, pool_size, data_format="NCHW") current_tensor = layers.dropout(pool_out, keep_prob) outputs.append(conv_out) for i in xrange(1, depth): current_tensor, conv_out = step_down("step_" + str(i), current_tensor) outputs.append(conv_out) # Bottom [conv -> conv] with tf.variable_scope("step_" + str(depth)): current_tensor = layers.conv_block(current_tensor, filter_size, channel_multiplier=2, convolutions=convolutions, padding=padding, data_format="NCHW") with tf.variable_scope("expanding"): # Set initial parameter 
outputs.reverse() # Build expanding path for i in xrange(depth): current_tensor = step_up("step_" + str(depth + i + 1), current_tensor, outputs[i]) # Last layer is a 1x1 convolution to get the predictions # We don't want an activation function for this one (softmax will be applied later), so we're doing it manually in_filters = current_tensor.shape.as_list()[1] stddev = np.sqrt(2. / in_filters) with tf.variable_scope("classification"): weight = layers.weight_variable([1, 1, in_filters, out_channels], stddev, name="weights") bias = layers.bias_variable([out_channels, 1, 1], name="biases") conv = tf.nn.conv2d(current_tensor, weight, strides=[1, 1, 1, 1], padding="VALID", name="conv", data_format="NCHW") logits = conv + bias logits = tf.transpose(logits, perm=[0, 2, 3, 1]) return inputs, logits, ground_truth, keep_prob
def alexnet(inputs, num_classes, keep_prob):
    """Create alexnet model"""
    x = tf.reshape(inputs, shape=[-1, 28, 28, 1])

    # first conv layer, downsampling layer, and normalization layer
    conv1 = conv2d(x, shape=(11, 11, 1, 96), padding='SAME', name='conv1')
    pool1 = max_pooling(conv1, ksize=(2, 2), stride=(2, 2), padding='SAME', name='pool1')
    norm1 = norm(pool1, radius=4, name='norm1')

    # second conv layer
    conv2 = conv2d(norm1, shape=(5, 5, 96, 256), padding='SAME', name='conv2')
    pool2 = max_pooling(conv2, ksize=(2, 2), stride=(2, 2), padding='SAME', name='pool2')
    norm2 = norm(pool2, radius=4, name='norm2')

    # 3rd conv layer
    conv3 = conv2d(norm2, shape=(3, 3, 256, 384), padding='SAME', name='conv3')
    # pool3 = max_pooling(conv3, ksize=(2, 2), stride=(2, 2), padding='SAME', name='pool3')
    norm3 = norm(conv3, radius=4, name='norm3')

    # 4th conv layer
    conv4 = conv2d(norm3, shape=(3, 3, 384, 384), padding='SAME', name='conv4')

    # 5th conv layer
    conv5 = conv2d(conv4, shape=(3, 3, 384, 256), padding='SAME', name='conv5')
    pool5 = max_pooling(conv5, ksize=(2, 2), stride=(2, 2), padding='SAME', name='pool5')
    norm5 = norm(pool5, radius=4, name='norm5')

    # first fully connected layer
    fc1 = tf.reshape(norm5, shape=(-1, 4 * 4 * 256))
    fc1 = fc(fc1, shape=(4 * 4 * 256, 4096), name='fc1')
    fc1 = dropout(fc1, keep_prob=keep_prob, name='dropout1')
    fc2 = fc(fc1, shape=(4096, 4096), name='fc2')
    fc2 = dropout(fc2, keep_prob=keep_prob, name='dropout2')

    # output logits value
    with tf.variable_scope('classifier') as scope:
        weights = tf.get_variable('weights', shape=[4096, num_classes],
                                  initializer=tf.initializers.he_normal())
        biases = tf.get_variable('biases', shape=[num_classes],
                                 initializer=tf.initializers.random_normal())
        # define output logits value
        logits = tf.add(tf.matmul(fc2, weights), biases, name=scope.name + '_logits')

    return logits
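A hypothetical TF1-style usage sketch for the function above (placeholder names assumed, not from the snippet): keep_prob is fed below 1.0 while training and as 1.0 at evaluation, which makes the two dropout layers pass-throughs at test time.

inputs = tf.placeholder(tf.float32, shape=[None, 784], name='inputs')
keep_prob = tf.placeholder(tf.float32, shape=[], name='keep_prob')
logits = alexnet(inputs, num_classes=10, keep_prob=keep_prob)
# training step:   sess.run(train_op, feed_dict={inputs: batch_x, keep_prob: 0.5})
# evaluation step: sess.run(logits, feed_dict={inputs: batch_x, keep_prob: 1.0})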
def __init_network(self): with tf.variable_scope('mobilenet_encoder'): # Preprocessing as done in the paper with tf.name_scope('pre_processing'): preprocessed_input = (self.X - self.mean_img) / 255.0 # Model is here! conv1_1 = conv2d('conv_1', preprocessed_input, num_filters=int(round(32 * self.args.width_multiplier)), kernel_size=(3, 3), padding='SAME', stride=(2, 2), activation=tf.nn.relu6, batchnorm_enabled=self.args.batchnorm_enabled, is_training=self.is_training, l2_strength=self.args.l2_strength, bias=self.args.bias) self.__add_to_nodes([conv1_1]) ############################################################################################ conv2_1_dw, conv2_1_pw = depthwise_separable_conv2d('conv_ds_2', conv1_1, width_multiplier=self.args.width_multiplier, num_filters=64, kernel_size=(3, 3), padding='SAME', stride=(1, 1), batchnorm_enabled=self.args.batchnorm_enabled, activation=tf.nn.relu6, is_training=self.is_training, l2_strength=self.args.l2_strength, biases=(self.args.bias, self.args.bias)) self.__add_to_nodes([conv2_1_dw, conv2_1_pw]) conv2_2_dw, conv2_2_pw = depthwise_separable_conv2d('conv_ds_3', conv2_1_pw, width_multiplier=self.args.width_multiplier, num_filters=128, kernel_size=(3, 3), padding='SAME', stride=(2, 2), batchnorm_enabled=self.args.batchnorm_enabled, activation=tf.nn.relu6, is_training=self.is_training, l2_strength=self.args.l2_strength, biases=(self.args.bias, self.args.bias)) self.__add_to_nodes([conv2_2_dw, conv2_2_pw]) ############################################################################################ conv3_1_dw, conv3_1_pw = depthwise_separable_conv2d('conv_ds_4', conv2_2_pw, width_multiplier=self.args.width_multiplier, num_filters=128, kernel_size=(3, 3), padding='SAME', stride=(1, 1), batchnorm_enabled=self.args.batchnorm_enabled, activation=tf.nn.relu6, is_training=self.is_training, l2_strength=self.args.l2_strength, biases=(self.args.bias, self.args.bias)) self.__add_to_nodes([conv3_1_dw, conv3_1_pw]) conv3_2_dw, conv3_2_pw = depthwise_separable_conv2d('conv_ds_5', conv3_1_pw, width_multiplier=self.args.width_multiplier, num_filters=256, kernel_size=(3, 3), padding='SAME', stride=(2, 2), batchnorm_enabled=self.args.batchnorm_enabled, activation=tf.nn.relu6, is_training=self.is_training, l2_strength=self.args.l2_strength, biases=(self.args.bias, self.args.bias)) self.__add_to_nodes([conv3_2_dw, conv3_2_pw]) ############################################################################################ conv4_1_dw, conv4_1_pw = depthwise_separable_conv2d('conv_ds_6', conv3_2_pw, width_multiplier=self.args.width_multiplier, num_filters=256, kernel_size=(3, 3), padding='SAME', stride=(1, 1), batchnorm_enabled=self.args.batchnorm_enabled, activation=tf.nn.relu6, is_training=self.is_training, l2_strength=self.args.l2_strength, biases=(self.args.bias, self.args.bias)) self.__add_to_nodes([conv4_1_dw, conv4_1_pw]) conv4_2_dw, conv4_2_pw = depthwise_separable_conv2d('conv_ds_7', conv4_1_pw, width_multiplier=self.args.width_multiplier, num_filters=512, kernel_size=(3, 3), padding='SAME', stride=(2, 2), batchnorm_enabled=self.args.batchnorm_enabled, activation=tf.nn.relu6, is_training=self.is_training, l2_strength=self.args.l2_strength, biases=(self.args.bias, self.args.bias)) self.__add_to_nodes([conv4_2_dw, conv4_2_pw]) ############################################################################################ conv5_1_dw, conv5_1_pw = depthwise_separable_conv2d('conv_ds_8', conv4_2_pw, width_multiplier=self.args.width_multiplier, num_filters=512, 
kernel_size=(3, 3), padding='SAME', stride=(1, 1), batchnorm_enabled=self.args.batchnorm_enabled, activation=tf.nn.relu6, is_training=self.is_training, l2_strength=self.args.l2_strength, biases=(self.args.bias, self.args.bias)) self.__add_to_nodes([conv5_1_dw, conv5_1_pw]) conv5_2_dw, conv5_2_pw = depthwise_separable_conv2d('conv_ds_9', conv5_1_pw, width_multiplier=self.args.width_multiplier, num_filters=512, kernel_size=(3, 3), padding='SAME', stride=(1, 1), batchnorm_enabled=self.args.batchnorm_enabled, activation=tf.nn.relu6, is_training=self.is_training, l2_strength=self.args.l2_strength, biases=(self.args.bias, self.args.bias)) self.__add_to_nodes([conv5_2_dw, conv5_2_pw]) conv5_3_dw, conv5_3_pw = depthwise_separable_conv2d('conv_ds_10', conv5_2_pw, width_multiplier=self.args.width_multiplier, num_filters=512, kernel_size=(3, 3), padding='SAME', stride=(1, 1), batchnorm_enabled=self.args.batchnorm_enabled, activation=tf.nn.relu6, is_training=self.is_training, l2_strength=self.args.l2_strength, biases=(self.args.bias, self.args.bias)) self.__add_to_nodes([conv5_3_dw, conv5_3_pw]) conv5_4_dw, conv5_4_pw = depthwise_separable_conv2d('conv_ds_11', conv5_3_pw, width_multiplier=self.args.width_multiplier, num_filters=512, kernel_size=(3, 3), padding='SAME', stride=(1, 1), batchnorm_enabled=self.args.batchnorm_enabled, activation=tf.nn.relu6, is_training=self.is_training, l2_strength=self.args.l2_strength, biases=(self.args.bias, self.args.bias)) self.__add_to_nodes([conv5_4_dw, conv5_4_pw]) conv5_5_dw, conv5_5_pw = depthwise_separable_conv2d('conv_ds_12', conv5_4_pw, width_multiplier=self.args.width_multiplier, num_filters=512, kernel_size=(3, 3), padding='SAME', stride=(1, 1), batchnorm_enabled=self.args.batchnorm_enabled, activation=tf.nn.relu6, is_training=self.is_training, l2_strength=self.args.l2_strength, biases=(self.args.bias, self.args.bias)) self.__add_to_nodes([conv5_5_dw, conv5_5_pw]) conv5_6_dw, conv5_6_pw = depthwise_separable_conv2d('conv_ds_13', conv5_5_pw, width_multiplier=self.args.width_multiplier, num_filters=1024, kernel_size=(3, 3), padding='SAME', stride=(2, 2), batchnorm_enabled=self.args.batchnorm_enabled, activation=tf.nn.relu6, is_training=self.is_training, l2_strength=self.args.l2_strength, biases=(self.args.bias, self.args.bias)) self.__add_to_nodes([conv5_6_dw, conv5_6_pw]) ############################################################################################ conv6_1_dw, conv6_1_pw = depthwise_separable_conv2d('conv_ds_14', conv5_6_pw, width_multiplier=self.args.width_multiplier, num_filters=1024, kernel_size=(3, 3), padding='SAME', stride=(1, 1), batchnorm_enabled=self.args.batchnorm_enabled, activation=tf.nn.relu6, is_training=self.is_training, l2_strength=self.args.l2_strength, biases=(self.args.bias, self.args.bias)) self.__add_to_nodes([conv6_1_dw, conv6_1_pw]) ############################################################################################ avg_pool = avg_pool_2d(conv6_1_pw, size=(7, 7), stride=(1, 1)) dropped = dropout(avg_pool, self.args.dropout_keep_prob, self.is_training) self.logits = flatten(conv2d('fc', dropped, kernel_size=(1, 1), num_filters=self.args.num_classes, l2_strength=self.args.l2_strength, bias=self.args.bias)) self.__add_to_nodes([avg_pool, dropped, self.logits])
def __init__(self, dim_z, x_train, x_test, diff=None, magic=5000):
    ####################################### SETTINGS ###################################
    self.x_train = x_train
    self.x_test = x_test
    self.diff = diff
    self.batch_size = 100.
    self.learning_rate = theano.shared(np.float32(0.0008))
    self.momentum = 0.3
    self.performance = {"train": [], "test": []}
    self.inpt = T.ftensor4(name='input')
    self.df = T.fmatrix(name='differential')
    self.dim_z = dim_z
    self.generative_z = theano.shared(np.float32(np.zeros([1, dim_z])))
    self.activation = T.abs_
    self.generative = False
    self.out_distribution = False
    #self.y = T.matrix(name="y")
    self.in_filters = [32, 32, 32, 32]
    self.filter_lengths = [11., 11., 11., 11.]
    self.params = []
    #magic = 73888.
    self.magic = magic
    self.dropout_symbolic = T.fscalar()
    self.dropout_prob = theano.shared(np.float32(0.0))

    ####################################### LAYERS ######################################
    # LAYER 1 ##############################
    self.conv1 = one_d_conv_layer(self.inpt, self.in_filters[0], 1,
                                  self.filter_lengths[0], param_names=["W1", 'b1'])
    self.params += self.conv1.params
    self.bn1 = batchnorm(self.conv1.output)
    self.nl1 = self.activation(self.bn1.X)
    self.maxpool1 = ds.max_pool_2d(self.nl1, [3, 1], st=[2, 1],
                                   ignore_border=False).astype(theano.config.floatX)
    self.layer1_out = dropout(self.maxpool1, self.dropout_symbolic)

    # LAYER 2 ##############################
    self.conv2 = one_d_conv_layer(self.layer1_out, self.in_filters[1], self.in_filters[0],
                                  self.filter_lengths[1], param_names=["W2", 'b2'])
    self.params += self.conv2.params
    self.bn2 = batchnorm(self.conv2.output)
    self.nl2 = self.activation(self.bn2.X)
    self.maxpool2 = ds.max_pool_2d(self.nl2, [3, 1], st=[2, 1],
                                   ignore_border=False).astype(theano.config.floatX)
    self.layer2_out = dropout(self.maxpool2, self.dropout_symbolic)
    #self.layer1_out = self.maxpool1

    # LAYER 3 ################################
    self.flattened = T.flatten(self.layer2_out, outdim=2)

    # Variational Layer #####################
    self.latent_layer = variational_gauss_layer(self.flattened, self.magic, dim_z)
    self.params += self.latent_layer.params
    self.latent_out = self.latent_layer.output

    # Hidden Layer #########################
    self.hidden_layer = hidden_layer(self.latent_out, dim_z, self.magic)
    self.params += self.hidden_layer.params
    self.hid_out = dropout(
        self.activation(self.hidden_layer.output).reshape(
            (self.inpt.shape[0], self.in_filters[-1],
             int(self.magic / self.in_filters[-1]), 1)),
        self.dropout_symbolic)

    # Deconv 1 ######################
    self.deconv1 = one_d_deconv_layer(self.hid_out, self.in_filters[2], self.in_filters[2],
                                      self.filter_lengths[2], pool=2.,
                                      param_names=["W3", 'b3'], distribution=False)
    self.params += self.deconv1.params

    # Deconv 2 ######################
    self.deconv2 = one_d_deconv_layer(self.deconv1.output, 1, self.in_filters[3],
                                      self.filter_lengths[3], pool=2.,
                                      param_names=["W4", 'b4'], distribution=False)
    self.params += self.deconv2.params
    #self.nl_deconv1 = dropout(self.activation(self.deconv1.output), self.dropout_symbolic)
    self.tanh_out = self.deconv2.output
    self.last_layer = self.deconv2

    if self.out_distribution:
        self.trunk_sigma = self.last_layer.log_sigma[:, :, :self.inpt.shape[2], :]
    self.trunc_output = self.tanh_out[:, :, :self.inpt.shape[2], :]

    ################################### FUNCTIONS ######################################################
    self.get_latent_states = theano.function(
        [self.inpt], self.latent_out,
        givens=[[self.dropout_symbolic, self.dropout_prob]])
    #self.prior_debug = theano.function([self.inpt], [self.latent_out, self.latent_layer.mu_encoder, self.latent_layer.log_sigma_encoder, self.latent_layer.prior])
    #self.get_prior = theano.function([self.inpt], self.latent_layer.prior)
    #self.convolve1 = theano.function([self.inpt], self.layer1_out)
    #self.convolve2 = theano.function([self.inpt], self.layer2_out)
    self.output = theano.function(
        [self.inpt], self.trunc_output,
        givens=[[self.dropout_symbolic, self.dropout_prob]])
    self.get_flattened = theano.function(
        [self.inpt], self.flattened,
        givens=[[self.dropout_symbolic, self.dropout_prob]])
    #self.deconvolve1 = theano.function([self.inpt], self.deconv1.output)
    #self.deconvolve2 = theano.function([self.inpt], self.deconv2.output)
    #self.sig_out = theano.function([self.inpt], T.flatten(self.trunk_sigma, outdim=2))
    self.generate_from_z = theano.function(
        [self.inpt], self.trunc_output,
        givens=[[self.dropout_symbolic, self.dropout_prob],
                [self.latent_out, self.generative_z]])

    self.cost = self.MSE()
    self.mse = self.MSE()
    #self.likelihood = self.log_px_z()
    #self.get_cost = theano.function([self.inpt], [self.cost, self.mse])
    #self.get_likelihood = theano.function([self.layer1.inpt], [self.likelihood])
    self.derivatives = T.grad(self.cost, self.params)
    #self.get_gradients = theano.function([self.inpt], self.derivatives)
    self.updates = adam(self.params, self.derivatives, self.learning_rate)
    #self.updates = momentum_update(self.params, self.derivatives, self.learning_rate, self.momentum)
    self.train_model = theano.function(
        inputs=[self.inpt, self.df], outputs=self.cost, updates=self.updates,
        givens=[[self.dropout_symbolic, self.dropout_prob]])
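The Theano models above call a small `dropout(tensor, prob)` helper that is not shown in this collection; a minimal sketch of such a helper is given below. The name, signature, and inverted-scaling behaviour are assumptions for illustration, not the original implementation.

# Hypothetical helper (assumption): zero each unit with probability `prob`
# and rescale the survivors so the expected activation is unchanged.
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

_srng = RandomStreams(seed=1234)

def dropout(x, prob):
    keep = 1.0 - prob                                            # keep probability
    mask = _srng.binomial(size=x.shape, p=keep, dtype=x.dtype)   # Bernoulli mask
    return x * mask / keep                                       # inverted dropout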
def unet(in_channels=1, out_channels=2, start_filters=64, input_side_length=572, depth=4,
         convolutions=2, filter_size=3, sparse_labels=True, batch_size=1,
         padded_convolutions=False):

    if not padded_convolutions:
        raise NotImplementedError("padded_convolutions=False has not yet been implemented!")

    pool_size = 2
    padding = "SAME" if padded_convolutions else "VALID"

    # Test whether input_side_length fits the depth, number of convolutions per step and filter_size
    output_side_length = input_side_length if padded_convolutions else get_output_side_length(
        input_side_length, depth, convolutions, filter_size, pool_size)

    # Define inputs and helper functions #
    with tf.variable_scope('inputs'):
        inputs = tf.placeholder(tf.float32,
                                shape=(batch_size, input_side_length, input_side_length, in_channels),
                                name='inputs')
        if sparse_labels:
            ground_truth = tf.placeholder(tf.int32,
                                          shape=(batch_size, output_side_length, output_side_length),
                                          name='labels')
        else:
            ground_truth = tf.placeholder(tf.float32,
                                          shape=(batch_size, output_side_length, output_side_length, out_channels),
                                          name='labels')
        keep_prob = tf.placeholder(tf.float32, shape=[], name='keep_prob')

        network_input = tf.transpose(inputs, perm=[0, 3, 1, 2])

    # [conv -> conv -> max pool -> drop out] + parameter updates
    def step_down(name, _input):
        with tf.variable_scope(name):
            conv_out = layers.conv_block(_input, filter_size, channel_multiplier=2,
                                         convolutions=convolutions, padding=padding,
                                         data_format="NCHW")
            pool_out = layers.max_pool(conv_out, pool_size, data_format="NCHW")
            result = layers.dropout(pool_out, keep_prob)
        return result, conv_out

    # parameter updates + [upconv and concat -> drop out -> conv -> conv]
    def step_up(name, bottom_input, side_input):
        with tf.variable_scope(name):
            concat_out = layers.upconv_concat_block(bottom_input, side_input, data_format="NCHW")
            drop_out = layers.dropout(concat_out, keep_prob)
            result = layers.conv_block(drop_out, filter_size, channel_multiplier=0.5,
                                       convolutions=convolutions, padding=padding,
                                       data_format="NCHW")
        return result

    # Build the network #
    with tf.variable_scope('contracting'):
        # Set initial parameters
        outputs = []

        # Build contracting path
        with tf.variable_scope("step_0"):
            conv_out = layers.conv_block(network_input, filter_size, out_filters=start_filters,
                                         convolutions=convolutions, padding=padding,
                                         data_format="NCHW")
            pool_out = layers.max_pool(conv_out, pool_size, data_format="NCHW")
            current_tensor = layers.dropout(pool_out, keep_prob)
            outputs.append(conv_out)

        for i in xrange(1, depth):
            current_tensor, conv_out = step_down("step_" + str(i), current_tensor)
            outputs.append(conv_out)

        # Bottom [conv -> conv]
        with tf.variable_scope("step_" + str(depth)):
            current_tensor = layers.conv_block(current_tensor, filter_size, channel_multiplier=2,
                                               convolutions=convolutions, padding=padding,
                                               data_format="NCHW")

    with tf.variable_scope("expanding"):
        # Set initial parameter
        outputs.reverse()

        # Build expanding path
        for i in xrange(depth):
            current_tensor = step_up("step_" + str(depth + i + 1), current_tensor, outputs[i])

    # Last layer is a 1x1 convolution to get the predictions
    # We don't want an activation function for this one (softmax will be applied later),
    # so we're doing it manually
    in_filters = current_tensor.shape.as_list()[1]
    stddev = np.sqrt(2. / in_filters)

    with tf.variable_scope("classification"):
        weight = layers.weight_variable([1, 1, in_filters, out_channels], stddev, name="weights")
        bias = layers.bias_variable([out_channels, 1, 1], name="biases")

        conv = tf.nn.conv2d(current_tensor, weight, strides=[1, 1, 1, 1], padding="VALID",
                            name="conv", data_format="NCHW")
        logits = conv + bias

    logits = tf.transpose(logits, perm=[0, 2, 3, 1])

    return inputs, logits, ground_truth, keep_prob
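A hedged usage sketch for the `unet` builder above: the loss, optimizer, and random data are illustrative assumptions, not part of the original code. Only `padded_convolutions=True` is supported, and the side length is chosen to stay divisible through the pooling steps.

import numpy as np
import tensorflow as tf

# Build the graph; with padded convolutions the label grid matches the input grid.
inputs, logits, ground_truth, keep_prob = unet(
    in_channels=1, out_channels=2, start_filters=64, input_side_length=256,
    depth=4, convolutions=2, filter_size=3, sparse_labels=True, batch_size=1,
    padded_convolutions=True)

loss = tf.losses.sparse_softmax_cross_entropy(labels=ground_truth, logits=logits)
train_op = tf.train.AdamOptimizer(1e-4).minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    x = np.random.rand(1, 256, 256, 1).astype(np.float32)
    y = np.random.randint(0, 2, size=(1, 256, 256)).astype(np.int32)
    _, batch_loss = sess.run([train_op, loss],
                             feed_dict={inputs: x, ground_truth: y, keep_prob: 0.8})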
def __init__(self, x_train, dim_z=10, batch_size=10, filter_no=[5., 5., 5.],
             filter_l=[10., 10., 10.], pooling_d=3, pooling_s=2, learning_rate=0.0008,
             dim_y=None, y_train=None, diff=None, magic=5000):
    ####################################### SETTINGS ###################################
    self.x_train = x_train
    self.y_train = y_train
    if y_train is not None:
        self.dim_y = dim_y
    self.diff = diff
    self.batch_size = batch_size
    self.learning_rate = theano.shared(np.float32(learning_rate))
    self.performance = {"train": []}
    self.inpt = T.ftensor4(name='input')
    self.Y = T.fcol(name='label')
    self.df = T.fmatrix(name='differential')
    self.dim_z = dim_z
    self.magic = magic
    self.pooling_d = pooling_d
    self.pooling_s = pooling_s
    self.generative_z = theano.shared(np.float32(np.zeros([1, dim_z])))
    self.generative_hid = theano.shared(np.float32(np.zeros([1, magic])))
    self.activation = relu
    self.out_distribution = False
    self.in_filters = filter_l
    self.filter_lengths = filter_no
    self.params = []
    self.d_o_prob = theano.shared(np.float32(0.0))

    ####################################### LAYERS ######################################
    # LAYER 1 ##############################
    self.conv1 = one_d_conv_layer(self.inpt, self.in_filters[0], 1,
                                  self.filter_lengths[0], param_names=["W1", 'b1'])
    self.params += self.conv1.params
    self.bn1 = batchnorm(self.conv1.output)
    self.nl1 = self.activation(self.bn1.X)
    self.maxpool1 = ds.max_pool_2d(self.nl1, [self.pooling_d, 1], st=[self.pooling_s, 1],
                                   ignore_border=False).astype(theano.config.floatX)
    self.layer1_out = dropout(self.maxpool1, self.d_o_prob)
    self.flattened = T.flatten(self.layer1_out, outdim=2)

    # Conditional + variational layer #####################
    if y_train is not None:
        self.c_enc = hidden_layer(self.Y, 1, self.dim_y)
        self.c_dec = hidden_layer(self.Y, 1, self.dim_y, param_names=["W10", 'b10'])
        self.params += self.c_enc.params
        self.params += self.c_dec.params
        self.c_nl = self.activation(self.c_enc.output)
        self.c_nl_dec = self.activation(self.c_dec.output)
        self.concatenated = T.concatenate((self.flattened, self.c_nl), axis=1)
        self.latent_layer = variational_gauss_layer(self.concatenated,
                                                    self.magic + self.dim_y, dim_z)
    else:
        self.latent_layer = variational_gauss_layer(self.flattened, self.magic, dim_z)
    self.params += self.latent_layer.params
    self.latent_out = self.latent_layer.output

    # Hidden Layer #########################
    if y_train is not None:
        self.dec_concat = T.concatenate((self.latent_out, self.c_nl_dec), axis=1)
        self.hidden_layer = hidden_layer(self.dec_concat, self.dim_z + self.dim_y, self.magic)
    else:
        self.hidden_layer = hidden_layer(self.latent_out, dim_z, self.magic)
    self.params += self.hidden_layer.params
    self.hid_out = dropout(
        self.activation(self.hidden_layer.output).reshape(
            (self.inpt.shape[0], self.in_filters[-1],
             int(self.magic / self.in_filters[-1]), 1)),
        self.d_o_prob)

    # Deconvolutional 1 ######################
    self.deconv1 = one_d_deconv_layer(self.hid_out, 1, self.in_filters[2],
                                      self.filter_lengths[2], pool=self.pooling_d,
                                      param_names=["W3", 'b3'], distribution=False)
    self.params += self.deconv1.params
    #self.nl_deconv1 = dropout(self.activation(self.deconv1.output), self.dropout_symbolic)
    self.tanh_out = self.deconv1.output
    self.last_layer = self.deconv1

    if self.out_distribution:
        self.trunk_sigma = self.last_layer.log_sigma[:, :, :self.inpt.shape[2], :]
    self.trunc_output = self.tanh_out[:, :, :self.inpt.shape[2], :]

    self.cost = self.MSE()
    self.mse = self.MSE()
    #self.likelihood = self.log_px_z()
    #self.get_cost = theano.function([self.inpt], [self.cost, self.mse])
    #self.get_likelihood = theano.function([self.layer1.inpt], [self.likelihood])
    self.derivatives = T.grad(self.cost, self.params)
    #self.get_gradients = theano.function([self.inpt], self.derivatives)
    self.updates = adam(self.params, self.derivatives, self.learning_rate)

    ################################### FUNCTIONS ######################################################
    #self.prior_debug = theano.function([self.inpt], [self.latent_out, self.latent_layer.mu_encoder, self.latent_layer.log_sigma_encoder, self.latent_layer.prior])
    #self.get_prior = theano.function([self.inpt], self.latent_layer.prior)
    #self.convolve1 = theano.function([self.inpt], self.layer1_out)
    #self.convolve2 = theano.function([self.inpt], self.layer2_out)
    #self.deconvolve1 = theano.function([self.inpt], self.deconv1.output)
    #self.deconvolve2 = theano.function([self.inpt], self.deconv2.output)
    #self.sig_out = theano.function([self.inpt], T.flatten(self.trunk_sigma, outdim=2))
    #self.output = theano.function([self.inpt], self.trunc_output, givens=[[self.dropout_symbolic, self.dropout_prob]])
    #self.generate_from_z = theano.function([self.inpt], self.trunc_output, givens=[[self.latent_out, self.generative_z]])
    #self.get_cost = theano.function([self.inpt], [self.cost, self.mse])
    #self.get_likelihood = theano.function([self.layer1.inpt], [self.likelihood])
    #self.get_gradients = theano.function([self.inpt], self.derivatives)
    self.generate_from_hid = theano.function(
        [self.inpt], self.trunc_output,
        givens=[[self.hidden_layer.output, self.generative_hid]])
    self.get_flattened = theano.function([self.inpt], self.flattened)
    if self.y_train is not None:
        self.generate_from_z = theano.function(
            [self.inpt, self.Y], self.trunc_output,
            givens=[[self.latent_out, self.generative_z]])
        self.train_model = theano.function(
            inputs=[self.inpt, self.df, self.Y], outputs=self.cost, updates=self.updates)
        self.get_latent_states = theano.function([self.inpt, self.Y], self.latent_out)
        self.get_c_enc = theano.function([self.Y], self.c_enc.output)
        self.output = theano.function([self.inpt, self.Y], self.trunc_output)
        self.get_concat = theano.function([self.inpt, self.Y], self.concatenated)
    else:
        self.generate_from_z = theano.function(
            [self.inpt], self.trunc_output,
            givens=[[self.latent_out, self.generative_z]])
        self.train_model = theano.function(
            inputs=[self.inpt, self.df], outputs=self.cost, updates=self.updates)
        self.output = theano.function([self.inpt], self.trunc_output)
        self.get_latent_states = theano.function([self.inpt], self.latent_out)
def parameter_efficient(in_channels=1, out_channels=2, start_filters=64, input_side_length=256,
                        depth=4, res_blocks=2, filter_size=3, sparse_labels=True, batch_size=1,
                        activation="cReLU", batch_norm=True):

    activation = str.lower(activation)
    if activation not in ["relu", "crelu"]:
        raise ValueError("activation must be \"ReLU\" or \"cReLU\".")

    pool_size = 2

    # Define inputs and helper functions #
    with tf.variable_scope('inputs'):
        inputs = tf.placeholder(tf.float32,
                                shape=(batch_size, input_side_length, input_side_length, in_channels),
                                name='inputs')
        if sparse_labels:
            ground_truth = tf.placeholder(tf.int32,
                                          shape=(batch_size, input_side_length, input_side_length),
                                          name='labels')
        else:
            ground_truth = tf.placeholder(tf.float32,
                                          shape=(batch_size, input_side_length, input_side_length, out_channels),
                                          name='labels')
        keep_prob = tf.placeholder(tf.float32, shape=[], name='keep_prob')
        training = tf.placeholder(tf.bool, shape=[], name="training")

        network_input = tf.transpose(inputs, perm=[0, 3, 1, 2])

    # [conv -> conv -> max pool -> drop out] + parameter updates
    def step_down(name, input_, filter_size=3, res_blocks=2, keep_prob=1., training=False):
        with tf.variable_scope(name):
            with tf.variable_scope("res_block_0"):
                conv_out, tiled_input = layers.res_block(input_, filter_size, channel_multiplier=2,
                                                         depthwise_multiplier=2, convolutions=2,
                                                         training=training, activation=activation,
                                                         batch_norm=batch_norm, data_format="NCHW")
            for i in xrange(1, res_blocks):
                with tf.variable_scope("res_block_" + str(i)):
                    conv_out = layers.res_block(conv_out, filter_size, channel_multiplier=1,
                                                depthwise_multiplier=2, convolutions=2,
                                                training=training, activation=activation,
                                                batch_norm=batch_norm, data_format="NCHW")
            conv_out = conv_out + tiled_input
            pool_out = layers.max_pool(conv_out, pool_size, data_format="NCHW")
            bottom_out = layers.dropout(pool_out, keep_prob)
            side_out = layers.dropout(conv_out, keep_prob)
        return bottom_out, side_out

    # parameter updates + [upconv and concat -> drop out -> conv -> conv]
    def step_up(name, bottom_input, side_input, filter_size=3, res_blocks=2, keep_prob=1., training=False):
        with tf.variable_scope(name):
            added_input = layers.upconv_add_block(bottom_input, side_input, data_format="NCHW")
            conv_out = added_input
            for i in xrange(res_blocks):
                with tf.variable_scope("res_block_" + str(i)):
                    conv_out = layers.res_block(conv_out, filter_size, channel_multiplier=1,
                                                depthwise_multiplier=2, convolutions=2,
                                                training=training, activation=activation,
                                                batch_norm=batch_norm, data_format="NCHW")
            result = layers.dropout(conv_out, keep_prob)
        return result

    # Build the network #
    with tf.variable_scope('contracting'):
        outputs = []

        with tf.variable_scope("step_0"):
            # Conv 1
            in_filters = in_channels
            out_filters = start_filters
            stddev = np.sqrt(2. / (filter_size**2 * in_filters))
            w = layers.weight_variable([filter_size, filter_size, in_filters, out_filters],
                                       stddev=stddev, name="weights")

            out_ = tf.nn.conv2d(network_input, w, [1, 1, 1, 1], padding="SAME", data_format="NCHW")
            out_ = out_ + layers.bias_variable([out_filters, 1, 1], name='biases')

            # Batch Norm 1
            if batch_norm:
                out_ = tf.layers.batch_normalization(out_, axis=1, momentum=0.999, center=True,
                                                     scale=True, training=training, trainable=True,
                                                     name="batch_norm", fused=True)

            in_filters = out_filters

            # concatenated ReLU
            if activation == "crelu":
                out_ = tf.concat([out_, -out_], axis=1)
                in_filters = 2 * in_filters
            out_ = tf.nn.relu(out_)

            # Conv 2
            stddev = np.sqrt(2. / (filter_size**2 * in_filters))
            w = layers.weight_variable([filter_size, filter_size, in_filters, out_filters],
                                       stddev=stddev, name="weights")

            out_ = tf.nn.conv2d(out_, w, [1, 1, 1, 1], padding="SAME", data_format="NCHW")
            out_ = out_ + layers.bias_variable([out_filters, 1, 1], name='biases')

            # Res Block 1
            conv_out = layers.res_block(out_, filter_size, channel_multiplier=1,
                                        depthwise_multiplier=2, convolutions=2,
                                        training=training, activation=activation,
                                        batch_norm=batch_norm, data_format="NCHW")

            pool_out = layers.max_pool(conv_out, pool_size, data_format="NCHW")
            bottom_out = layers.dropout(pool_out, keep_prob)
            side_out = layers.dropout(conv_out, keep_prob)

            outputs.append(side_out)

        # Build contracting path
        for i in xrange(1, depth):
            bottom_out, side_out = step_down('step_' + str(i), bottom_out,
                                             filter_size=filter_size, res_blocks=res_blocks,
                                             keep_prob=keep_prob, training=training)
            outputs.append(side_out)

        # Bottom [conv -> conv]
        with tf.variable_scope('step_' + str(depth)):
            with tf.variable_scope("res_block_0"):
                conv_out, tiled_input = layers.res_block(bottom_out, filter_size, channel_multiplier=2,
                                                         depthwise_multiplier=2, convolutions=2,
                                                         training=training, activation=activation,
                                                         batch_norm=batch_norm, data_format="NCHW")
            for i in xrange(1, res_blocks):
                with tf.variable_scope("res_block_" + str(i)):
                    conv_out = layers.res_block(conv_out, filter_size, channel_multiplier=1,
                                                depthwise_multiplier=2, convolutions=2,
                                                training=training, activation=activation,
                                                batch_norm=batch_norm, data_format="NCHW")
            conv_out = conv_out + tiled_input
            current_tensor = layers.dropout(conv_out, keep_prob)

    with tf.variable_scope('expanding'):
        # Set initial parameter
        outputs.reverse()

        # Build expanding path
        for i in xrange(depth):
            current_tensor = step_up('step_' + str(depth + i + 1), current_tensor, outputs[i],
                                     filter_size=filter_size, res_blocks=res_blocks,
                                     keep_prob=keep_prob, training=training)

    # Last layer is a 1x1 convolution to get the predictions
    # We don't want an activation function for this one (softmax will be applied later),
    # so we're doing it manually
    in_filters = current_tensor.shape.as_list()[1]
    stddev = np.sqrt(2. / in_filters)

    with tf.variable_scope('classification'):
        w = layers.weight_variable([1, 1, in_filters, out_channels], stddev, name='weights')
        b = layers.bias_variable([out_channels, 1, 1], name='biases')

        conv = tf.nn.conv2d(current_tensor, w, strides=[1, 1, 1, 1], padding="SAME",
                            data_format="NCHW", name='conv')
        logits = conv + b

    logits = tf.transpose(logits, perm=[0, 2, 3, 1])

    return inputs, logits, ground_truth, keep_prob, training
def forward(self, x1, x2):
    """Inputs:
    x1 = premise word indices [batch * len_1]
    x1_f = premise word features indices [batch * len_1 * nfeat]
    x1_pos = premise POS tags [batch * len_1]
    x1_ner = premise entity tags [batch * len_1]
    x1_mask = premise padding mask [batch * len_1]
    x2 = hypothesis word indices [batch * len_2]
    x2_f = hypothesis word features indices [batch * len_2 * nfeat]
    x2_pos = hypothesis POS tags [batch * len_2]
    x2_ner = hypothesis entity tags [batch * len_2]
    x2_mask = hypothesis padding mask [batch * len_2]
    """
    # Prepare premise and hypothesis input
    Prnn_input_list = []
    Hrnn_input_list = []

    # Word embeddings
    emb = self.embedding if self.training else self.eval_embed
    x1_emb, x2_emb = emb(x1), emb(x2)

    # Dropout on embeddings
    if self.opt['dropout_emb'] > 0:
        x1_emb = layers.dropout(x1_emb, p=self.opt['dropout_emb'], training=self.training)
        x2_emb = layers.dropout(x2_emb, p=self.opt['dropout_emb'], training=self.training)

    Prnn_input_list.append(x1_emb)
    Hrnn_input_list.append(x2_emb)

    # # Contextualized embeddings
    # _, x1_cove = self.CoVe(x1, x1_mask)
    # _, x2_cove = self.CoVe(x2, x2_mask)
    # if self.opt['dropout_emb'] > 0:
    #     x1_cove = layers.dropout(x1_cove, p=self.opt['dropout_emb'], training=self.training)
    #     x2_cove = layers.dropout(x2_cove, p=self.opt['dropout_emb'], training=self.training)
    # Prnn_input_list.append(x1_cove)
    # Hrnn_input_list.append(x2_cove)
    #
    # # POS embeddings
    # x1_pos_emb = self.pos_embedding(x1_pos)
    # x2_pos_emb = self.pos_embedding(x2_pos)
    # Prnn_input_list.append(x1_pos_emb)
    # Hrnn_input_list.append(x2_pos_emb)
    #
    # # NER embeddings
    # x1_ner_emb = self.ner_embedding(x1_ner)
    # x2_ner_emb = self.ner_embedding(x2_ner)
    # Prnn_input_list.append(x1_ner_emb)
    # Hrnn_input_list.append(x2_ner_emb)
    #
    # x1_input = torch.cat(Prnn_input_list, 2)
    # x2_input = torch.cat(Hrnn_input_list, 2)

    # Now the features are ready
    # x1_input: [batch_size, doc_len, input_size]
    # x2_input: [batch_size, doc_len, input_size]
    x1_input = x1_emb
    x2_input = x2_emb

    # if self.opt['full_att_type'] == 2:
    #     x1_f = layers.dropout(x1_f, p=self.opt['dropout_EM'], training=self.training)
    #     x2_f = layers.dropout(x2_f, p=self.opt['dropout_EM'], training=self.training)
    #     Paux_input, Haux_input = x1_f, x2_f
    # else:
    #     Paux_input = x1_f[:, :, 0].contiguous().view(x1_f.size(0), x1_f.size(1), 1)
    #     Haux_input = x2_f[:, :, 0].contiguous().view(x2_f.size(0), x2_f.size(1), 1)

    # Encode premise with RNN
    P_abstr_ls = self.P_rnn(x1_input)
    # Encode hypothesis with RNN
    H_abstr_ls = self.H_rnn(x2_input)

    # Fusion
    if self.opt['full_att_type'] == 0:
        P_atts = P_abstr_ls[-1].contiguous()
        H_atts = H_abstr_ls[-1].contiguous()
        P_xs = P_abstr_ls[-1].contiguous()
        H_xs = H_abstr_ls[-1].contiguous()
    elif self.opt['full_att_type'] == 1:
        P_atts = torch.cat([x1_input] + P_abstr_ls, 2)
        H_atts = torch.cat([x2_input] + H_abstr_ls, 2)
        P_xs = P_abstr_ls[-1].contiguous()
        H_xs = H_abstr_ls[-1].contiguous()
    elif self.opt['full_att_type'] == 2:
        P_atts = torch.cat([x1_input] + P_abstr_ls, 2)
        H_atts = torch.cat([x2_input] + H_abstr_ls, 2)
        P_xs = torch.cat(P_abstr_ls, 2)
        H_xs = torch.cat(H_abstr_ls, 2)
    aP_xs = self.full_attn_P(P_atts, H_atts, P_xs, H_xs, None)
    aH_xs = self.full_attn_H(H_atts, P_atts, H_xs, P_xs, None)
    P_hiddens = torch.cat([P_xs, aP_xs], 2)
    H_hiddens = torch.cat([H_xs, aH_xs], 2)

    # Inference on premise and hypothesis
    P_hiddens = torch.cat(self.P_infer_rnn(P_hiddens, None), 2)
    H_hiddens = torch.cat(self.H_infer_rnn(H_hiddens, None), 2)

    # Merge hiddens for answer classification
    if self.opt['final_merge'] == 'avg':
        P_merge_weights = layers.uniform_weights(P_hiddens, None)
        H_merge_weights = layers.uniform_weights(H_hiddens, None)
    elif self.opt['final_merge'] == 'linear_self_attn':
        P_merge_weights = self.self_attn_P(P_hiddens, None)
        H_merge_weights = self.self_attn_H(H_hiddens, None)
    P_avg_hidden = layers.weighted_avg(P_hiddens, P_merge_weights)
    H_avg_hidden = layers.weighted_avg(H_hiddens, H_merge_weights)
    P_max_hidden = torch.max(P_hiddens, 1)[0]
    H_max_hidden = torch.max(H_hiddens, 1)[0]

    # Predict scores for different classes
    scores = self.classifier(
        torch.cat([P_avg_hidden, H_avg_hidden, P_max_hidden, H_max_hidden], 1))
    return scores  # -inf to inf
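The `layers.dropout(x, p=..., training=...)` call used throughout this forward pass is not shown in this collection; a plausible minimal wrapper is sketched below. It is an assumption for illustration, not the project's actual `layers` module.

import torch.nn.functional as F

def dropout(x, p=0.0, training=False):
    """Apply dropout only when a positive rate is given and the model is training."""
    if p > 0 and training:
        return F.dropout(x, p=p, training=True)
    return x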
def __init__(self, numpy_rng, theano_rng=None,
             n_ins=N_FEATURES * N_FRAMES,
             relu_layers_sizes=[1024, 1024, 1024],
             recurrent_connections=[2],  # layer(s), can only be i^t -> i^{t+1}
             n_outs=62 * 3,
             rho=0.9, eps=1.E-6):
    """ TODO """
    self.relu_layers = []
    self.dropout_relu_layers = []
    self.params = []
    self.dropout_params = []
    self.n_layers = len(relu_layers_sizes)
    self._rho = rho  # ``momentum'' for adadelta
    self._eps = eps  # epsilon for adadelta
    self._accugrads = []  # for adadelta
    self._accudeltas = []  # for adadelta
    self.n_outs = n_outs

    assert self.n_layers > 0

    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

    self.x = T.fmatrix('x')
    self.y = T.ivector('y')

    input_dropout_rate = IN_DROPOUT_RATE

    for i in xrange(self.n_layers):
        if i == 0:
            input_size = n_ins
        else:
            input_size = relu_layers_sizes[i - 1]

        if i == 0:
            layer_input = self.x
            dropout_layer_input = dropout(numpy_rng, self.x, p=input_dropout_rate)
        else:
            layer_input = self.relu_layers[-1].output
            dropout_layer_input = self.dropout_relu_layers[-1].output
            input_dropout_rate = self.dropout_relu_layers[-1].dropout_rate

        if i in recurrent_connections:  # TODO
            inputr_size = relu_layers_sizes[i]
            previous_output = T.fmatrix('previous_output')
            relu_layer = RecurrentReLU(rng=numpy_rng,
                                       input=layer_input, in_stack=previous_output,
                                       n_in=input_size, n_in_stack=inputr_size,
                                       n_out=inputr_size)
            #relu_layer.in_stack = relu_layer.output  # TODO TODO TODO
            # /TODO
            self.params.extend(relu_layer.params)
            self._accugrads.extend([
                shared(value=numpy.zeros((n_ins, relu_layers_sizes[0]), dtype='float32'),
                       name='accugrad_W', borrow=True),
                shared(value=numpy.zeros((relu_layers_sizes[0], ), dtype='float32'),
                       name='accugrad_b', borrow=True),
                shared(value=numpy.zeros((n_outs, relu_layers_sizes[0]), dtype='float32'),
                       name='accugrad_Ws', borrow=True)])
            self._accudeltas.extend([
                shared(value=numpy.zeros((n_ins, relu_layers_sizes[0]), dtype='float32'),
                       name='accudelta_W', borrow=True),
                shared(value=numpy.zeros((relu_layers_sizes[0], ), dtype='float32'),
                       name='accudelta_b', borrow=True),
                shared(value=numpy.zeros((n_outs, relu_layers_sizes[0]), dtype='float32'),
                       name='accudelta_Ws', borrow=True)])
        else:
            dropout_relu_layer = DropoutReLU(rng=numpy_rng,
                                             input=dropout_layer_input,
                                             n_in=input_size,
                                             n_out=relu_layers_sizes[i])
            relu_layer = ReLU(rng=numpy_rng,
                              input=layer_input,
                              n_in=input_size,
                              n_out=relu_layers_sizes[i],
                              W=dropout_relu_layer.W * (1 - input_dropout_rate),
                              b=dropout_relu_layer.b * (1 - input_dropout_rate))  # b=dropout_relu_layer.b TODO check
            self.dropout_params.extend(dropout_relu_layer.params)
            self.params.extend(relu_layer.params)
            self._accugrads.extend([
                shared(value=numpy.zeros((input_size, relu_layers_sizes[i]), dtype='float32'),
                       name='accugrad_W', borrow=True),
                shared(value=numpy.zeros((relu_layers_sizes[i], ), dtype='float32'),
                       name='accugrad_b', borrow=True)])
            self._accudeltas.extend([
                shared(value=numpy.zeros((input_size, relu_layers_sizes[i]), dtype='float32'),
                       name='accudelta_W', borrow=True),
                shared(value=numpy.zeros((relu_layers_sizes[i], ), dtype='float32'),
                       name='accudelta_b', borrow=True)])

        self.dropout_relu_layers.append(dropout_relu_layer)
        self.relu_layers.append(relu_layer)

    # We now need to add a logistic layer on top of the MLP
    self.dropout_logLayer = LogisticRegression(
        input=self.dropout_relu_layers[-1].output,
        n_in=relu_layers_sizes[-1], n_out=n_outs)
    self.logLayer = LogisticRegression(  # TODO check weights multiplication
        input=self.relu_layers[-1].output,
        n_in=relu_layers_sizes[-1], n_out=n_outs,
        W=self.dropout_logLayer.W * (1 - self.dropout_relu_layers[-1].dropout_rate),
        b=self.dropout_logLayer.b * (1 - self.dropout_relu_layers[-1].dropout_rate))  # b=self.dropout_logLayer.b TODO check

    self.dropout_params.extend(self.dropout_logLayer.params)
    self.params.extend(self.logLayer.params)
    self._accugrads.extend([
        shared(value=numpy.zeros((relu_layers_sizes[-1], n_outs), dtype='float32'),
               name='accugrad_W', borrow=True),
        shared(value=numpy.zeros((n_outs, ), dtype='float32'),
               name='accugrad_b', borrow=True)])
    self._accudeltas.extend([
        shared(value=numpy.zeros((relu_layers_sizes[-1], n_outs), dtype='float32'),
               name='accudelta_W', borrow=True),
        shared(value=numpy.zeros((n_outs, ), dtype='float32'),
               name='accudelta_b', borrow=True)])

    # compute the cost for second phase of training, defined as the
    # negative log likelihood of the logistic regression (output) layer
    self.dropout_finetune_cost = self.dropout_logLayer.negative_log_likelihood(self.y)
    self.dropout_finetune_cost_sum = self.dropout_logLayer.negative_log_likelihood_sum(self.y)
    self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
    self.finetune_cost_sum = self.logLayer.negative_log_likelihood_sum(self.y)

    # compute the gradients with respect to the model parameters
    # symbolic variable that points to the number of errors made on the
    # minibatch given by self.x and self.y
    self.errors = self.logLayer.errors(self.y)
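The `W * (1 - dropout_rate)` rescaling that ties each inference-time `ReLU`/`LogisticRegression` layer to its dropout counterpart above follows the standard dropout test-time approximation; the short numpy check below (illustrative only, not from the original code) shows that averaging the dropped-out pre-activation over many masks matches the deterministic layer with rescaled weights.

import numpy as np

rng = np.random.RandomState(0)
p = 0.5                                   # dropout rate applied to the layer input
x = rng.randn(8)                          # one fixed input vector
W = rng.randn(8, 3)

# Monte-Carlo estimate of E_mask[(x * mask) @ W] over many Bernoulli(1 - p) masks ...
masks = (rng.rand(200000, 8) > p).astype(np.float64)
mc_estimate = ((masks * x) @ W).mean(axis=0)

# ... agrees (up to sampling noise) with the deterministic layer using W * (1 - p).
print(mc_estimate)
print(x @ (W * (1 - p)))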
def img_conv_group(input,
                   conv_num_filter,
                   pool_size,
                   conv_padding=1,
                   conv_filter_size=3,
                   conv_act=None,
                   conv_with_batchnorm=False,
                   conv_batchnorm_drop_rate=None,
                   pool_stride=1,
                   pool_type=None,
                   main_program=None,
                   startup_program=None):
    """
    Image Convolution Group, Used for vgg net.
    """
    tmp = input
    assert isinstance(conv_num_filter, list) or \
        isinstance(conv_num_filter, tuple)

    def __extend_list__(obj):
        if not hasattr(obj, '__len__'):
            return [obj] * len(conv_num_filter)
        else:
            return obj

    conv_padding = __extend_list__(conv_padding)
    conv_filter_size = __extend_list__(conv_filter_size)
    conv_with_batchnorm = __extend_list__(conv_with_batchnorm)
    conv_batchnorm_drop_rate = __extend_list__(conv_batchnorm_drop_rate)

    for i in xrange(len(conv_num_filter)):
        local_conv_act = conv_act
        if conv_with_batchnorm[i]:
            local_conv_act = None

        tmp = layers.conv2d(
            input=tmp,
            num_filters=conv_num_filter[i],
            filter_size=conv_filter_size[i],
            padding=conv_padding[i],
            act=local_conv_act,
            main_program=main_program,
            startup_program=startup_program)

        if conv_with_batchnorm[i]:
            tmp = layers.batch_norm(
                input=tmp,
                act=conv_act,
                main_program=main_program,
                startup_program=startup_program)
            drop_rate = conv_batchnorm_drop_rate[i]
            if abs(drop_rate) > 1e-5:
                tmp = layers.dropout(
                    x=tmp,
                    dropout_prob=drop_rate,
                    main_program=main_program,
                    startup_program=startup_program)

    pool_out = layers.pool2d(
        input=tmp,
        pool_size=pool_size,
        pool_type=pool_type,
        pool_stride=pool_stride,
        main_program=main_program,
        startup_program=startup_program)
    return pool_out
def img_conv_group(input,
                   conv_num_filter,
                   pool_size,
                   conv_padding=1,
                   conv_filter_size=3,
                   conv_act=None,
                   param_attr=None,
                   conv_with_batchnorm=False,
                   conv_batchnorm_drop_rate=0.0,
                   pool_stride=1,
                   pool_type="max",
                   use_cudnn=True,
                   use_mkldnn=False):
    """
    The Image Convolution Group is composed of Convolution2d, BatchNorm, DropOut, and Pool2d.
    According to the input arguments, img_conv_group will do serials of computation for Input
    using Convolution2d, BatchNorm, DropOut, and pass the last result to Pool2d.

    Args:
        input (Variable): The input image with [N, C, H, W] format.
        conv_num_filter(list|tuple): Indicates the numbers of filter of this group.
        pool_size (int|list|tuple): The pooling size of Pool2d Layer. If pool_size is a list or
            tuple, it must contain two integers, (pool_size_H, pool_size_W). Otherwise, the
            pool_size_H = pool_size_W = pool_size.
        conv_padding (int|list|tuple): The padding size of the Conv2d Layer. If padding is a list
            or tuple, its length must be equal to the length of conv_num_filter. Otherwise the
            conv_padding of all Conv2d Layers are the same. Default 1.
        conv_filter_size (int|list|tuple): The filter size. If filter_size is a list or tuple,
            its length must be equal to the length of conv_num_filter. Otherwise the
            conv_filter_size of all Conv2d Layers are the same. Default 3.
        conv_act (str): Activation type for Conv2d Layer that is not followed by BatchNorm.
            Default: None.
        param_attr (ParamAttr): The parameters to the Conv2d Layer. Default: None
        conv_with_batchnorm (bool|list): Indicates whether to use BatchNorm after Conv2d Layer.
            If conv_with_batchnorm is a list, its length must be equal to the length of
            conv_num_filter. Otherwise, conv_with_batchnorm indicates whether all the Conv2d
            Layer follows a BatchNorm. Default False.
        conv_batchnorm_drop_rate (float|list): Indicates the drop_rate of Dropout Layer after
            BatchNorm. If conv_batchnorm_drop_rate is a list, its length must be equal to the
            length of conv_num_filter. Otherwise, drop_rate of all Dropout Layers is
            conv_batchnorm_drop_rate. Default 0.0.
        pool_stride (int|list|tuple): The pooling stride of Pool2d layer. If pool_stride is a
            list or tuple, it must contain two integers, (pooling_stride_H, pooling_stride_W).
            Otherwise, the pooling_stride_H = pooling_stride_W = pool_stride. Default 1.
        pool_type (str): Pooling type can be :math:`max` for max-pooling and :math:`avg` for
            average-pooling. Default :math:`max`.
        use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn library is
            installed. Default: True
        use_mkldnn (bool): Use mkldnn kernels or not, it is valid only when compiled with mkldnn
            library. Default: False

    Return:
        Variable: The final result after serial computation using Convolution2d, BatchNorm,
            DropOut, and Pool2d.

    Examples:
        .. code-block:: python

            img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
            conv_pool = fluid.nets.img_conv_group(input=img,
                                                  num_channels=3,
                                                  conv_padding=1,
                                                  conv_num_filter=[3, 3],
                                                  conv_filter_size=3,
                                                  conv_act="relu",
                                                  pool_size=2,
                                                  pool_stride=2)
    """
    tmp = input
    assert isinstance(conv_num_filter, list) or \
        isinstance(conv_num_filter, tuple)

    def __extend_list__(obj):
        if not hasattr(obj, '__len__'):
            return [obj] * len(conv_num_filter)
        else:
            assert len(obj) == len(conv_num_filter)
            return obj

    conv_padding = __extend_list__(conv_padding)
    conv_filter_size = __extend_list__(conv_filter_size)
    param_attr = __extend_list__(param_attr)
    conv_with_batchnorm = __extend_list__(conv_with_batchnorm)
    conv_batchnorm_drop_rate = __extend_list__(conv_batchnorm_drop_rate)

    for i in xrange(len(conv_num_filter)):
        local_conv_act = conv_act
        if conv_with_batchnorm[i]:
            local_conv_act = None

        tmp = layers.conv2d(input=tmp,
                            num_filters=conv_num_filter[i],
                            filter_size=conv_filter_size[i],
                            padding=conv_padding[i],
                            param_attr=param_attr[i],
                            act=local_conv_act,
                            use_cudnn=use_cudnn,
                            use_mkldnn=use_mkldnn)

        if conv_with_batchnorm[i]:
            tmp = layers.batch_norm(input=tmp, act=conv_act, in_place=True)
            drop_rate = conv_batchnorm_drop_rate[i]
            if abs(drop_rate) > 1e-5:
                tmp = layers.dropout(x=tmp, dropout_prob=drop_rate)

    pool_out = layers.pool2d(input=tmp,
                             pool_size=pool_size,
                             pool_type=pool_type,
                             pool_stride=pool_stride,
                             use_cudnn=use_cudnn,
                             use_mkldnn=use_mkldnn)
    return pool_out