Ejemplo n.º 1
0
def build_model(t_params, n_dim_img, n_dim_txt, n_dim_enc, n_dim_dec, n_dim_vocab, optimizer):
    """
    Assemble the symbolic training graph and hand it to `optimizer`.

    A GRU encoder reads `x`; the last element of its output goes through a
    dense layer and tanh to initialise a GRU decoder. The decoder is fed the
    embedded targets shifted by one position (a zero vector is prepended and
    the final embedding is dropped). The cost is the masked negative
    log-likelihood of `y`, normalised per sample and averaged.

    Args:
        t_params: parameter mapping; gradients are taken with respect to its
            values (presumably the layer helpers look parameters up in it by
            the given name prefixes -- confirm).
        n_dim_img: input size given to the encoder GRU.
        n_dim_txt: embedding size, also the decoder GRU input size.
        n_dim_enc: encoder GRU output size.
        n_dim_dec: decoder GRU output size.
        n_dim_vocab: vocabulary size (width of the softmax).
        optimizer: callable invoked as
            ``optimizer(lr, t_params, grads, [x, mask_x, y, mask_y], cost)``.

    Returns:
        The ``(f_cost, f_update)`` pair produced by `optimizer`.
    """
    # Symbolic inputs for the encoder side.
    x = tensor.tensor3('x', config.floatX)
    mask_x = tensor.matrix('mask_x', 'int8')

    # Encoder: only the last element of the GRU output seeds the decoder.
    encoder_out = gru(mask_x, dropout(x), t_params, n_dim_img, n_dim_enc, 'enc')
    encoded = encoder_out[-1]
    init_h = tensor.tanh(dense(encoded, t_params, n_dim_enc, n_dim_dec, 'init_h'))

    # Symbolic inputs for the decoder side.
    y = tensor.matrix('y', 'int32')
    mask_y = tensor.matrix('mask_y', 'int8')
    n_steps, n_samples = y.shape

    # Word embedding, shifted right by one step so that position t only sees
    # the targets before t.
    word_vectors = embedding(y, t_params, n_dim_vocab, n_dim_txt, 'emb')
    word_vectors = word_vectors.reshape((n_steps, n_samples, n_dim_txt))
    start_vector = tensor.zeros((1, n_samples, n_dim_txt), config.floatX)
    decoder_in = tensor.concatenate([start_vector, word_vectors[:-1]])

    # Decoder followed by the fully-connected projection onto the vocabulary.
    decoded = gru(mask_y, decoder_in, t_params, n_dim_txt, n_dim_dec, 'dec', init_h=init_h)
    logits = dense(dropout(decoded), t_params, n_dim_dec, n_dim_vocab, 'fc')

    # Softmax over a flattened (steps * samples, vocab) view.
    n_tokens = n_steps * n_samples
    prob = tensor.nnet.softmax(logits.reshape((n_tokens, n_dim_vocab)))

    # Probability assigned to each target token, back in (steps, samples) layout.
    target_prob = prob[tensor.arange(n_tokens), y.flatten()].reshape((n_steps, n_samples))

    # Masked negative log-likelihood; 1e-6 keeps log() away from zero.
    token_nll = -tensor.log(target_prob + 1e-6)
    sample_nll = (token_nll * mask_y).sum(0) / mask_y.astype(config.floatX).sum(0)
    cost = sample_nll.mean()

    grads = tensor.grad(cost, list(t_params.values()))
    learning_rate = tensor.scalar('lr')
    f_cost, f_update = optimizer(learning_rate, t_params, grads, [x, mask_x, y, mask_y], cost)

    return f_cost, f_update
Ejemplo n.º 2
0
 def __init__(self,layer_nums=0,activation='relu',dropout=False):
     """
     Create the layer stack described by `layer_nums`.

     With the default ``layer_nums=0`` the network is left empty. Otherwise
     every consecutive pair of sizes except the last one becomes a linear
     layer followed by the requested activation (and a dropout layer when
     `dropout` is truthy); the last pair becomes a linear layer followed by
     softmax.

     Args:
         layer_nums: sequence of layer sizes, or 0 for an empty network.
         activation: name resolved through ``self.str_to_layer``.
         dropout: whether to add a dropout layer after each hidden activation.
     """
     self.layers = []
     if layer_nums == 0:
         return

     # Pairs (size[i], size[i + 1]) for every hidden layer; the final pair is
     # handled separately because it is followed by softmax.
     hidden_pairs = zip(layer_nums, layer_nums[1:-1])
     for fan_in, fan_out in hidden_pairs:
         self.add(layers.linear(fan_in, fan_out))
         activation_cls = self.str_to_layer(activation)
         self.add(activation_cls())
         if dropout:
             self.add(layers.dropout())

     # Output block.
     self.add(layers.linear(layer_nums[-2], layer_nums[-1]))
     self.add(layers.softmax())
Ejemplo n.º 3
0
    def __init__(self, numpy_rng, theano_rng=None, 
            n_ins=40*3,
            layers_types=[Linear, ReLU, ReLU, ReLU, LogisticRegression],
            layers_sizes=[1024, 1024, 1024, 1024],
            dropout_rates=[0.2, 0.5, 0.5, 0.5, 0.5],
            n_outs=62 * 3,
            rho=0.9, eps=1.E-6,  # TODO refine
            debugprint=False):
        """
        Build the regular network via the parent class, then a parallel stack
        of dropout layers that reuse the regular layers' weights.

        ``dropout_rates[0]`` is applied to the input ``self.x``; each following
        rate is applied to the output of the corresponding layer, and the last
        layer always gets rate 0. Every dropout layer is built from the
        matching regular layer's ``W`` and ``b`` multiplied by
        ``1 / (1 - rate)`` (marked experimental by the original author).

        ``self.x``, ``self.y``, ``self.layers``, ``self.layers_ins`` and
        ``self.layers_outs`` are presumably set by the parent constructor --
        confirm.

        Training costs (``mean_cost``, ``cost``) come from the last dropout
        layer; ``errors`` comes from the last regular (non-dropout) layer.
        """
        super(DropoutNet, self).__init__(numpy_rng, theano_rng, n_ins,
                layers_types, layers_sizes, n_outs, rho, eps, debugprint)

        self.dropout_rates = dropout_rates
        self.dropout_layers = []

        # Input dropout uses the first rate.
        current_input = dropout(numpy_rng, self.x, p=dropout_rates[0])

        # Rates for the layer outputs: nothing is dropped from the last layer.
        output_rates = dropout_rates[1:] + [0]
        layer_specs = zip(self.layers, layers_types, self.layers_ins,
                          self.layers_outs, output_rates)

        for base_layer, make_layer, fan_in, fan_out, rate in layer_specs:
            keep = 1. - rate
            twin = make_layer(rng=numpy_rng,
                              input=current_input,
                              n_in=fan_in,
                              n_out=fan_out,
                              W=base_layer.W * 1. / keep,  # experimental
                              b=base_layer.b * 1. / keep)  # TODO check
            assert hasattr(twin, 'output')
            # Original author's note: dropout with dr=1 does not drop anything
            # (behaviour of the `dropout` helper; not verifiable from here).
            twin.output = dropout(numpy_rng, twin.output, rate)
            self.dropout_layers.append(twin)
            current_input = twin.output

        final_layer = self.layers[-1]
        assert hasattr(final_layer, 'training_cost')
        assert hasattr(final_layer, 'errors')

        # TODO standardize cost
        # Costs are computed on the dropout stack ...
        final_dropout_layer = self.dropout_layers[-1]
        self.mean_cost = final_dropout_layer.negative_log_likelihood(self.y)
        self.cost = final_dropout_layer.training_cost(self.y)

        # ... while the error rate uses the regular (non-dropout) stack.
        self.errors = self.layers[-1].errors(self.y)
Ejemplo n.º 4
0
    def char_model(self, is_training, hparams, chars, embedding_char_size,
                   tags, inputs_char, indexs_start, indexs_end, targets_w):
        """Build the character-level tagging graph inside scope 'chars'.

        Characters are embedded, passed through stacked LSTM layers, and the
        forward/backward outputs gathered at `indexs_start` / `indexs_end`
        are concatenated, fed through an MLP and projected to `tags` logits.

        Args:
            is_training: Python truth value; selects variable creation vs.
                reuse and which pair of tensors is returned.
            hparams: object providing embed_keep_prob_ch, num_layers_chars,
                hidden_char_size, recur_keep_prob, mlp_size and keep_prob.
            chars: number of rows of the character embedding matrix.
            embedding_char_size: number of columns of the embedding matrix.
            tags: size argument given to the final linear layer.
            inputs_char: character ids to look up.
            indexs_start: indices for `tf.gather_nd` (presumably word starts).
            indexs_end: indices for `tf.gather_nd` (presumably word ends).
            targets_w: target ids; entries greater than PAD are counted.

        Returns:
            ``(loss, accuracy)`` when `is_training` is truthy, otherwise
            ``(preds, outputs)`` where `outputs` is the MLP output.
        """
        with tf.variable_scope('chars'):
            if not is_training:
                cembed = tf.get_variable('char_embeddings')
            else:
                # NOTE: reseeds NumPy's global RNG so the initial embedding
                # matrix is the same on every run.
                np.random.seed(seed=1)
                initial_table = np.random.randn(
                    chars, embedding_char_size).astype(np.float32)
                cembed = tf.get_variable('char_embeddings',
                                         dtype=tf.float32,
                                         initializer=initial_table)

            # Shared by training and inference.
            looked_up = tf.nn.embedding_lookup(cembed, inputs_char[:, :])
            embed = layers.dropout(is_training, hparams.embed_keep_prob_ch,
                                   looked_up)

            output_fw, output_bw, _ = layers.lstm_layers(
                is_training, embed, hparams.num_layers_chars,
                hparams.hidden_char_size, hparams.recur_keep_prob)

            # Gather, in this order: forward@start, forward@end,
            # backward@start, backward@end; then join them on the last axis.
            word_edges = [
                tf.gather_nd(direction, positions)
                for direction in (output_fw, output_bw)
                for positions in (indexs_start, indexs_end)
            ]
            outputs = tf.concat(word_edges, axis=2)

            outputs = layers.mlp(is_training,
                                 outputs,
                                 output_size=hparams.mlp_size,
                                 keep_prob=hparams.keep_prob)

            targets = targets_w[:, :]
            # 1.0 where the target id is greater than PAD, 0.0 elsewhere.
            tok_keep = tf.to_float(tf.greater(targets, PAD))

            linear = layers.linear_with_dropout(is_training,
                                                outputs,
                                                tags,
                                                keep_prob=hparams.keep_prob)
            preds = tf.to_int32(tf.argmax(linear, axis=-1))

            if not is_training:
                return preds, outputs

            # Accuracy over the kept positions only.
            int_tok_keep = tf.to_int32(tok_keep)
            hits = tf.to_int32(tf.equal(preds, targets)) * int_tok_keep
            accuracy = tf.reduce_sum(hits) / tf.reduce_sum(int_tok_keep)

            # Cross entropy weighted by the same mask.
            loss = tf.losses.sparse_softmax_cross_entropy(
                targets, linear, tok_keep)
            return loss, accuracy
Ejemplo n.º 5
0
def build_30s(color_inputs, num_classes, is_training):
    """
    Build the U-Net style encoder/decoder graph and return flattened logits.

    The encoder has five blocks of two `conv_btn1` layers each (32, 32, 64,
    64 and 128 filters); the first four are followed by `maxpool` with
    argument 4. The decoder upsamples four times, concatenating with the
    second conv of the mirrored encoder block each time, and ends in a
    convolution with kernel argument 1 and no activation.

    Args:
        color_inputs: Tensor, [batch_size, length, 3]
        num_classes: Integer, number of segmentation (annotation) labels
        is_training: Boolean, in training mode or not (for dropout & bn)
    Returns:
        logits: Tensor, predicted annotated image flattened
                              [batch_size * length,  num_classes]
    """
    # Keep probability is 0.2 while training and 1.0 otherwise.
    dropout_keep_prob = tf.where(is_training, 0.2, 1.0)

    def conv3(inputs, filters, scope):
        # Every conv_btn1 layer in this network uses kernel argument 3.
        return layers.conv_btn1(inputs,
                                3,
                                filters,
                                scope,
                                is_training=is_training)

    def drop(inputs, scope):
        return layers.dropout(inputs, dropout_keep_prob, scope)

    # ---------------- Encoder ----------------
    # Block 1
    enc1 = conv3(conv3(color_inputs, 32, 'conv1_1'), 32, 'conv1_2')
    pool1 = layers.maxpool(enc1, 4, 'pool1')

    # Block 2
    enc2 = conv3(conv3(pool1, 32, 'conv2_1'), 32, 'conv2_2')
    pool2 = layers.maxpool(enc2, 4, 'pool2')

    # Block 3 (dropout after pooling from here on)
    enc3 = conv3(conv3(pool2, 64, 'conv3_1'), 64, 'conv3_2')
    pool3 = drop(layers.maxpool(enc3, 4, 'pool3'), 'drop3')

    # Block 4
    enc4 = conv3(conv3(pool3, 64, 'conv4_1'), 64, 'conv4_2')
    pool4 = drop(layers.maxpool(enc4, 4, 'pool4'), 'drop4')

    # Block 5 (no pooling)
    bottom = drop(conv3(conv3(pool4, 128, 'conv5_1'), 128, 'conv5_2'),
                  'drop5')

    # ---------------- Decoder ----------------
    # Cropping1D trims the upsampled tensor before it is concatenated with
    # the encoder output -- presumably to match lengths; the crop amounts are
    # hard-coded, so confirm they fit the input length actually used.

    # Block 1
    up6 = layers.deconv_upsample(bottom, 4, 'upsample6')
    up6 = Cropping1D(cropping=(0, 1))(up6)
    dec6 = conv3(layers.concat(up6, enc4, 'concat6'), 128, 'conv6_1')
    dec6 = drop(dec6, 'drop6')

    # Block 2 (no cropping here)
    up7 = layers.deconv_upsample(dec6, 4, 'upsample7')
    dec7 = conv3(layers.concat(up7, enc3, 'concat7'), 64, 'conv7_1')
    dec7 = drop(dec7, 'drop7')

    # Block 3
    up8 = layers.deconv_upsample(dec7, 4, 'upsample8')
    up8 = Cropping1D(cropping=(0, 1))(up8)
    dec8 = conv3(layers.concat(up8, enc2, 'concat8'), 32, 'conv8_1')

    # Block 4
    up9 = layers.deconv_upsample(dec8, 4, 'upsample9')
    up9 = Cropping1D(cropping=(1, 2))(up9)
    dec9 = conv3(layers.concat(up9, enc1, 'concat9'), 32, 'conv9_1')

    # Block 5: per-position class scores, flattened to 2-D.
    score = layers.conv(dec9,
                        1,
                        num_classes,
                        'score',
                        activation_fn=None)
    return tf.reshape(score, (-1, num_classes))
Ejemplo n.º 6
0
def scaled_dot_product_attention(queries,
                                 keys,
                                 values,
                                 num_heads=1,
                                 dropout_rate=0.):
    r"""
    The scaled dot-product attention.

    Attention mechanism can be seen as mapping a query and a set of key-value
    pairs to an output. The output is computed as a weighted sum of the values,
    where the weight assigned to each value is computed by a compatibility
    function (dot-product here) of the query with the corresponding key.

    The dot-product attention can be implemented through (batch) matrix
    multiplication as follows:

        .. math::

            Attention(Q, K, V)= softmax(QK^\mathrm{T} / \sqrt{d_k})V

    where :math:`d_k` is the per-head hidden size of the keys.

    Refer to `Attention Is All You Need
    <https://arxiv.org/pdf/1706.03762.pdf>`_.

    Args:
        queries (Variable): The input variable which should be a 3-D Tensor.
        keys (Variable): The input variable which should be a 3-D Tensor.
        values (Variable): The input variable which should be a 3-D Tensor.
        num_heads (int): Head number to compute the scaled dot product
            attention. Default: 1.
        dropout_rate (float): The dropout rate to drop the attention weight.
            Default: 0.0.

    Returns:
        Variable: A 3-D Tensor computed by multi-head scaled dot product
            attention.

    Raises:
        ValueError: If input queries, keys, values are not 3-D Tensors, if
            the hidden sizes of queries and keys differ, if keys and values
            have different sequence lengths, or if the hidden size of keys
            or values is not divisible by num_heads.

    NOTES:
        1. When num_heads > 1, three linear projections are learned respectively
           to map input queries, keys and values into queries', keys' and values'.
           queries', keys' and values' have the same shapes with queries, keys
           and values.
        2. When num_heads == 1, scaled_dot_product_attention has no learnable
           parameters.
        3. When dropout_rate is non-zero, dropout is always applied with
           ``is_test=False``.

    Examples:
        .. code-block:: python

            queries = fluid.layers.data(name="queries",
                                        shape=[3, 5, 9],
                                        dtype="float32",
                                        append_batch_size=False)
            queries.stop_gradient = False
            keys = fluid.layers.data(name="keys",
                                     shape=[3, 6, 9],
                                     dtype="float32",
                                     append_batch_size=False)
            keys.stop_gradient = False
            values = fluid.layers.data(name="values",
                                       shape=[3, 6, 10],
                                       dtype="float32",
                                       append_batch_size=False)
            values.stop_gradient = False
            contexts = fluid.nets.scaled_dot_product_attention(queries, keys, values)
            contexts.shape  # [3, 5, 10]
    """
    if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3):
        raise ValueError(
            "Inputs quries, keys and values should all be 3-D tensors.")

    if queries.shape[-1] != keys.shape[-1]:
        raise ValueError(
            "The hidden size of queries and keys should be the same.")
    if keys.shape[-2] != values.shape[-2]:
        # Every key must have exactly one matching value.
        raise ValueError(
            "The max sequence length in key batch and in value batch "
            "should be the same.")
    if keys.shape[-1] % num_heads != 0:
        raise ValueError("The hidden size of keys (%d) must be divisible "
                         "by the number of attention heads (%d)." %
                         (keys.shape[-1], num_heads))
    if values.shape[-1] % num_heads != 0:
        raise ValueError("The hidden size of values (%d) must be divisible "
                         "by the number of attention heads (%d)." %
                         (values.shape[-1], num_heads))

    def __compute_qkv(queries, keys, values, num_heads):
        """
        Add linear projection to queries, keys, and values.

        Args:
            queries(Tensor): a 3-D input Tensor.
            keys(Tensor): a 3-D input Tensor.
            values(Tensor): a 3-D input Tensor.
            num_heads(int): The number of heads. Linearly project the inputs
                            ONLY when num_heads > 1.

        Returns:
            Tensor: linearly projected output Tensors: queries', keys' and
                    values'. They have the same shapes with queries, keys and
                    values.
        """

        if num_heads == 1:
            return queries, keys, values

        q = layers.fc(input=queries,
                      size=queries.shape[-1],
                      num_flatten_dims=2)
        k = layers.fc(input=keys, size=keys.shape[-1], num_flatten_dims=2)
        v = layers.fc(input=values, size=values.shape[-1], num_flatten_dims=2)
        return q, k, v

    def __split_heads(x, num_heads):
        """
        Reshape the last dimension of input tensor x so that it becomes two
        dimensions.

        Args:
            x(Tensor): a 3-D input Tensor.
            num_heads(int): The number of heads.

        Returns:
            Tensor: x itself when num_heads == 1, otherwise a 4-D Tensor with
                    shape [batch_size, num_heads, max_sequence_length,
                    m / num_heads], where m is the size of the last dimension
                    of x.
        """
        if num_heads == 1:
            return x

        hidden_size = x.shape[-1]
        # reshape the 3-D input: [batch_size, max_sequence_length, hidden_dim]
        # into a 4-D output:
        # [batch_size, max_sequence_length, num_heads, hidden_size_per_head].
        reshaped = layers.reshape(x=x,
                                  shape=list(x.shape[:-1]) +
                                  [num_heads, hidden_size // num_heads])

        # permute the dimensions into:
        # [batch_size, num_heads, max_sequence_len, hidden_size_per_head]
        return layers.transpose(x=reshaped, perm=[0, 2, 1, 3])

    def __combine_heads(x):
        """
        Reshape the last two dimensions of input tensor x so that it becomes
        one dimension.

        Args:
            x(Tensor): a 4-D input Tensor with shape
                       [bs, num_heads, max_sequence_length, hidden_dim]
                       (a 3-D Tensor is returned unchanged).

        Returns:
            Tensor: a Tensor with shape
                    [bs, max_sequence_length, num_heads * hidden_dim].
        """

        if len(x.shape) == 3:
            return x
        if len(x.shape) != 4:
            raise ValueError("Input(x) should be a 4-D Tensor.")

        trans_x = layers.transpose(x, perm=[0, 2, 1, 3])
        # Build a real list: on Python 3 a bare map() is a one-shot iterator,
        # not a sequence of ints.
        merged_shape = [
            int(trans_x.shape[0]),
            int(trans_x.shape[1]),
            int(trans_x.shape[2] * trans_x.shape[3]),
        ]
        return layers.reshape(x=trans_x, shape=merged_shape)

    q, k, v = __compute_qkv(queries, keys, values, num_heads)

    q = __split_heads(q, num_heads)
    k = __split_heads(k, num_heads)
    v = __split_heads(v, num_heads)

    key_dim_per_head = keys.shape[-1] // num_heads
    scaled_q = layers.scale(x=q, scale=key_dim_per_head**-0.5)
    # Q * K^T: the result is [..., query_len, key_len], so the softmax below
    # normalises over the keys and the weights can be multiplied with v
    # ([..., key_len, value_dim]).
    product = layers.matmul(x=scaled_q, y=k, transpose_y=True)

    # Softmax over the last axis, done on a 2-D view and reshaped back.
    weights = layers.reshape(x=layers.reshape(x=product,
                                              shape=[-1, product.shape[-1]],
                                              act="softmax"),
                             shape=product.shape)
    if dropout_rate:
        weights = layers.dropout(weights,
                                 dropout_prob=dropout_rate,
                                 is_test=False)
    ctx_multiheads = layers.matmul(weights, v)
    return __combine_heads(ctx_multiheads)
    def __init__(self, dim_z, x_train, x_test, diff=None, magic=5000):
        """
        Build the symbolic graph and compile the Theano functions of the model.

        Graph, in order: 1-D convolution -> batch norm -> activation ->
        pooling -> dropout -> flatten -> variational Gaussian layer ->
        hidden layer -> activation/reshape/dropout -> 1-D deconvolution.
        The deconvolution output is truncated along axis 2 to the input's
        size on that axis.

        Args:
            dim_z: size handed to the variational and hidden layers; also the
                width of the ``generative_z`` shared variable.
            x_train: stored as ``self.x_train`` (not otherwise used here).
            x_test: stored as ``self.x_test`` (not otherwise used here).
            diff: stored as ``self.diff`` (not otherwise used here).
            magic: size handed to the variational and hidden layers; the
                hidden output is reshaped to
                ``(inpt.shape[0], in_filters[-1], magic / in_filters[-1], 1)``.
                Presumably this must equal the flattened size of the first
                layer's output -- confirm against the data.
        """
        # ------------------------------ settings ------------------------------
        self.x_train = x_train
        self.x_test = x_test
        self.diff = diff
        self.batch_size = 100.0
        self.learning_rate = theano.shared(np.float32(0.0008))
        self.momentum = 0.3
        self.performance = {"train": [], "test": []}
        self.inpt = T.ftensor4(name="input")
        self.df = T.fmatrix(name="differential")
        self.dim_z = dim_z
        # Latent value substituted for the encoder output in generate_from_z.
        self.generative_z = theano.shared(np.float32(np.zeros([1, dim_z])))
        self.activation = relu
        self.generative = False
        self.out_distribution = False
        self.in_filters = [64, 64, 64]
        self.filter_lengths = [10.0, 10.0, 10.0]
        # Trainable parameters, collected in the order the layers are built.
        self.params = []
        self.magic = magic

        # The dropout rate is a symbolic scalar; every compiled function
        # substitutes the shared variable `dropout_prob` for it via `givens`.
        self.dropout_symbolic = T.fscalar()
        self.dropout_prob = theano.shared(np.float32(0.0))

        # ------------------------------- layers -------------------------------
        # Layer 1: convolution, batch norm, activation, pooling, dropout.
        self.conv1 = one_d_conv_layer(
            self.inpt, self.in_filters[0], 1, self.filter_lengths[0], param_names=["W1", "b1"]
        )
        self.params += self.conv1.params
        self.bn1 = batchnorm(self.conv1.output)
        self.nl1 = self.activation(self.bn1.X)
        # NOTE: despite the attribute name, the pooling mode is an average.
        self.maxpool1 = pool_2d(self.nl1, [3, 1], stride=[2, 1], mode="average_exc_pad").astype(theano.config.floatX)
        self.layer1_out = dropout(self.maxpool1, self.dropout_symbolic)

        # Flatten to 2-D for the variational layer.
        self.flattened = T.flatten(self.layer1_out, outdim=2)

        # Variational layer.
        self.latent_layer = variational_gauss_layer(self.flattened, self.magic, dim_z)
        self.params += self.latent_layer.params
        self.latent_out = self.latent_layer.output

        # Hidden layer, then reshape back to a 4-D tensor for the deconvolution.
        self.hidden_layer = hidden_layer(self.latent_out, dim_z, self.magic)
        self.params += self.hidden_layer.params
        self.hid_out = dropout(
            self.activation(self.hidden_layer.output).reshape(
                (self.inpt.shape[0], self.in_filters[-1], int(self.magic / self.in_filters[-1]), 1)
            ),
            self.dropout_symbolic,
        )

        # Deconvolution 1.
        self.deconv1 = one_d_deconv_layer(
            self.hid_out,
            1,
            self.in_filters[2],
            self.filter_lengths[2],
            pool=2.0,
            param_names=["W3", "b3"],
            distribution=False,
        )
        self.params += self.deconv1.params
        # NOTE: named `tanh_out`, but no tanh is applied in this method; it is
        # the raw deconvolution output.
        self.tanh_out = self.deconv1.output
        self.last_layer = self.deconv1

        # `out_distribution` is set to False above, so this branch does not
        # run unless that setting is changed.
        if self.out_distribution:
            self.trunk_sigma = self.last_layer.log_sigma[:, :, : self.inpt.shape[2], :]
        # Truncate the reconstruction to the input's size along axis 2.
        self.trunc_output = self.tanh_out[:, :, : self.inpt.shape[2], :]

        # ------------------------------ functions -----------------------------
        self.get_latent_states = theano.function(
            [self.inpt], self.latent_out, givens=[[self.dropout_symbolic, self.dropout_prob]]
        )
        self.get_flattened = theano.function(
            [self.inpt], self.flattened, givens=[[self.dropout_symbolic, self.dropout_prob]]
        )
        # Compiled once; the original compiled this identical function twice
        # and discarded the first result.
        self.output = theano.function(
            [self.inpt], self.trunc_output, givens=[[self.dropout_symbolic, self.dropout_prob]]
        )
        # Same output, but with the latent code replaced by `generative_z`.
        self.generate_from_z = theano.function(
            [self.inpt],
            self.trunc_output,
            givens=[[self.dropout_symbolic, self.dropout_prob], [self.latent_out, self.generative_z]],
        )

        # ------------------------------ training ------------------------------
        self.cost = self.MSE()
        self.mse = self.MSE()

        self.derivatives = T.grad(self.cost, self.params)
        self.updates = adam(self.params, self.derivatives, self.learning_rate)
        self.train_model = theano.function(
            inputs=[self.inpt, self.df],
            outputs=self.cost,
            updates=self.updates,
            givens=[[self.dropout_symbolic, self.dropout_prob]],
        )
Ejemplo n.º 8
0
    def forward(self):
        # in: c, q, c_mask, q_mask, ch, qh, y1, y2
        # out: yp1, yp2, loss
        config = self.config
        N, PL, QL, CL, d, dc, dg = config.batch_size, self.c_maxlen, self.q_maxlen, config.char_limit, config.hidden, config.char_dim, config.char_hidden
        gru = cudnn_gru if config.use_cudnn else native_gru

        with tf.variable_scope('emb'):
            with tf.variable_scope('char'):
                ch_emb = tf.reshape(
                    tf.nn.embedding_lookup(self.char_mat, self.ch),
                    [N * PL, CL, dc])
                qh_emb = tf.reshape(
                    tf.nn.embedding_lookup(self.char_mat, self.qh),
                    [N * QL, CL, dc])
                ch_emb = dropout(ch_emb,
                                 keep_prob=config.keep_prob,
                                 is_train=self.is_train)
                qh_emb = dropout(qh_emb,
                                 keep_prob=config.keep_prob,
                                 is_train=self.is_train)
                cell_fw = tf.contrib.rnn.GRUCell(dg)
                cell_bw = tf.contrib.rnn.GRUCell(dg)
                _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, ch_emb, self.ch_len, dtype=tf.float32)
                ch_emb = tf.concat([state_fw, state_bw], axis=1)
                _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, qh_emb, self.qh_len, dtype=tf.float32)
                qh_emb = tf.concat([state_fw, state_bw], axis=1)
                qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg])
                ch_emb = tf.reshape(ch_emb, [N, PL, 2 * dg])
            with tf.variable_scope('word'):
                c_emb = tf.nn.embedding_lookup(self.word_mat, self.c)
                q_emb = tf.nn.embedding_lookup(self.word_mat, self.q)

            c_emb_ori = tf.concat([c_emb, ch_emb], axis=2)
            q_emb_ori = tf.concat([q_emb, qh_emb], axis=2)

            # spatial dropout
            if config.use_spatial_dp:
                print("Using spatial dropout\n")
                if self.is_train:
                    q_emb_shape = tf.shape(q_emb_ori)
                    c_emb_shape = tf.shape(c_emb_ori)
                    q_emb = tf.nn.dropout(q_emb_ori,
                                          keep_prob=0.5 + config.keep_prob / 2,
                                          noise_shape=(q_emb_shape[0], 1,
                                                       q_emb_shape[2]))
                    c_emb = tf.nn.dropout(c_emb_ori,
                                          keep_prob=0.5 + config.keep_prob / 2,
                                          noise_shape=(c_emb_shape[0], 1,
                                                       c_emb_shape[2]))
                else:
                    q_emb = q_emb_ori
                    c_emb = c_emb_ori
            else:
                c_emb = c_emb_ori
                q_emb = q_emb_ori

        # context encoding: method1
        with tf.variable_scope('encoding'):
            rnn = gru(num_layers=3,
                      num_units=d,
                      batch_size=N,
                      input_size=c_emb.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train)
            c = rnn(c_emb,
                    seq_len=self.c_len,
                    concat=True,
                    keep_origin_input=True)
            q = rnn(q_emb,
                    seq_len=self.q_len,
                    concat=True,
                    keep_origin_input=True)

        with tf.variable_scope('attention'):
            qc_att = dot_attention(inputs=c,
                                   memory=q,
                                   hidden_size=d,
                                   mask=self.q_mask,
                                   keep_prob=config.keep_prob,
                                   is_train=self.is_train,
                                   scope='qc_dot_att')
            rnn = gru(num_layers=1,
                      num_units=d,
                      batch_size=N,
                      input_size=qc_att.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train,
                      scope='qc')

            cq_att = dot_attention(inputs=q,
                                   memory=c,
                                   hidden_size=d,
                                   mask=self.c_mask,
                                   keep_prob=config.keep_prob,
                                   is_train=self.is_train,
                                   scope='cq_dot_att')
            rnn = gru(num_layers=1,
                      num_units=d,
                      batch_size=N,
                      input_size=cq_att.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train,
                      scope='cq')
            c = rnn(qc_att, seq_len=self.c_len, keep_origin_input=False)
            q = rnn(cq_att, seq_len=self.q_len, keep_origin_input=False)

        # seq_length = self.q_len
        # idx = tf.concat(
        #         [tf.expand_dims(tf.range(tf.shape(q)[0]), axis=1),
        #          tf.expand_dims(seq_length - 1, axis=1)], axis=1)
        # # (B, 2h)
        # q_state = tf.gather_nd(q, idx)

        with tf.variable_scope('hybrid'):
            # B * N * Q
            doc_qry_mask = tf.keras.backend.batch_dot(
                tf.expand_dims(tf.cast(self.c_mask, tf.float32), 2),
                tf.expand_dims(tf.cast(self.q_mask, tf.float32), 1),
                axes=[2, 1])
            # (B, D, Q, 2h)
            doc_expand_embed = tf.tile(tf.expand_dims(c, 2),
                                       [1, 1, self.q_maxlen, 1])
            # (B, D, Q, 2h)
            qry_expand_embed = tf.tile(tf.expand_dims(q, 1),
                                       [1, self.c_maxlen, 1, 1])
            doc_qry_dot_embed = doc_expand_embed * qry_expand_embed
            # (B, D, Q, 6h)
            doc_qry_embed = tf.concat(
                [doc_expand_embed, qry_expand_embed, doc_qry_dot_embed],
                axis=3)
            # attention way
            num_units = doc_qry_embed.shape[-1]
            with tf.variable_scope('bi_attention'):
                w = tf.get_variable('W_att',
                                    shape=(num_units, 1),
                                    dtype=tf.float32,
                                    initializer=tf.random_uniform_initializer(
                                        -0.01, 0.01))
                # (B, D, Q)
                S = tf.matmul(
                    tf.reshape(doc_qry_embed, (-1, doc_qry_embed.shape[-1])),
                    w)
                S = tf.reshape(S, (N, self.c_maxlen, self.q_maxlen))
                # context2query, (B, D, 2h)
                c2q = tf.keras.backend.batch_dot(
                    tf.nn.softmax(softmax_mask(S, doc_qry_mask), dim=2), q)
                c2q_gated = c2q * c

            with tf.variable_scope('gated_attention'):
                # Gated Attention
                g_doc_qry_att = tf.keras.backend.batch_dot(
                    c, tf.transpose(q, (0, 2, 1)))
                # B * N * Q
                alphas = tf.nn.softmax(softmax_mask(g_doc_qry_att,
                                                    doc_qry_mask),
                                       dim=2)

                q_rep = tf.keras.backend.batch_dot(alphas, q)  # B x N x 2D
                d_gated = c * q_rep

                G = tf.concat([c, c2q, q_rep, c2q_gated, d_gated], axis=-1)
                # G = tf.nn.relu(dense(G, d * 2))

            with tf.variable_scope('match'):
                G = dot_attention(inputs=G,
                                  memory=G,
                                  hidden_size=d,
                                  mask=self.c_mask,
                                  keep_prob=config.keep_prob,
                                  is_train=self.is_train)

                rnn = gru(num_layers=1,
                          num_units=d,
                          batch_size=N,
                          input_size=G.get_shape().as_list()[-1],
                          keep_prob=config.keep_prob,
                          is_train=self.is_train)
                doc_encoding = rnn(G, seq_len=self.c_len, concat=False)

        with tf.variable_scope('pointer'):
            # Use self-attention or bilinear attention
            init = summ(q,
                        d,
                        mask=self.q_mask,
                        keep_prob=config.keep_prob,
                        is_train=self.is_train)

            # init = self.bilinear_attention_layer(c, q_state, self.c_mask)

            pointer = ptr_layer(batch_size=N,
                                hidden_size=init.get_shape().as_list()[-1],
                                keep_prob=config.keep_prob,
                                is_train=self.is_train)
            logits1, logits2 = pointer(init, doc_encoding, d, self.c_mask)

        with tf.variable_scope('predict'):
            outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                              tf.expand_dims(tf.nn.softmax(logits2), axis=1))
            outer = tf.matrix_band_part(outer, 0, 15)
            self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
            self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)

            # loss1 = tf.nn.softmax_cross_entropy_with_logits_v2(
            #         logits=logits1, labels=tf.stop_gradient(self.y1))
            # loss2 = tf.nn.softmax_cross_entropy_with_logits_v2(
            #         logits=logits2, labels=tf.stop_gradient(self.y2))
            if config.use_ghmc_or_ghmr == 'ghmc':
                print('Using GHMC Loss\n')
                ghmc_loss_func = GHMC_loss(momentum=0.)
                loss1 = ghmc_loss_func(logits1, tf.stop_gradient(self.y1))
                loss2 = ghmc_loss_func(logits2, tf.stop_gradient(self.y2))
            elif config.use_ghmc_or_ghmr == 'ghmr':
                print('Using GHMR Loss\n')
                ghmr_loss_func = GHMR_loss()
                loss1 = ghmr_loss_func(logits1, tf.stop_gradient(self.y1))
                loss2 = ghmr_loss_func(logits2, tf.stop_gradient(self.y2))
            else:
                loss1 = tf.nn.softmax_cross_entropy_with_logits(
                    logits=logits1, labels=tf.stop_gradient(self.y1))
                loss2 = tf.nn.softmax_cross_entropy_with_logits(
                    logits=logits2, labels=tf.stop_gradient(self.y2))
            self.loss = tf.reduce_mean(loss1 + loss2)
Ejemplo n.º 9
0
def get_model(X, batch_size, image_dimension):
    """Assemble three conv stages followed by a two-layer MLP head.

    Each conv stage is: convolution with 32 (3, 3) filters -> (2, 2) maxpool
    -> relu -> dropout. The result is flattened, passed through a 500-unit
    linear layer with relu, then a 1-unit linear layer with sigmoid.

    Every layer helper takes and returns a pair of expressions (``output`` and
    ``output_test``); both start from the same ``X``.
    NOTE(review): presumably these are the training-time and test-time graphs
    (they would differ only inside ``dropout``) -- confirm against the helpers.

    Parameters:
        X: input expression, fed to the first convolution as both branches.
        batch_size: first entry of the input shape tuple.
        image_dimension: height and width of the (3-channel) input.

    Returns:
        ``(output, output_test, all_parameters)`` where ``all_parameters``
        collects the params returned by the convolutional and linear layers
        only, in creation order.
    """
    out = out_test = X
    shape = (batch_size, 3, image_dimension, image_dimension)
    all_parameters = []

    # Three identical stages; only the convolution contributes parameters.
    for _ in range(3):
        out, out_test, conv_params, shape = convolutional(
            out, out_test, shape, 32, (3, 3))
        all_parameters += conv_params

        out, out_test, _ignored, shape = maxpool(out, out_test, shape, (2, 2))
        out, out_test, _ignored, shape = activation(out, out_test, shape, 'relu')
        out, out_test, _ignored, shape = dropout(out, out_test, shape)

    # Collapse everything after the batch axis before the dense layers.
    out = out.flatten(2)
    out_test = out_test.flatten(2)
    flat_shape = (shape[0], shape[1] * shape[2] * shape[3])

    # MLP first layer: 500 units + relu.
    out, out_test, dense_params, shape = linear(out, out_test, flat_shape, 500)
    all_parameters += dense_params
    out, out_test, _ignored, shape = activation(out, out_test, shape, 'relu')

    # MLP second layer: single unit + sigmoid.
    out, out_test, dense_params, shape = linear(out, out_test, shape, 1)
    all_parameters += dense_params
    out, out_test, _ignored, shape = activation(out, out_test, shape, 'sigmoid')

    return out, out_test, all_parameters
    def forward(self):
        """Build the model graph and set ``self.yp1``, ``self.yp2`` and ``self.loss``.

        Stages, each in its own variable scope:
          * ``emb``: character features (bidirectional GRU final states over
            character embeddings) concatenated with word embeddings.
          * ``encoding``: a 3-layer rnn applied to context and then question.
          * ``attention``: context-over-question dot attention followed by a
            1-layer rnn.
          * ``match``: dot attention of the attended context over itself,
            followed by a 1-layer rnn.
          * ``pointer``: question summary used to initialise a pointer layer
            that produces two sets of logits over context positions.
          * ``predict``: span selection and cross-entropy loss.

        Reads ``self.c``, ``self.q``, ``self.ch``, ``self.qh``, their masks and
        lengths, ``self.y1``/``self.y2``, the embedding matrices and
        ``self.config``. Returns nothing; results are stored on ``self``.
        """
        # in: c, q, c_mask, q_mask, ch, qh, y1, y2
        # out: yp1, yp2, loss
        config = self.config
        # N: batch size, PL/QL: context/question max length, CL: char limit,
        # d: hidden size, dc: char embedding dim, dg: char GRU hidden size.
        N, PL, QL, CL, d, dc, dg = config.batch_size, self.c_maxlen, self.q_maxlen, config.char_limit, config.hidden, config.char_dim, config.char_hidden
        # Pick the recurrent layer factory; use_sru takes precedence over use_cudnn.
        gru = cudnn_gru if config.use_cudnn else native_gru
        gru = native_sru if config.use_sru else gru

        with tf.variable_scope('emb'):
            with tf.variable_scope('char'):
                # Fold the word axis into the batch axis so every word is one
                # character sequence of length CL.
                ch_emb = tf.reshape(
                    tf.nn.embedding_lookup(self.char_mat, self.ch), [N * PL, CL, dc])
                qh_emb = tf.reshape(
                    tf.nn.embedding_lookup(self.char_mat, self.qh), [N * QL, CL, dc])
                ch_emb = dropout(ch_emb, keep_prob=config.keep_prob, is_train=self.is_train)
                qh_emb = dropout(qh_emb, keep_prob=config.keep_prob, is_train=self.is_train)
                # The same two cell objects are used for both the context and
                # the question character sequences.
                cell_fw = tf.contrib.rnn.GRUCell(dg)
                cell_bw = tf.contrib.rnn.GRUCell(dg)
                # Only the final forward/backward states are kept as the
                # per-word character representation.
                _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, ch_emb, self.ch_len, dtype=tf.float32)
                ch_emb = tf.concat([state_fw, state_bw], axis=1)
                _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, qh_emb, self.qh_len, dtype=tf.float32)
                qh_emb = tf.concat([state_fw, state_bw], axis=1)
                # Restore the (batch, words, features) layout.
                qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg])
                ch_emb = tf.reshape(ch_emb, [N, PL, 2 * dg])
            with tf.variable_scope('word'):
                c_emb = tf.nn.embedding_lookup(self.word_mat, self.c)
                q_emb = tf.nn.embedding_lookup(self.word_mat, self.q)

            # Word embedding and character features side by side on the last axis.
            c_emb = tf.concat([c_emb, ch_emb], axis=2)
            q_emb = tf.concat([q_emb, qh_emb], axis=2)

        with tf.variable_scope('encoding'):
            rnn = gru(num_layers=3, num_units=d, 
                batch_size=N, input_size=c_emb.get_shape().as_list()[-1], 
                keep_prob=config.keep_prob, is_train=self.is_train)
            c = rnn(c_emb, seq_len=self.c_len)
            # Switch the scope to reuse mode before running the same rnn
            # object on the question.
            tf.get_variable_scope().reuse_variables()
            q = rnn(q_emb, seq_len=self.q_len)

        with tf.variable_scope('attention'):
            # Context attends over the question; padding is handled via q_mask.
            qc_att = dot_attention(inputs=c, memory=q, 
                                   hidden_size=d, mask=self.q_mask, 
                                   keep_prob=config.keep_prob, 
                                   is_train=self.is_train)
            rnn = gru(num_layers=1, num_units=d, 
                batch_size=N, input_size=qc_att.get_shape().as_list()[-1], 
                keep_prob=config.keep_prob, is_train=self.is_train)
            att = rnn(qc_att, seq_len=self.c_len)

        with tf.variable_scope('match'):
            # Self-attention: the attended context is both inputs and memory.
            self_att = dot_attention(inputs=att, memory=att, 
                                   hidden_size=d, mask=self.c_mask, 
                                   keep_prob=config.keep_prob, 
                                   is_train=self.is_train)
            rnn = gru(num_layers=1, num_units=d, 
                batch_size=N, input_size=self_att.get_shape().as_list()[-1], 
                keep_prob=config.keep_prob, is_train=self.is_train)
            match = rnn(self_att, seq_len=self.c_len)

        with tf.variable_scope('pointer'):
            # Summarise only the last 2 * d features of the question encoding.
            # NOTE(review): presumably these are the top rnn layer's outputs --
            # confirm against the gru implementation's output layout.
            init = summ(q[:,:,-2 * d:], d, mask=self.q_mask, 
                        keep_prob=config.keep_prob, is_train=self.is_train)
            pointer = ptr_layer(batch_size=N, 
                                hidden_size=init.get_shape().as_list()[-1], 
                                keep_prob=config.keep_prob, 
                                is_train=self.is_train)
            logits1, logits2 = pointer(init, match, d, self.c_mask)

        with tf.variable_scope('predict'):
            # outer[b, i, j] = softmax(logits1)[b, i] * softmax(logits2)[b, j]
            outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2), 
                              tf.expand_dims(tf.nn.softmax(logits2), axis=1))
            # Keep only entries with 0 <= j - i <= 15; everything else is zeroed.
            outer = tf.matrix_band_part(outer, 0, 15)
            # Row and column index of the best remaining entry
            # (presumably the predicted span start and end -- confirm with caller).
            self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
            self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)

            # loss1 = tf.nn.softmax_cross_entropy_with_logits_v2(
            #         logits=logits1, labels=tf.stop_gradient(self.y1))
            loss1 = tf.nn.softmax_cross_entropy_with_logits(
                    logits=logits1, labels=tf.stop_gradient(self.y1))
            # loss2 = tf.nn.softmax_cross_entropy_with_logits_v2(
            #         logits=logits2, labels=tf.stop_gradient(self.y2))
            loss2 = tf.nn.softmax_cross_entropy_with_logits(
                    logits=logits2, labels=tf.stop_gradient(self.y2))
            # Per-example losses for both logit sets are summed, then averaged.
            self.loss = tf.reduce_mean(loss1 + loss2)
Ejemplo n.º 11
0
def get_model(X, batch_size, image_dimension):
	"""Build a small conv net: three conv stages, then a two-layer MLP head.

	A conv stage is a 32-filter (3, 3) convolution, a (2, 2) maxpool, a relu
	and a dropout. The head is linear(500) + relu followed by linear(1) +
	sigmoid. All helpers carry two expressions in parallel, both seeded
	with ``X``.
	NOTE(review): the second expression looks like the test-time graph --
	confirm against the layer helpers.

	Returns ``(output, output_test, all_parameters)``; ``all_parameters``
	holds only the params produced by the convolutional and linear layers.
	"""
	all_parameters = []

	def conv_stage(expr, expr_test, in_shape):
		# conv -> maxpool -> relu -> dropout; only the conv params are kept.
		expr, expr_test, stage_params, out_shape = convolutional(expr, expr_test, in_shape, 32, (3, 3))
		all_parameters.extend(stage_params)
		expr, expr_test, _, out_shape = maxpool(expr, expr_test, out_shape, (2, 2))
		expr, expr_test, _, out_shape = activation(expr, expr_test, out_shape, 'relu')
		expr, expr_test, _, out_shape = dropout(expr, expr_test, out_shape)
		return expr, expr_test, out_shape

	# Convolutional trunk: first, second and third stage.
	result, result_test, shape = conv_stage(X, X, (batch_size, 3, image_dimension, image_dimension))
	result, result_test, shape = conv_stage(result, result_test, shape)
	result, result_test, shape = conv_stage(result, result_test, shape)

	# Flatten everything but the leading axis for the dense layers.
	result = result.flatten(2)
	result_test = result_test.flatten(2)
	flattened = (shape[0], shape[1]*shape[2]*shape[3])

	# MLP first layer
	result, result_test, head_params, shape = linear(result, result_test, flattened, 500)
	all_parameters.extend(head_params)
	result, result_test, _, shape = activation(result, result_test, shape, 'relu')

	# MLP second layer
	result, result_test, head_params, shape = linear(result, result_test, shape, 1)
	all_parameters.extend(head_params)
	result, result_test, _, shape = activation(result, result_test, shape, 'sigmoid')

	return result, result_test, all_parameters
Ejemplo n.º 12
0
def parameter_efficient(in_channels=1,
                        out_channels=2,
                        start_filters=64,
                        input_side_length=256,
                        depth=4,
                        res_blocks=2,
                        filter_size=3,
                        sparse_labels=True,
                        batch_size=1,
                        activation="cReLU",
                        batch_norm=True):
    """
    Creates the graph for the parameter efficient variant of the U-Net and sets up the appropriate input and output placeholders.

    The input placeholder is channels-last (batch, height, width, channels). It is transposed to
    channels-first ("NCHW") for all internal layers, and the logits are transposed back to
    channels-last before being returned.

    Parameters
    ----------
    in_channels: int
        The depth (number of channels) of the input.
    out_channels: int
        The number of classes, i.e. the depth of the output.
    start_filters : int
        The number of filters in the first convolution.
    input_side_length: int
        The side length of the square input.
    depth: int
        The depth of the U-part of the network. This is equal to the number of max-pooling layers.
    res_blocks: int
        The number of residual blocks in between max-pooling layers on the down-path and in between up-convolutions on the up-path.
    filter_size: int
        The width and height of the filter. The receptive field.
    sparse_labels: bool
        If true, the labels are integers, one integer per pixel, denoting the class that that pixel belongs to. If false, labels are one-hot encoded.
    batch_size: int
        The training batch size.
    activation: string
        Either "ReLU" for the standard ReLU activation or "cReLU" for the concatenated ReLU activation function (case-insensitive).
    batch_norm: bool
        Whether to use batch normalization or not.

    Returns
    -------
    inputs : TF tensor
        The network input placeholder, shape (batch_size, input_side_length, input_side_length, in_channels).
    logits: TF tensor
        The network output before SoftMax, channels-last.
    ground_truth: TF tensor
        The placeholder for the desired output from the ground truth.
    keep_prob: TF float
        The scalar placeholder holding the keep probability for drop out layers.
    training: TF bool
        The scalar placeholder which switches batch normalization to training or inference mode.

    Raises
    ------
    ValueError
        If ``activation`` is neither "ReLU" nor "cReLU".
    """

    activation = str.lower(activation)
    if activation not in ["relu", "crelu"]:
        raise ValueError("activation must be \"ReLU\" or \"cReLU\".")

    pool_size = 2

    # Define inputs and helper functions #

    with tf.variable_scope('inputs'):
        inputs = tf.placeholder(tf.float32,
                                shape=(batch_size, input_side_length,
                                       input_side_length, in_channels),
                                name='inputs')
        if sparse_labels:
            ground_truth = tf.placeholder(tf.int32,
                                          shape=(batch_size, input_side_length,
                                                 input_side_length),
                                          name='labels')
        else:
            ground_truth = tf.placeholder(tf.float32,
                                          shape=(batch_size, input_side_length,
                                                 input_side_length,
                                                 out_channels),
                                          name='labels')
        keep_prob = tf.placeholder(tf.float32, shape=[], name='keep_prob')
        training = tf.placeholder(tf.bool, shape=[], name="training")

        # NHWC -> NCHW; every layer below is built with data_format="NCHW".
        network_input = tf.transpose(inputs, perm=[0, 3, 1, 2])

    # [conv -> conv -> max pool -> drop out] + parameter updates
    def step_down(name,
                  input_,
                  filter_size=3,
                  res_blocks=2,
                  keep_prob=1.,
                  training=False):
        """Build one contracting step; returns (pooled output, skip output)."""

        with tf.variable_scope(name):

            # With channel_multiplier=2 the residual block returns a pair:
            # its output and the tiled input used for the step-level skip.
            with tf.variable_scope("res_block_0"):
                conv_out, tiled_input = layers.res_block(
                    input_,
                    filter_size,
                    channel_multiplier=2,
                    depthwise_multiplier=2,
                    convolutions=2,
                    training=training,
                    activation=activation,
                    batch_norm=batch_norm,
                    data_format="NCHW")

            # `range` instead of the Python 2-only `xrange`.
            for i in range(1, res_blocks):
                with tf.variable_scope("res_block_" + str(i)):
                    conv_out = layers.res_block(conv_out,
                                                filter_size,
                                                channel_multiplier=1,
                                                depthwise_multiplier=2,
                                                convolutions=2,
                                                training=training,
                                                activation=activation,
                                                batch_norm=batch_norm,
                                                data_format="NCHW")

            # Skip connection around the whole stack of residual blocks.
            conv_out = conv_out + tiled_input

            pool_out = layers.max_pool(conv_out, pool_size, data_format="NCHW")

            bottom_out = layers.dropout(pool_out, keep_prob)
            side_out = layers.dropout(conv_out, keep_prob)

        return bottom_out, side_out

    # parameter updates + [upconv and concat -> drop out -> conv -> conv]
    def step_up(name,
                bottom_input,
                side_input,
                filter_size=3,
                res_blocks=2,
                keep_prob=1.,
                training=False):
        """Build one expanding step from the lower-level and skip tensors."""

        with tf.variable_scope(name):
            added_input = layers.upconv_add_block(bottom_input,
                                                  side_input,
                                                  data_format="NCHW")

            conv_out = added_input
            for i in range(res_blocks):
                with tf.variable_scope("res_block_" + str(i)):
                    conv_out = layers.res_block(conv_out,
                                                filter_size,
                                                channel_multiplier=1,
                                                depthwise_multiplier=2,
                                                convolutions=2,
                                                training=training,
                                                activation=activation,
                                                batch_norm=batch_norm,
                                                data_format="NCHW")

            result = layers.dropout(conv_out, keep_prob)

        return result

    # Build the network #

    with tf.variable_scope('contracting'):

        # Skip tensors of every contracting step, shallowest first.
        outputs = []

        with tf.variable_scope("step_0"):

            # Conv 1
            in_filters = in_channels
            out_filters = start_filters

            # Weight stddev scaled by the fan-in of the convolution.
            stddev = np.sqrt(2. / (filter_size**2 * in_filters))
            w = layers.weight_variable(
                [filter_size, filter_size, in_filters, out_filters],
                stddev=stddev,
                name="weights")

            out_ = tf.nn.conv2d(network_input,
                                w, [1, 1, 1, 1],
                                padding="SAME",
                                data_format="NCHW")
            out_ = out_ + layers.bias_variable([out_filters, 1, 1],
                                               name='biases')

            # Batch Norm 1
            if batch_norm:
                out_ = tf.layers.batch_normalization(out_,
                                                     axis=1,
                                                     momentum=0.999,
                                                     center=True,
                                                     scale=True,
                                                     training=training,
                                                     trainable=True,
                                                     name="batch_norm",
                                                     fused=True)

            in_filters = out_filters

            # concatenated ReLU: relu([x, -x]) along the channel axis, which
            # doubles the number of channels fed to the next convolution.
            if activation == "crelu":
                out_ = tf.concat([out_, -out_], axis=1)
                in_filters = 2 * in_filters
            out_ = tf.nn.relu(out_)

            # Conv 2
            stddev = np.sqrt(2. / (filter_size**2 * in_filters))
            w = layers.weight_variable(
                [filter_size, filter_size, in_filters, out_filters],
                stddev=stddev,
                name="weights")

            out_ = tf.nn.conv2d(out_,
                                w, [1, 1, 1, 1],
                                padding="SAME",
                                data_format="NCHW")
            out_ = out_ + layers.bias_variable([out_filters, 1, 1],
                                               name='biases')

            # Res Block 1
            conv_out = layers.res_block(out_,
                                        filter_size,
                                        channel_multiplier=1,
                                        depthwise_multiplier=2,
                                        convolutions=2,
                                        training=training,
                                        activation=activation,
                                        batch_norm=batch_norm,
                                        data_format="NCHW")

            pool_out = layers.max_pool(conv_out, pool_size, data_format="NCHW")

            bottom_out = layers.dropout(pool_out, keep_prob)
            side_out = layers.dropout(conv_out, keep_prob)

            outputs.append(side_out)

        # Build contracting path
        for i in range(1, depth):
            bottom_out, side_out = step_down('step_' + str(i),
                                             bottom_out,
                                             filter_size=filter_size,
                                             res_blocks=res_blocks,
                                             keep_prob=keep_prob,
                                             training=training)
            outputs.append(side_out)

    # Bottom [conv -> conv]
    with tf.variable_scope('step_' + str(depth)):

        with tf.variable_scope("res_block_0"):
            conv_out, tiled_input = layers.res_block(bottom_out,
                                                     filter_size,
                                                     channel_multiplier=2,
                                                     depthwise_multiplier=2,
                                                     convolutions=2,
                                                     training=training,
                                                     activation=activation,
                                                     batch_norm=batch_norm,
                                                     data_format="NCHW")
        for i in range(1, res_blocks):
            with tf.variable_scope("res_block_" + str(i)):
                conv_out = layers.res_block(conv_out,
                                            filter_size,
                                            channel_multiplier=1,
                                            depthwise_multiplier=2,
                                            convolutions=2,
                                            training=training,
                                            activation=activation,
                                            batch_norm=batch_norm,
                                            data_format="NCHW")

        conv_out = conv_out + tiled_input
        current_tensor = layers.dropout(conv_out, keep_prob)

    with tf.variable_scope('expanding'):

        # Deepest skip tensor first, so outputs[i] pairs with expanding step i.
        outputs.reverse()

        # Build expanding path
        for i in range(depth):
            current_tensor = step_up('step_' + str(depth + i + 1),
                                     current_tensor,
                                     outputs[i],
                                     filter_size=filter_size,
                                     res_blocks=res_blocks,
                                     keep_prob=keep_prob,
                                     training=training)

    # Last layer is a 1x1 convolution to get the predictions
    # We don't want an activation function for this one (softmax will be applied later), so we're doing it manually
    in_filters = current_tensor.shape.as_list()[1]
    stddev = np.sqrt(2. / in_filters)

    with tf.variable_scope('classification'):

        w = layers.weight_variable([1, 1, in_filters, out_channels],
                                   stddev,
                                   name='weights')
        b = layers.bias_variable([out_channels, 1, 1], name='biases')

        conv = tf.nn.conv2d(current_tensor,
                            w,
                            strides=[1, 1, 1, 1],
                            padding="SAME",
                            data_format="NCHW",
                            name='conv')
        logits = conv + b

        # NCHW -> NHWC so the logits line up with the label placeholder.
        logits = tf.transpose(logits, perm=[0, 2, 3, 1])

    return inputs, logits, ground_truth, keep_prob, training
Ejemplo n.º 13
0
def unet(in_channels=1,
         out_channels=2,
         start_filters=64,
         side_length=572,
         depth=4,
         convolutions=2,
         filter_size=3,
         sparse_labels=True,
         batch_size=1):
    """
    Build the TensorFlow graph of a U-Net and the placeholders that feed it.

    The placeholders use channels-last layout (NHWC); internally the network
    runs channels-first (NCHW) and the logits are transposed back to
    channels-last before being returned. All conv blocks use "SAME" padding.

    Parameters
    ----------
    in_channels: int
        Number of channels of the input image.
    out_channels: int
        Number of output channels (classes) produced by the final 1x1
        convolution.
    start_filters : int
        Number of filters requested from the first conv block.
    side_length: int
        Side length of the square input (and of the label placeholder).
    depth: int
        Number of down-steps (each ending in a max pool) and, symmetrically,
        of up-steps.
    convolutions: int
        Number of convolutions per conv block, forwarded to
        ``layers.conv_block``.
    filter_size: int
        Filter width/height forwarded to ``layers.conv_block``.
    sparse_labels: bool
        If true, the label placeholder is int32 of shape
        (batch, side, side), i.e. one class index per pixel. Otherwise it is
        float32 of shape (batch, side, side, out_channels).
    batch_size: int
        Fixed batch dimension of both placeholders.

    Returns
    -------
    inputs : TF tensor
        The float32 input placeholder, shape
        (batch_size, side_length, side_length, in_channels).
    logits: TF tensor
        The network output before any softmax, channels-last.
    ground_truth: TF tensor
        The label placeholder.
    keep_prob: TF tensor
        Scalar float32 placeholder passed to every dropout layer.
    """

    channels_first = "NCHW"
    pooling_window = 2
    # Keyword arguments common to every conv block in the network.
    block_kwargs = dict(convolutions=convolutions,
                        padding="SAME",
                        data_format=channels_first)

    # Placeholders #
    with tf.variable_scope('inputs'):
        image_shape = (batch_size, side_length, side_length, in_channels)
        inputs = tf.placeholder(tf.float32, shape=image_shape, name='inputs')

        if sparse_labels:
            label_dtype = tf.int32
            label_shape = (batch_size, side_length, side_length)
        else:
            label_dtype = tf.float32
            label_shape = (batch_size, side_length, side_length, out_channels)
        ground_truth = tf.placeholder(label_dtype,
                                      shape=label_shape,
                                      name='labels')

        keep_prob = tf.placeholder(tf.float32, shape=[], name='keep_prob')

        # NHWC -> NCHW for the body of the network.
        network_input = tf.transpose(inputs, perm=[0, 3, 1, 2])

    def pool_and_drop(tensor):
        # max pool -> dropout tail shared by all contracting steps
        pooled = layers.max_pool(tensor,
                                 pooling_window,
                                 data_format=channels_first)
        return layers.dropout(pooled, keep_prob)

    # Contracting path: [conv block -> max pool -> dropout] per step #
    skip_tensors = []  # pre-pooling features, reused by the expanding path

    with tf.variable_scope('contracting'):

        # The first step fixes the filter count instead of doubling it.
        with tf.variable_scope("step_0"):
            features = layers.conv_block(network_input,
                                         filter_size,
                                         out_filters=start_filters,
                                         **block_kwargs)
            current_tensor = pool_and_drop(features)
            skip_tensors.append(features)

        for level in xrange(1, depth):
            with tf.variable_scope("step_{}".format(level)):
                features = layers.conv_block(current_tensor,
                                             filter_size,
                                             channel_multiplier=2,
                                             **block_kwargs)
                current_tensor = pool_and_drop(features)
            skip_tensors.append(features)

    # Bottom of the U: a single conv block without pooling #
    with tf.variable_scope("step_{}".format(depth)):
        current_tensor = layers.conv_block(current_tensor,
                                           filter_size,
                                           channel_multiplier=2,
                                           **block_kwargs)

    # Expanding path: [upconv + concat -> dropout -> conv block] per step #
    with tf.variable_scope("expanding"):

        # Deepest skip connection first.
        skip_tensors.reverse()

        for i in xrange(depth):
            with tf.variable_scope("step_{}".format(depth + i + 1)):
                merged = layers.upconv_concat_block(current_tensor,
                                                    skip_tensors[i],
                                                    data_format=channels_first)
                merged = layers.dropout(merged, keep_prob)
                current_tensor = layers.conv_block(merged,
                                                   filter_size,
                                                   channel_multiplier=0.5,
                                                   **block_kwargs)

    # Classification head: a bare 1x1 convolution plus bias. It is built by
    # hand so that no activation is applied (softmax happens downstream).
    in_filters = current_tensor.shape.as_list()[1]
    stddev = np.sqrt(2.0 / in_filters)

    with tf.variable_scope("classification"):
        kernel = layers.weight_variable([1, 1, in_filters, out_channels],
                                        stddev,
                                        name="weights")
        # Shape [C, 1, 1] broadcasts over the spatial axes of an NCHW tensor.
        offset = layers.bias_variable([out_channels, 1, 1], name="biases")

        conv = tf.nn.conv2d(current_tensor,
                            kernel,
                            strides=[1, 1, 1, 1],
                            padding="VALID",
                            name="conv",
                            data_format="NCHW")

        # NCHW -> NHWC so the logits line up with the label placeholder.
        logits = tf.transpose(conv + offset, perm=[0, 2, 3, 1])

    return inputs, logits, ground_truth, keep_prob
Ejemplo n.º 14
0
def alexnet(inputs, num_classes, keep_prob):
    """Build an AlexNet-style classifier graph and return its logits.

    ``inputs`` is reshaped to (-1, 28, 28, 1), so it must hold 28x28
    single-channel images. Five conv layers (three of them followed by a
    2x2/stride-2 max pool, four by a normalization layer) feed two 4096-unit
    fully connected layers with dropout, followed by a linear classifier.

    Args:
        inputs: tensor reshapeable to (-1, 28, 28, 1).
        num_classes: number of output units of the classifier.
        keep_prob: keep probability forwarded to both dropout layers.

    Returns:
        The un-normalized class scores, named '<scope>_logits'.
    """

    def halve(tensor, name):
        # 2x2 max pool, stride 2, SAME padding: used after conv1, conv2, conv5
        return max_pooling(tensor,
                           ksize=(2, 2),
                           stride=(2, 2),
                           padding='SAME',
                           name=name)

    images = tf.reshape(inputs, shape=[-1, 28, 28, 1])

    # conv1 -> pool -> norm
    net = conv2d(images, shape=(11, 11, 1, 96), padding='SAME', name='conv1')
    net = halve(net, 'pool1')
    net = norm(net, radius=4, name='norm1')

    # conv2 -> pool -> norm
    net = conv2d(net, shape=(5, 5, 96, 256), padding='SAME', name='conv2')
    net = halve(net, 'pool2')
    net = norm(net, radius=4, name='norm2')

    # conv3 -> norm (no pooling at this stage)
    net = conv2d(net, shape=(3, 3, 256, 384), padding='SAME', name='conv3')
    net = norm(net, radius=4, name='norm3')

    # conv4
    net = conv2d(net, shape=(3, 3, 384, 384), padding='SAME', name='conv4')

    # conv5 -> pool -> norm
    net = conv2d(net, shape=(3, 3, 384, 256), padding='SAME', name='conv5')
    net = halve(net, 'pool5')
    net = norm(net, radius=4, name='norm5')

    # Flatten for the dense layers; the code expects a 4x4x256 feature map
    # here (28 -> 14 -> 7 -> 4 if each SAME pool rounds up).
    flat_dim = 4 * 4 * 256
    hidden = tf.reshape(net, shape=(-1, flat_dim))

    # fc1 + dropout
    hidden = fc(hidden, shape=(flat_dim, 4096), name='fc1')
    hidden = dropout(hidden, keep_prob=keep_prob, name='dropout1')

    # fc2 + dropout
    hidden = fc(hidden, shape=(4096, 4096), name='fc2')
    hidden = dropout(hidden, keep_prob=keep_prob, name='dropout2')

    # Linear classifier producing the logits
    with tf.variable_scope('classifier') as scope:
        weights = tf.get_variable('weights',
                                  shape=[4096, num_classes],
                                  initializer=tf.initializers.he_normal())
        biases = tf.get_variable('biases',
                                 shape=[num_classes],
                                 initializer=tf.initializers.random_normal())
        projected = tf.matmul(hidden, weights)
        logits = tf.add(projected, biases, name=scope.name + '_logits')

    return logits
Ejemplo n.º 15
0
    def __init_network(self):
        """Assemble the MobileNet encoder graph and set ``self.logits``.

        Layout: input preprocessing, one regular 3x3 convolution, thirteen
        depthwise-separable convolutions chained on each other's pointwise
        output, a 7x7 average pool, dropout, and a 1x1 convolution acting as
        the classifier. Every intermediate tensor is registered through
        ``self.__add_to_nodes``.
        """
        args = self.args

        # (scope name, base filter count, stride) for each depthwise-separable
        # block, in the order the blocks are chained.
        separable_specs = (
            ('conv_ds_2', 64, (1, 1)),
            ('conv_ds_3', 128, (2, 2)),
            ('conv_ds_4', 128, (1, 1)),
            ('conv_ds_5', 256, (2, 2)),
            ('conv_ds_6', 256, (1, 1)),
            ('conv_ds_7', 512, (2, 2)),
            ('conv_ds_8', 512, (1, 1)),
            ('conv_ds_9', 512, (1, 1)),
            ('conv_ds_10', 512, (1, 1)),
            ('conv_ds_11', 512, (1, 1)),
            ('conv_ds_12', 512, (1, 1)),
            ('conv_ds_13', 1024, (2, 2)),
            ('conv_ds_14', 1024, (1, 1)),
        )

        with tf.variable_scope('mobilenet_encoder'):
            # Preprocessing as done in the paper: subtract the mean image and
            # scale by 255.
            with tf.name_scope('pre_processing'):
                preprocessed_input = (self.X - self.mean_img) / 255.0

            # Stem: a plain strided 3x3 convolution whose filter count is
            # scaled by the width multiplier.
            net = conv2d('conv_1', preprocessed_input,
                         num_filters=int(round(32 * args.width_multiplier)),
                         kernel_size=(3, 3),
                         padding='SAME',
                         stride=(2, 2),
                         activation=tf.nn.relu6,
                         batchnorm_enabled=args.batchnorm_enabled,
                         is_training=self.is_training,
                         l2_strength=args.l2_strength,
                         bias=args.bias)
            self.__add_to_nodes([net])

            # Body: each block consumes the previous block's pointwise output.
            for scope_name, filters, stride in separable_specs:
                depthwise, pointwise = depthwise_separable_conv2d(
                    scope_name, net,
                    width_multiplier=args.width_multiplier,
                    num_filters=filters,
                    kernel_size=(3, 3),
                    padding='SAME',
                    stride=stride,
                    batchnorm_enabled=args.batchnorm_enabled,
                    activation=tf.nn.relu6,
                    is_training=self.is_training,
                    l2_strength=args.l2_strength,
                    biases=(args.bias, args.bias))
                self.__add_to_nodes([depthwise, pointwise])
                net = pointwise

            # Head: average pool -> dropout -> 1x1 conv as the classifier.
            avg_pool = avg_pool_2d(net, size=(7, 7), stride=(1, 1))
            dropped = dropout(avg_pool, args.dropout_keep_prob, self.is_training)
            class_scores = conv2d('fc', dropped,
                                  kernel_size=(1, 1),
                                  num_filters=args.num_classes,
                                  l2_strength=args.l2_strength,
                                  bias=args.bias)
            self.logits = flatten(class_scores)
            self.__add_to_nodes([avg_pool, dropped, self.logits])
Ejemplo n.º 16
0
    def __init__(self, dim_z, x_train, x_test, diff=None, magic=5000):
        """Build the symbolic graph and compile the Theano functions.

        Graph layout: two [1-D conv -> batch norm -> activation -> max pool
        -> dropout] stages, a flatten, a variational Gaussian latent layer,
        a hidden layer reshaped back to 4-D, and two 1-D deconvolutions whose
        output is truncated to the input length.

        Parameters
        ----------
        dim_z : int
            Size of the latent code. Forwarded to the latent and hidden
            layers and used as the width of ``self.generative_z``.
        x_train, x_test :
            Training and test data; only stored on the instance here.
        diff : optional
            Stored as ``self.diff``; not otherwise used in the constructor.
        magic : int
            Feature count handed to ``variational_gauss_layer`` and
            ``hidden_layer``. The hidden output is reshaped to
            ``(inpt.shape[0], in_filters[-1], int(magic / in_filters[-1]), 1)``.
            NOTE(review): presumably this has to match the flattened size of
            the second pooling stage for the input length in use -- confirm.
        """
        ####################################### SETTINGS ###################################
        self.x_train = x_train
        self.x_test = x_test
        self.diff = diff
        self.batch_size = 100.
        # Shared variable so the learning rate can be changed after compiling.
        self.learning_rate = theano.shared(np.float32(0.0008))
        self.momentum = 0.3
        self.performance = {"train": [], "test": []}
        self.inpt = T.ftensor4(name='input')
        self.df = T.fmatrix(name='differential')
        self.dim_z = dim_z
        # Latent code substituted for the encoder output in generate_from_z.
        self.generative_z = theano.shared(np.float32(np.zeros([1, dim_z])))
        self.activation = T.abs_
        self.generative = False
        self.out_distribution = False
        self.in_filters = [32, 32, 32, 32]
        self.filter_lengths = [11., 11., 11., 11.]
        # Trainable parameters, collected in layer order for T.grad / adam.
        self.params = []
        self.magic = magic

        # The graph is built against a symbolic dropout scalar; every
        # compiled function substitutes the shared value below via `givens`.
        self.dropout_symbolic = T.fscalar()
        self.dropout_prob = theano.shared(np.float32(0.0))
        ####################################### LAYERS ######################################
        # LAYER 1 ##############################
        self.conv1 = one_d_conv_layer(self.inpt,
                                      self.in_filters[0],
                                      1,
                                      self.filter_lengths[0],
                                      param_names=["W1", 'b1'])
        self.params += self.conv1.params
        self.bn1 = batchnorm(self.conv1.output)
        self.nl1 = self.activation(self.bn1.X)
        self.maxpool1 = ds.max_pool_2d(self.nl1, [3, 1],
                                       st=[2, 1],
                                       ignore_border=False).astype(
                                           theano.config.floatX)
        self.layer1_out = dropout(self.maxpool1, self.dropout_symbolic)
        # LAYER 2 ##############################
        self.conv2 = one_d_conv_layer(self.layer1_out,
                                      self.in_filters[1],
                                      self.in_filters[0],
                                      self.filter_lengths[1],
                                      param_names=["W2", 'b2'])
        self.params += self.conv2.params
        self.bn2 = batchnorm(self.conv2.output)
        self.nl2 = self.activation(self.bn2.X)
        self.maxpool2 = ds.max_pool_2d(self.nl2, [3, 1],
                                       st=[2, 1],
                                       ignore_border=False).astype(
                                           theano.config.floatX)
        self.layer2_out = dropout(self.maxpool2, self.dropout_symbolic)
        # LAYER 3 ################################
        self.flattened = T.flatten(self.layer2_out, outdim=2)
        # Variational Layer #####################
        self.latent_layer = variational_gauss_layer(self.flattened, self.magic,
                                                    dim_z)
        self.params += self.latent_layer.params
        self.latent_out = self.latent_layer.output
        # Hidden Layer #########################
        self.hidden_layer = hidden_layer(self.latent_out, dim_z, self.magic)
        self.params += self.hidden_layer.params
        # Back to 4-D so the deconvolution layers can consume it.
        self.hid_out = dropout(
            self.activation(self.hidden_layer.output).reshape(
                (self.inpt.shape[0], self.in_filters[-1],
                 int(self.magic / self.in_filters[-1]), 1)),
            self.dropout_symbolic)
        # Deconv 1 ######################
        self.deconv1 = one_d_deconv_layer(self.hid_out,
                                          self.in_filters[2],
                                          self.in_filters[2],
                                          self.filter_lengths[2],
                                          pool=2.,
                                          param_names=["W3", 'b3'],
                                          distribution=False)
        self.params += self.deconv1.params
        # Deconv 2 ######################
        self.deconv2 = one_d_deconv_layer(self.deconv1.output,
                                          1,
                                          self.in_filters[3],
                                          self.filter_lengths[3],
                                          pool=2.,
                                          param_names=["W4", 'b4'],
                                          distribution=False)
        self.params += self.deconv2.params
        self.tanh_out = self.deconv2.output
        self.last_layer = self.deconv2

        # Crop the reconstruction (and, when enabled, its log-sigma) along
        # axis 2 to the length of the input.
        if self.out_distribution:
            self.trunk_sigma = self.last_layer.log_sigma[:, :, :self.inpt.
                                                         shape[2], :]
        self.trunc_output = self.tanh_out[:, :, :self.inpt.shape[2], :]

        ################################### FUNCTIONS ######################################################
        self.get_latent_states = theano.function(
            [self.inpt],
            self.latent_out,
            givens=[[self.dropout_symbolic, self.dropout_prob]])
        self.get_flattened = theano.function(
            [self.inpt],
            self.flattened,
            givens=[[self.dropout_symbolic, self.dropout_prob]])
        # Compiled once; an identical earlier compile of self.output was
        # redundant because it was immediately overwritten by this one.
        self.output = theano.function(
            [self.inpt],
            self.trunc_output,
            givens=[[self.dropout_symbolic, self.dropout_prob]])
        # Same decoder output, but with the encoder's latent output replaced
        # by the shared generative_z value.
        self.generate_from_z = theano.function(
            [self.inpt],
            self.trunc_output,
            givens=[[self.dropout_symbolic, self.dropout_prob],
                    [self.latent_out, self.generative_z]])

        # Both attributes come from separate self.MSE() calls.
        self.cost = self.MSE()
        self.mse = self.MSE()

        self.derivatives = T.grad(self.cost, self.params)
        self.updates = adam(self.params, self.derivatives, self.learning_rate)
        self.train_model = theano.function(
            inputs=[self.inpt, self.df],
            outputs=self.cost,
            updates=self.updates,
            givens=[[self.dropout_symbolic, self.dropout_prob]])
Ejemplo n.º 17
0
def unet(in_channels=1, out_channels=2, start_filters=64, input_side_length=572, depth=4, convolutions=2, filter_size=3, sparse_labels=True, batch_size=1, padded_convolutions=False):
    """Assemble the TensorFlow graph of a U-Net and return its endpoints.

    The input placeholder has shape (batch, side, side, channels); it is
    transposed with perm [0, 3, 1, 2] so that every layer runs with
    data_format="NCHW", and the logits are transposed back with
    perm [0, 2, 3, 1] before being returned.

    Only padded convolutions are supported: calling with
    padded_convolutions=False (the default) raises NotImplementedError.

    Returns:
        (inputs, logits, ground_truth, keep_prob): the input placeholder,
        the un-activated class scores, the label placeholder (int32 without
        a channel axis when sparse_labels is true, float32 with out_channels
        channels otherwise) and the scalar dropout placeholder.
    """
    if not padded_convolutions:
        raise NotImplementedError("padded_convolutions=False has not yet been implemented!")

    pool_size = 2
    padding = "SAME" if padded_convolutions else "VALID"

    # With padded convolutions the spatial size is preserved; otherwise it is
    # derived from depth, convolutions per step and the filter size.
    if padded_convolutions:
        output_side_length = input_side_length
    else:
        output_side_length = get_output_side_length(input_side_length, depth, convolutions, filter_size, pool_size)

    # Keyword arguments shared by every conv_block call below.
    conv_kwargs = dict(convolutions=convolutions, padding=padding, data_format="NCHW")

    # ---- Placeholders ----
    with tf.variable_scope('inputs'):
        inputs = tf.placeholder(tf.float32, shape=(batch_size, input_side_length, input_side_length, in_channels), name='inputs')

        label_shape = (batch_size, output_side_length, output_side_length)
        if sparse_labels:
            label_dtype = tf.int32
        else:
            label_dtype = tf.float32
            label_shape = label_shape + (out_channels,)
        ground_truth = tf.placeholder(label_dtype, shape=label_shape, name='labels')

        keep_prob = tf.placeholder(tf.float32, shape=[], name='keep_prob')

        network_input = tf.transpose(inputs, perm=[0, 3, 1, 2])

    # ---- Local building blocks ----
    def _pool_then_drop(features):
        # max pool followed by dropout
        pooled = layers.max_pool(features, pool_size, data_format="NCHW")
        return layers.dropout(pooled, keep_prob)

    def _contract(scope_name, tensor):
        # [conv block -> max pool -> dropout]; also hands back the pre-pool
        # features so the expanding path can reuse them
        with tf.variable_scope(scope_name):
            features = layers.conv_block(tensor, filter_size, channel_multiplier=2, **conv_kwargs)
            reduced = _pool_then_drop(features)
        return reduced, features

    def _expand(scope_name, bottom, skip):
        # [upconv + concat -> dropout -> conv block]
        with tf.variable_scope(scope_name):
            merged = layers.upconv_concat_block(bottom, skip, data_format="NCHW")
            merged = layers.dropout(merged, keep_prob)
            return layers.conv_block(merged, filter_size, channel_multiplier=0.5, **conv_kwargs)

    # ---- Contracting path ----
    with tf.variable_scope('contracting'):

        # The first step fixes the filter count instead of multiplying it.
        with tf.variable_scope("step_0"):
            first_features = layers.conv_block(network_input, filter_size, out_filters=start_filters, **conv_kwargs)
            current_tensor = _pool_then_drop(first_features)

        skips = [first_features]
        for level in xrange(1, depth):
            current_tensor, level_features = _contract("step_{}".format(level), current_tensor)
            skips.append(level_features)

    # ---- Bottom conv block ----
    with tf.variable_scope("step_{}".format(depth)):
        current_tensor = layers.conv_block(current_tensor, filter_size, channel_multiplier=2, **conv_kwargs)

    # ---- Expanding path: consume the stored features deepest-first ----
    with tf.variable_scope("expanding"):
        skips.reverse()
        for i in xrange(depth):
            current_tensor = _expand("step_{}".format(depth + i + 1), current_tensor, skips[i])

    # ---- Classification ----
    # A 1x1 convolution written out by hand so that no activation is applied
    # (softmax is applied later by the caller).
    in_filters = current_tensor.shape.as_list()[1]
    stddev = np.sqrt(2. / in_filters)

    with tf.variable_scope("classification"):
        weight = layers.weight_variable([1, 1, in_filters, out_channels], stddev, name="weights")
        bias = layers.bias_variable([out_channels, 1, 1], name="biases")

        scores = tf.nn.conv2d(current_tensor, weight, strides=[1, 1, 1, 1], padding="VALID", name="conv", data_format="NCHW")
        logits = tf.transpose(scores + bias, perm=[0, 2, 3, 1])

    return inputs, logits, ground_truth, keep_prob
Ejemplo n.º 18
0
	def __init__(self,x_train,dim_z=10,batch_size = 10,filter_no = [5.,5.,5.],filter_l = [10.,10.,10.],
		pooling_d=3,pooling_s=2,learning_rate = 0.0008,dim_y=None,y_train=None,diff=None,magic=5000):
		"""Build the symbolic graph and compile the Theano functions of the model.

		Graph: 1-D convolution -> batch norm -> activation -> max pool ->
		dropout -> flatten -> variational Gaussian layer -> hidden layer ->
		1-D deconvolution, trained on ``self.MSE()`` with ``adam``.
		When ``y_train`` is given the model is conditional: the label column
		``self.Y`` is passed through two extra hidden layers whose activations
		are concatenated to the encoder features and to the latent code.

		Args:
			x_train: training inputs; only stored on the instance here.
			dim_z: size of the latent code.
			batch_size: stored on the instance; not used while building the graph.
			filter_no: stored as ``self.filter_lengths``.
			filter_l: stored as ``self.in_filters``.
				NOTE(review): the two names look swapped relative to the
				attributes they feed -- confirm against the layer signatures.
			pooling_d: pool size along the pooled axis.
			pooling_s: pool stride along the pooled axis.
			learning_rate: initial value of the shared float32 learning rate.
			dim_y: output width of the label layers; only used (and only
				stored as ``self.dim_y``) when ``y_train`` is given.
			y_train: training labels, or None for the unconditional model.
			diff: stored as ``self.diff``.
			magic: width passed to the latent layer as its input size and to
				the hidden layer as its output size; presumably it has to
				equal the flattened encoder width -- TODO confirm.
		"""
		# `y_train != None` is evaluated elementwise for NumPy arrays, which makes
		# `if y_train != None` ambiguous or an error; test identity once instead.
		conditional = y_train is not None

		####################################### SETTINGS ###################################
		self.x_train = x_train
		self.y_train = y_train
		if conditional:
			self.dim_y = dim_y
		self.diff=diff
		self.batch_size = batch_size
		self.learning_rate = theano.shared(np.float32(learning_rate))
		self.performance = {"train":[]}
		self.inpt = T.ftensor4(name='input')
		self.Y = T.fcol(name= 'label')
		self.df = T.fmatrix(name='differential')
		self.dim_z = dim_z
		self.magic =magic
		self.pooling_d = pooling_d
		self.pooling_s = pooling_s
		# Shared buffers substituted into the graph by generate_from_z / generate_from_hid.
		self.generative_z = theano.shared(np.float32(np.zeros([1,dim_z])))
		self.generative_hid = theano.shared(np.float32(np.zeros([1,magic])))
		self.activation =relu
		self.out_distribution=False
		self.in_filters = filter_l
		self.filter_lengths = filter_no
		self.params = []

		# Dropout probability used by every dropout call below (0.0 by default).
		self.d_o_prob = theano.shared(np.float32(0.0))
		####################################### LAYERS ######################################
		# LAYER 1 ##############################
		self.conv1 = one_d_conv_layer(self.inpt,self.in_filters[0],1,self.filter_lengths[0],param_names = ["W1",'b1'])
		self.params+=self.conv1.params
		self.bn1 = batchnorm(self.conv1.output)
		self.nl1 = self.activation(self.bn1.X)
		self.maxpool1 = ds.max_pool_2d(self.nl1,[self.pooling_d,1],st=[self.pooling_s,1],ignore_border = False).astype(theano.config.floatX)
		self.layer1_out = dropout(self.maxpool1,self.d_o_prob)
		self.flattened = T.flatten(self.layer1_out,outdim = 2)
		# Conditional + variational layer #####################
		if conditional:
			# Separate label embeddings for the encoder and the decoder side.
			self.c_enc =hidden_layer(self.Y,1,self.dim_y)
			self.c_dec = hidden_layer(self.Y,1,self.dim_y,param_names = ["W10",'b10'])
			self.params+=self.c_enc.params
			self.params+=self.c_dec.params
			self.c_nl = self.activation(self.c_enc.output)
			self.c_nl_dec = self.activation(self.c_dec.output)
			self.concatenated = T.concatenate((self.flattened,self.c_nl),axis = 1)
			self.latent_layer = variational_gauss_layer(self.concatenated,self.magic+self.dim_y,dim_z)
		else:
			self.latent_layer = variational_gauss_layer(self.flattened,self.magic,dim_z)
		self.params+=self.latent_layer.params
		self.latent_out = self.latent_layer.output
		# Hidden Layer #########################
		if conditional:
			self.dec_concat = T.concatenate((self.latent_out,self.c_nl_dec),axis = 1)
			self.hidden_layer = hidden_layer(self.dec_concat,self.dim_z+self.dim_y,self.magic)
		else:
			self.hidden_layer = hidden_layer(self.latent_out,dim_z,self.magic)
		self.params+=self.hidden_layer.params
		# Reshape the hidden activations to 4-D before deconvolving them.
		self.hid_out = dropout(self.activation(self.hidden_layer.output).reshape((self.inpt.shape[0],self.in_filters[-1],int(self.magic/self.in_filters[-1]),1)),self.d_o_prob)
		# Deconvolutional 1 ######################
		self.deconv1 = one_d_deconv_layer(self.hid_out,1,self.in_filters[2],self.filter_lengths[2],pool=self.pooling_d,param_names = ["W3",'b3'],distribution=False)
		self.params+=self.deconv1.params
		self.tanh_out = self.deconv1.output
		self.last_layer = self.deconv1

		# Crop the reconstruction to the length of the input along axis 2.
		if self.out_distribution==True:
			self.trunk_sigma =  self.last_layer.log_sigma[:,:,:self.inpt.shape[2],:]
		self.trunc_output = self.tanh_out[:,:,:self.inpt.shape[2],:]
		self.cost = self.MSE()
		self.mse = self.MSE()

		self.derivatives = T.grad(self.cost,self.params)
		self.updates =adam(self.params,self.derivatives,self.learning_rate)

		################################### FUNCTIONS ######################################################
		# Decode from the shared buffer generative_hid instead of the hidden layer output.
		self.generate_from_hid = theano.function([self.inpt],self.trunc_output,givens = [[self.hidden_layer.output,self.generative_hid]])
		self.get_flattened = theano.function([self.inpt],self.flattened)
		if conditional:
			self.generate_from_z = theano.function([self.inpt,self.Y],self.trunc_output,givens = [[self.latent_out,self.generative_z]])
			self.train_model = theano.function(inputs = [self.inpt,self.df,self.Y],outputs = self.cost,updates = self.updates)
			self.get_latent_states = theano.function([self.inpt,self.Y],self.latent_out)
			self.get_c_enc = theano.function([self.Y],self.c_enc.output)
			self.output = theano.function([self.inpt,self.Y],self.trunc_output)
			self.get_concat = theano.function([self.inpt,self.Y],self.concatenated)
		else:
			self.generate_from_z = theano.function([self.inpt],self.trunc_output,givens = [[self.latent_out,self.generative_z]])
			self.train_model = theano.function(inputs = [self.inpt,self.df],outputs = self.cost,updates = self.updates)
			self.output = theano.function([self.inpt],self.trunc_output)
			self.get_latent_states = theano.function([self.inpt],self.latent_out)
Ejemplo n.º 19
0
def parameter_efficient(in_channels=1, out_channels=2, start_filters=64, input_side_length=256, depth=4, res_blocks=2, filter_size=3, sparse_labels=True, batch_size=1, activation="cReLU", batch_norm=True):
    """Assemble the TensorFlow graph of a residual encoder/decoder network.

    The input placeholder has shape (batch, side, side, channels); it is
    transposed with perm [0, 3, 1, 2] so every layer runs with
    data_format="NCHW", and the logits are transposed back with
    perm [0, 2, 3, 1] before being returned.

    Args:
        activation: "ReLU" or "cReLU" (case-insensitive); anything else
            raises ValueError.
        batch_norm: forwarded to every res_block and used to enable the
            batch normalization of the first convolution.
        (the remaining arguments size the placeholders and the network)

    Returns:
        (inputs, logits, ground_truth, keep_prob, training): the input
        placeholder, the un-activated class scores, the label placeholder,
        the scalar dropout placeholder and the boolean training placeholder.
    """
    activation = str.lower(activation)
    if activation not in ("relu", "crelu"):
        raise ValueError('activation must be "ReLU" or "cReLU".')

    pool_size = 2

    # Keyword arguments shared by every res_block call below.
    res_kwargs = dict(depthwise_multiplier=2, convolutions=2, activation=activation, batch_norm=batch_norm, data_format="NCHW")

    # ---- Placeholders ----
    with tf.variable_scope('inputs'):
        inputs = tf.placeholder(tf.float32, shape=(batch_size, input_side_length, input_side_length, in_channels), name='inputs')

        label_shape = (batch_size, input_side_length, input_side_length)
        if sparse_labels:
            label_dtype = tf.int32
        else:
            label_dtype = tf.float32
            label_shape = label_shape + (out_channels,)
        ground_truth = tf.placeholder(label_dtype, shape=label_shape, name='labels')

        keep_prob = tf.placeholder(tf.float32, shape=[], name='keep_prob')
        training = tf.placeholder(tf.bool, shape=[], name="training")

        network_input = tf.transpose(inputs, perm=[0, 3, 1, 2])

    # ---- Local building blocks ----
    def _widening_stack(tensor, kernel, n_blocks, is_training):
        # First block uses channel_multiplier=2 and also returns the tiled
        # input, which is added back after the remaining blocks.
        with tf.variable_scope("res_block_0"):
            features, shortcut = layers.res_block(tensor, kernel, channel_multiplier=2, training=is_training, **res_kwargs)

        for idx in xrange(1, n_blocks):
            with tf.variable_scope("res_block_{}".format(idx)):
                features = layers.res_block(features, kernel, channel_multiplier=1, training=is_training, **res_kwargs)

        return features + shortcut

    def _biased_conv(tensor, n_in, n_out):
        # Plain SAME convolution plus bias, with a He-style initial stddev.
        init_std = np.sqrt(2. / (filter_size**2 * n_in))
        kernel = layers.weight_variable([filter_size, filter_size, n_in, n_out], stddev=init_std, name="weights")
        convolved = tf.nn.conv2d(tensor, kernel, [1, 1, 1, 1], padding="SAME", data_format="NCHW")
        return convolved + layers.bias_variable([n_out, 1, 1], name='biases')

    def _descend(name, input_, filter_size=3, res_blocks=2, keep_prob=1., training=False):
        # [residual stack -> max pool], with dropout on both the pooled output
        # (passed down) and the un-pooled output (passed sideways)
        with tf.variable_scope(name):
            features = _widening_stack(input_, filter_size, res_blocks, training)
            pooled = layers.max_pool(features, pool_size, data_format="NCHW")
            downward = layers.dropout(pooled, keep_prob)
            sideways = layers.dropout(features, keep_prob)
        return downward, sideways

    def _ascend(name, bottom_input, side_input, filter_size=3, res_blocks=2, keep_prob=1., training=False):
        # [upconv + add -> residual blocks -> dropout]
        with tf.variable_scope(name):
            features = layers.upconv_add_block(bottom_input, side_input, data_format="NCHW")
            for idx in xrange(res_blocks):
                with tf.variable_scope("res_block_{}".format(idx)):
                    features = layers.res_block(features, filter_size, channel_multiplier=1, training=training, **res_kwargs)
            return layers.dropout(features, keep_prob)

    # ---- Contracting path ----
    with tf.variable_scope('contracting'):

        with tf.variable_scope("step_0"):
            # Conv 1
            stem = _biased_conv(network_input, in_channels, start_filters)

            # Batch Norm 1
            if batch_norm:
                stem = tf.layers.batch_normalization(stem, axis=1, momentum=0.999, center=True, scale=True, training=training, trainable=True, name="batch_norm", fused=True)

            # Concatenated ReLU doubles the channel count seen by Conv 2.
            width = start_filters
            if activation == "crelu":
                stem = tf.concat([stem, -stem], axis=1)
                width = 2 * width
            stem = tf.nn.relu(stem)

            # Conv 2
            stem = _biased_conv(stem, width, start_filters)

            # Res Block 1
            first_features = layers.res_block(stem, filter_size, channel_multiplier=1, training=training, **res_kwargs)
            first_pooled = layers.max_pool(first_features, pool_size, data_format="NCHW")

            bottom_out = layers.dropout(first_pooled, keep_prob)
            side_out = layers.dropout(first_features, keep_prob)

        outputs = [side_out]

        for level in xrange(1, depth):
            bottom_out, side_out = _descend('step_{}'.format(level), bottom_out, filter_size=filter_size, res_blocks=res_blocks, keep_prob=keep_prob, training=training)
            outputs.append(side_out)

    # ---- Bottom residual stack ----
    with tf.variable_scope('step_{}'.format(depth)):
        bottom_features = _widening_stack(bottom_out, filter_size, res_blocks, training)
        current_tensor = layers.dropout(bottom_features, keep_prob)

    # ---- Expanding path: consume the side outputs deepest-first ----
    with tf.variable_scope('expanding'):
        outputs.reverse()
        for i in xrange(depth):
            current_tensor = _ascend('step_{}'.format(depth + i + 1), current_tensor, outputs[i], filter_size=filter_size, res_blocks=res_blocks, keep_prob=keep_prob, training=training)

    # ---- Classification ----
    # A 1x1 convolution written out by hand so that no activation is applied
    # (softmax is applied later by the caller).
    in_filters = current_tensor.shape.as_list()[1]
    stddev = np.sqrt(2. / in_filters)

    with tf.variable_scope('classification'):
        w = layers.weight_variable([1, 1, in_filters, out_channels], stddev, name='weights')
        b = layers.bias_variable([out_channels, 1, 1], name='biases')

        scores = tf.nn.conv2d(current_tensor, w, strides=[1, 1, 1, 1], padding="SAME", data_format="NCHW", name='conv')
        logits = tf.transpose(scores + b, perm=[0, 2, 3, 1])

    return inputs, logits, ground_truth, keep_prob, training
Ejemplo n.º 20
0
    def forward(self, x1, x2):
        """Score a premise/hypothesis pair.

        Inputs:
        x1 = premise word indices                [batch * len_1]
        x2 = hypothesis word indices             [batch * len_2]

        Only word embeddings are fed to the encoders; the word-feature,
        POS, NER, contextual-embedding and padding-mask inputs of earlier
        versions are not accepted by this method, and every layer that takes
        a mask receives None.

        Returns:
            The output of self.classifier applied to the concatenation of
            the averaged and max-pooled premise and hypothesis
            representations (unnormalized scores, -inf to inf).

        Raises:
            ValueError: if opt['full_att_type'] is not 0, 1 or 2, or if
                opt['final_merge'] is not 'avg' or 'linear_self_attn'.
        """
        # Fail early on unsupported options; previously these fell through the
        # if/elif chains and surfaced as an UnboundLocalError further down.
        full_att_type = self.opt['full_att_type']
        if full_att_type not in (0, 1, 2):
            raise ValueError(
                "unsupported full_att_type: %r (expected 0, 1 or 2)"
                % (full_att_type,))
        final_merge = self.opt['final_merge']
        if final_merge not in ('avg', 'linear_self_attn'):
            raise ValueError(
                "unsupported final_merge: %r (expected 'avg' or "
                "'linear_self_attn')" % (final_merge,))

        # Word embeddings (a separate embedding table is used in eval mode)
        emb = self.embedding if self.training else self.eval_embed
        x1_emb, x2_emb = emb(x1), emb(x2)
        # Dropout on embeddings
        if self.opt['dropout_emb'] > 0:
            x1_emb = layers.dropout(x1_emb,
                                    p=self.opt['dropout_emb'],
                                    training=self.training)
            x2_emb = layers.dropout(x2_emb,
                                    p=self.opt['dropout_emb'],
                                    training=self.training)

        # Now the features are ready
        # x1_input: [batch_size, doc_len, input_size]
        # x2_input: [batch_size, doc_len, input_size]
        x1_input = x1_emb
        x2_input = x2_emb

        # Encode premise with RNN
        P_abstr_ls = self.P_rnn(x1_input)
        # Encode hypothesis with RNN
        H_abstr_ls = self.H_rnn(x2_input)

        # Fusion: full_att_type selects what the attention is computed from
        # (*_atts) and what it is applied to (*_xs).
        if full_att_type == 0:
            # last RNN layer only, for both
            P_atts = P_abstr_ls[-1].contiguous()
            H_atts = H_abstr_ls[-1].contiguous()
            P_xs = P_abstr_ls[-1].contiguous()
            H_xs = H_abstr_ls[-1].contiguous()
        elif full_att_type == 1:
            # attend over input + all RNN layers, apply to the last layer
            P_atts = torch.cat([x1_input] + P_abstr_ls, 2)
            H_atts = torch.cat([x2_input] + H_abstr_ls, 2)
            P_xs = P_abstr_ls[-1].contiguous()
            H_xs = H_abstr_ls[-1].contiguous()
        else:  # full_att_type == 2
            # attend over input + all RNN layers, apply to all RNN layers
            P_atts = torch.cat([x1_input] + P_abstr_ls, 2)
            H_atts = torch.cat([x2_input] + H_abstr_ls, 2)
            P_xs = torch.cat(P_abstr_ls, 2)
            H_xs = torch.cat(H_abstr_ls, 2)
        aP_xs = self.full_attn_P(P_atts, H_atts, P_xs, H_xs, None)
        aH_xs = self.full_attn_H(H_atts, P_atts, H_xs, P_xs, None)
        P_hiddens = torch.cat([P_xs, aP_xs], 2)
        H_hiddens = torch.cat([H_xs, aH_xs], 2)

        # Inference on premise and hypothesis
        P_hiddens = torch.cat(self.P_infer_rnn(P_hiddens, None), 2)
        H_hiddens = torch.cat(self.H_infer_rnn(H_hiddens, None), 2)

        # Merge hiddens for answer classification
        if final_merge == 'avg':
            P_merge_weights = layers.uniform_weights(P_hiddens, None)
            H_merge_weights = layers.uniform_weights(H_hiddens, None)
        else:  # final_merge == 'linear_self_attn'
            P_merge_weights = self.self_attn_P(P_hiddens, None)
            H_merge_weights = self.self_attn_H(H_hiddens, None)
        P_avg_hidden = layers.weighted_avg(P_hiddens, P_merge_weights)
        H_avg_hidden = layers.weighted_avg(H_hiddens, H_merge_weights)
        P_max_hidden = torch.max(P_hiddens, 1)[0]
        H_max_hidden = torch.max(H_hiddens, 1)[0]

        # Predict scores for different classes
        scores = self.classifier(
            torch.cat([P_avg_hidden, H_avg_hidden, P_max_hidden, H_max_hidden],
                      1))

        return scores  # -inf to inf
Ejemplo n.º 21
0
    def __init__(self, numpy_rng, theano_rng=None, 
            n_ins=N_FEATURES * N_FRAMES,
            relu_layers_sizes=[1024, 1024, 1024],
            recurrent_connections=[2],  # layer(s), can only be i^t -> i^{t+1}
            n_outs=62 * 3,
            rho=0.9, eps=1.E-6):
        """Build the symbolic graph of a ReLU network with a dropout twin.

        Two parallel stacks are created layer by layer: a dropout stack
        (``self.dropout_relu_layers``) and a plain stack
        (``self.relu_layers``) whose W and b are the dropout layer's W and b
        multiplied by ``1 - input_dropout_rate``. Layers whose index is
        listed in ``recurrent_connections`` are built as ``RecurrentReLU``
        instead. A ``LogisticRegression`` layer is put on top of each stack,
        and the costs and error expression are defined from them.

        Args:
            numpy_rng: random generator handed to the layers; also seeds
                ``theano_rng`` when that is not supplied.
            theano_rng: optional Theano random stream.
            n_ins: input width of the first layer.
            relu_layers_sizes: output width of each hidden layer.
            recurrent_connections: indices of the layers built as
                ``RecurrentReLU``.
            n_outs: number of outputs of the logistic layers.
            rho: stored as ``self._rho`` (adadelta "momentum").
            eps: stored as ``self._eps`` (adadelta epsilon).
        """

        self.relu_layers = []
        self.dropout_relu_layers = []
        self.params = []
        self.dropout_params = []
        self.n_layers = len(relu_layers_sizes)
        self._rho = rho  # ``momentum'' for adadelta
        self._eps = eps  # epsilon for adadelta
        self._accugrads = []  # for adadelta
        self._accudeltas = []  # for adadelta
        self.n_outs = n_outs

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

        # Symbolic minibatch inputs and integer targets.
        self.x = T.fmatrix('x')
        self.y = T.ivector('y')

        input_dropout_rate = IN_DROPOUT_RATE
        for i in xrange(self.n_layers):
            # Input width: the network input for the first layer, otherwise
            # the width of the previous hidden layer.
            if i == 0:
                input_size = n_ins
            else:
                input_size = relu_layers_sizes[i-1]

            # Inputs of both stacks; from the second layer on, the dropout
            # rate used for weight scaling is the previous dropout layer's.
            if i == 0:
                layer_input = self.x
                dropout_layer_input = dropout(numpy_rng, self.x, p=input_dropout_rate)
            else:
                layer_input = self.relu_layers[-1].output
                dropout_layer_input = self.dropout_relu_layers[-1].output
                input_dropout_rate = self.dropout_relu_layers[-1].dropout_rate

            if i in recurrent_connections:
                # TODO
                # NOTE(review): this branch creates no dropout twin and does
                # not extend self.dropout_params, so the append after the
                # if/else re-appends the dropout layer of the previous
                # iteration (and fails with an unbound name when i == 0).
                inputr_size = relu_layers_sizes[i]
                previous_output = T.fmatrix('previous_output')
                relu_layer = RecurrentReLU(rng=numpy_rng,
                        input=layer_input, in_stack=previous_output,
                        n_in=input_size, n_in_stack=inputr_size,
                        n_out=inputr_size)
                #relu_layer.in_stack = relu_layer.output # TODO TODO TODO
                # /TODO
                self.params.extend(relu_layer.params)
                # NOTE(review): the accumulator shapes below are built from
                # n_ins, relu_layers_sizes[0] and n_outs regardless of i;
                # presumably they should match relu_layer.params -- confirm.
                self._accugrads.extend([shared(value=numpy.zeros((n_ins, relu_layers_sizes[0]), dtype='float32'), name='accugrad_W', borrow=True), shared(value=numpy.zeros((relu_layers_sizes[0], ), dtype='float32'), name='accugrad_b', borrow=True), shared(value=numpy.zeros((n_outs, relu_layers_sizes[0]), dtype='float32'), name='accugrad_Ws', borrow=True)])
                self._accudeltas.extend([shared(value=numpy.zeros((n_ins, relu_layers_sizes[0]), dtype='float32'), name='accudelta_W', borrow=True), shared(value=numpy.zeros((relu_layers_sizes[0], ), dtype='float32'), name='accudelta_b', borrow=True), shared(value=numpy.zeros((n_outs, relu_layers_sizes[0]), dtype='float32'), name='accudelta_Ws', borrow=True)])

            else:
                dropout_relu_layer = DropoutReLU(rng=numpy_rng,
                        input=dropout_layer_input,
                        n_in=input_size,
                        n_out=relu_layers_sizes[i])
                # The plain layer reuses the dropout layer's parameters,
                # scaled by the keep probability of its input.
                relu_layer = ReLU(rng=numpy_rng,
                        input=layer_input,
                        n_in=input_size,
                        n_out=relu_layers_sizes[i],
                        W=dropout_relu_layer.W * (1 - input_dropout_rate),
                        b=dropout_relu_layer.b * (1 - input_dropout_rate))
                        #b=dropout_relu_layer.b) TODO check

                self.dropout_params.extend(dropout_relu_layer.params)
                self.params.extend(relu_layer.params)

                # One zero-initialised accumulator pair (W, b) per layer.
                self._accugrads.extend([shared(value=numpy.zeros((input_size, relu_layers_sizes[i]), dtype='float32'), name='accugrad_W', borrow=True), shared(value=numpy.zeros((relu_layers_sizes[i], ), dtype='float32'), name='accugrad_b', borrow=True)])
                self._accudeltas.extend([shared(value=numpy.zeros((input_size, relu_layers_sizes[i]), dtype='float32'), name='accudelta_W', borrow=True), shared(value=numpy.zeros((relu_layers_sizes[i], ), dtype='float32'), name='accudelta_b', borrow=True)])

            self.dropout_relu_layers.append(dropout_relu_layer)
            self.relu_layers.append(relu_layer)


        # We now need to add a logistic layer on top of the MLP
        self.dropout_logLayer = LogisticRegression(
            input=self.dropout_relu_layers[-1].output,
            n_in=relu_layers_sizes[-1],
            n_out=n_outs)
        self.logLayer = LogisticRegression(  # TODO check weights multiplication
            input=self.relu_layers[-1].output,
            n_in=relu_layers_sizes[-1],
            n_out=n_outs,
            W=self.dropout_logLayer.W * (1 - self.dropout_relu_layers[-1].dropout_rate),
            b=self.dropout_logLayer.b * (1 - self.dropout_relu_layers[-1].dropout_rate))
            #b=self.dropout_logLayer.b) TODO check
        self.dropout_params.extend(self.dropout_logLayer.params)
        self.params.extend(self.logLayer.params)
        self._accugrads.extend([shared(value=numpy.zeros((relu_layers_sizes[-1], n_outs), dtype='float32'), name='accugrad_W', borrow=True), shared(value=numpy.zeros((n_outs, ), dtype='float32'), name='accugrad_b', borrow=True)])
        self._accudeltas.extend([shared(value=numpy.zeros((relu_layers_sizes[-1], n_outs), dtype='float32'), name='accudelta_W', borrow=True), shared(value=numpy.zeros((n_outs, ), dtype='float32'), name='accudelta_b', borrow=True)])

        # compute the cost for second phase of training, defined as the
        # negative log likelihood of the logistic regression (output) layer,
        # once for the dropout stack and once for the plain stack
        self.dropout_finetune_cost = self.dropout_logLayer.negative_log_likelihood(self.y)
        self.dropout_finetune_cost_sum = self.dropout_logLayer.negative_log_likelihood_sum(self.y)

        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        self.finetune_cost_sum = self.logLayer.negative_log_likelihood_sum(self.y)

        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y (taken from the plain stack)
        self.errors = self.logLayer.errors(self.y)
Ejemplo n.º 22
0
def img_conv_group(input,
                   conv_num_filter,
                   pool_size,
                   conv_padding=1,
                   conv_filter_size=3,
                   conv_act=None,
                   conv_with_batchnorm=False,
                   conv_batchnorm_drop_rate=None,
                   pool_stride=1,
                   pool_type=None,
                   main_program=None,
                   startup_program=None):
    """
    Image Convolution Group, Used for vgg net.

    Stacks one conv2d per entry of ``conv_num_filter`` (each optionally
    followed by batch_norm and dropout) and finishes with a single pool2d.

    Args:
        input: input of the first convolution.
        conv_num_filter (list|tuple): number of filters of each convolution.
        pool_size: pooling size of the final pool2d.
        conv_padding, conv_filter_size, conv_with_batchnorm,
        conv_batchnorm_drop_rate: either one value applied to every
            convolution or a sequence with one entry per convolution.
        conv_act: activation of each convolution; when a convolution is
            followed by batch_norm the activation is applied by the
            batch_norm layer instead.
        conv_batchnorm_drop_rate: dropout probability applied after
            batch_norm. ``None`` (the default) or a value within 1e-5 of
            zero disables dropout for that convolution.
        pool_stride, pool_type: forwarded to pool2d.
        main_program, startup_program: forwarded to every layer call.

    Returns:
        The output of the final pool2d layer.
    """
    tmp = input
    assert isinstance(conv_num_filter, (list, tuple))

    def __extend_list__(obj):
        # Broadcast a scalar setting to one entry per convolution.
        if not hasattr(obj, '__len__'):
            return [obj] * len(conv_num_filter)
        else:
            return obj

    conv_padding = __extend_list__(conv_padding)
    conv_filter_size = __extend_list__(conv_filter_size)
    conv_with_batchnorm = __extend_list__(conv_with_batchnorm)
    conv_batchnorm_drop_rate = __extend_list__(conv_batchnorm_drop_rate)

    for i in xrange(len(conv_num_filter)):
        # With batch norm the activation moves from the conv to the BN layer.
        local_conv_act = conv_act
        if conv_with_batchnorm[i]:
            local_conv_act = None

        tmp = layers.conv2d(
            input=tmp,
            num_filters=conv_num_filter[i],
            filter_size=conv_filter_size[i],
            padding=conv_padding[i],
            act=local_conv_act,
            main_program=main_program,
            startup_program=startup_program)

        if conv_with_batchnorm[i]:
            tmp = layers.batch_norm(
                input=tmp,
                act=conv_act,
                main_program=main_program,
                startup_program=startup_program)
            drop_rate = conv_batchnorm_drop_rate[i]
            # The default drop rate is None; abs(None) used to raise a
            # TypeError here, so treat None as "no dropout".
            if drop_rate is not None and abs(drop_rate) > 1e-5:
                tmp = layers.dropout(
                    x=tmp,
                    dropout_prob=drop_rate,
                    main_program=main_program,
                    startup_program=startup_program)

    pool_out = layers.pool2d(
        input=tmp,
        pool_size=pool_size,
        pool_type=pool_type,
        pool_stride=pool_stride,
        main_program=main_program,
        startup_program=startup_program)
    return pool_out
Ejemplo n.º 23
0
def img_conv_group(input,
                   conv_num_filter,
                   pool_size,
                   conv_padding=1,
                   conv_filter_size=3,
                   conv_act=None,
                   param_attr=None,
                   conv_with_batchnorm=False,
                   conv_batchnorm_drop_rate=0.0,
                   pool_stride=1,
                   pool_type="max",
                   use_cudnn=True,
                   use_mkldnn=False):
    """
    Build an Image Convolution Group: a stack of Conv2d layers, each optionally
    followed by BatchNorm and Dropout, with a single Pool2d applied to the
    output of the last layer of the stack.

    One Conv2d layer is created per entry of ``conv_num_filter``. Per-layer
    settings (``conv_padding``, ``conv_filter_size``, ``param_attr``,
    ``conv_with_batchnorm``, ``conv_batchnorm_drop_rate``) may be given either
    as a single value, which is then used for every Conv2d layer, or as a
    sized sequence with exactly one entry per Conv2d layer.

    Args:
        input (Variable): The input image with [N, C, H, W] format.
        conv_num_filter (list|tuple): The number of filters of each Conv2d
            layer in this group; its length is the number of Conv2d layers.
        pool_size (int|list|tuple): The pooling size of the Pool2d layer. If
            pool_size is a list or tuple, it must contain two integers,
            (pool_size_H, pool_size_W). Otherwise,
            pool_size_H = pool_size_W = pool_size.
        conv_padding (int|list|tuple): The padding size of the Conv2d layers.
            If it is a list or tuple, its length must be equal to the length
            of conv_num_filter. Otherwise the same padding is used for all
            Conv2d layers. Default 1.
        conv_filter_size (int|list|tuple): The filter size. If it is a list or
            tuple, its length must be equal to the length of conv_num_filter.
            Otherwise the same filter size is used for all Conv2d layers.
            Default 3.
        conv_act (str): Activation type. For a Conv2d layer that is not
            followed by BatchNorm it is applied by the Conv2d layer itself;
            when BatchNorm is enabled for a layer, the Conv2d layer gets no
            activation and conv_act is passed to the BatchNorm layer instead.
            Default: None.
        param_attr (ParamAttr): The parameters to the Conv2d layer.
            Default: None.
        conv_with_batchnorm (bool|list): Whether to use BatchNorm after the
            Conv2d layer. If it is a list, its length must be equal to the
            length of conv_num_filter. Otherwise the same choice applies to
            all Conv2d layers. Default False.
        conv_batchnorm_drop_rate (float|list): The drop rate of the Dropout
            layer placed after BatchNorm. Dropout is only added for layers
            that use BatchNorm, and only when the absolute drop rate is
            greater than 1e-5. If it is a list, its length must be equal to
            the length of conv_num_filter. Otherwise the same drop rate is
            used for all layers. Default 0.0.
        pool_stride (int|list|tuple): The pooling stride of the Pool2d layer.
            If pool_stride is a list or tuple, it must contain two integers,
            (pooling_stride_H, pooling_stride_W). Otherwise,
            pooling_stride_H = pooling_stride_W = pool_stride. Default 1.
        pool_type (str): Pooling type, "max" for max-pooling or "avg" for
            average-pooling. Default "max".
        use_cudnn (bool): Use cudnn kernel or not, it is valid only when the
            cudnn library is installed. Default: True
        use_mkldnn (bool): Use mkldnn kernels or not, it is valid only when
            compiled with mkldnn library. Default: False

    Return:
        Variable: The output of the Pool2d layer applied to the result of the
            Conv2d / BatchNorm / Dropout stack.

    Raises:
        AssertionError: If conv_num_filter is not a list or tuple, or if a
            sized per-layer argument does not have one entry per Conv2d layer.

    Examples:
        .. code-block:: python

            img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
            conv_pool = fluid.nets.img_conv_group(input=img,
                                                  conv_padding=1,
                                                  conv_num_filter=[3, 3],
                                                  conv_filter_size=3,
                                                  conv_act="relu",
                                                  pool_size=2,
                                                  pool_stride=2)
    """
    assert isinstance(conv_num_filter, (list, tuple)), \
        "conv_num_filter must be a list or tuple"
    num_convs = len(conv_num_filter)

    def _extend_list(obj):
        # A value without a length is a single setting shared by every conv
        # layer; anything sized must already provide one entry per layer.
        if not hasattr(obj, '__len__'):
            return [obj] * num_convs
        assert len(obj) == num_convs, \
            "per-layer arguments must have the same length as conv_num_filter"
        return obj

    conv_padding = _extend_list(conv_padding)
    conv_filter_size = _extend_list(conv_filter_size)
    param_attr = _extend_list(param_attr)
    conv_with_batchnorm = _extend_list(conv_with_batchnorm)
    conv_batchnorm_drop_rate = _extend_list(conv_batchnorm_drop_rate)

    tmp = input
    # enumerate() instead of xrange() keeps the loop valid on Python 2 and 3.
    for i, num_filters in enumerate(conv_num_filter):
        with_batchnorm = conv_with_batchnorm[i]

        # With BatchNorm the activation is deferred to the BatchNorm layer, so
        # the convolution itself is left without one.
        tmp = layers.conv2d(input=tmp,
                            num_filters=num_filters,
                            filter_size=conv_filter_size[i],
                            padding=conv_padding[i],
                            param_attr=param_attr[i],
                            act=None if with_batchnorm else conv_act,
                            use_cudnn=use_cudnn,
                            use_mkldnn=use_mkldnn)

        if with_batchnorm:
            tmp = layers.batch_norm(input=tmp, act=conv_act, in_place=True)
            drop_rate = conv_batchnorm_drop_rate[i]
            # Treat a (near-)zero rate as "no dropout" and skip the layer.
            if abs(drop_rate) > 1e-5:
                tmp = layers.dropout(x=tmp, dropout_prob=drop_rate)

    pool_out = layers.pool2d(input=tmp,
                             pool_size=pool_size,
                             pool_type=pool_type,
                             pool_stride=pool_stride,
                             use_cudnn=use_cudnn,
                             use_mkldnn=use_mkldnn)
    return pool_out