def model_function(features, targets, mode):

    hlayers = layers.stack(
        features,
        layers.fully_connected, [1000, 100, 50, 20],
        activation_fn=tf.nn.relu,
        weights_regularizer=layers.l1_l2_regularizer(1.0, 2.0),
        weights_initializer=layers.xavier_initializer(uniform=True, seed=100))

    # layers.stack chains fully connected hidden layers; tf.contrib.layers has no
    # built-in partially connected layer, so anything else would need custom code

    outputs = layers.fully_connected(
        inputs=hlayers,
        num_outputs=10,  # 10 perceptrons in output layer for 10 numbers (0 to 9)
        activation_fn=None
    )  # activation_fn=None because softmax is applied inside the softmax_cross_entropy loss

    # Compute the softmax cross-entropy loss between the logits and the targets
    loss = losses.softmax_cross_entropy(outputs, targets)

    optimizer = layers.optimize_loss(
        loss=loss,
        global_step=tf.contrib.framework.get_global_step(),
        learning_rate=0.8,
        optimizer="SGD")

    # The predicted digit is the output unit with the highest softmax probability
    # Return both the class probabilities and the predicted labels
    probs = tf.nn.softmax(outputs)
    return {'probs': probs, 'labels': tf.argmax(probs, 1)}, loss, optimizer
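
For context, a minimal standalone sketch of what layers.l1_l2_regularizer provides (the variable name w_demo and the values below are illustrative, not taken from the example above): the call returns a function that maps a weight tensor to a scalar penalty combining a scaled L1 term and a scaled L2 term, and layers built with weights_regularizer=... record that scalar in tf.GraphKeys.REGULARIZATION_LOSSES rather than adding it to the training loss automatically.

import tensorflow as tf
from tensorflow.contrib import layers

# The regularizer is just a callable: weight tensor in, scalar penalty out.
reg = layers.l1_l2_regularizer(scale_l1=1.0, scale_l2=2.0)
w = tf.get_variable('w_demo',
                    initializer=tf.constant([[0.5, -1.0], [2.0, 0.0]]),
                    regularizer=reg)
penalty = reg(w)  # scalar tensor: scaled L1 plus scaled L2 of w

# Penalties registered through weights_regularizer / the regularizer= argument
# accumulate in a collection; a common pattern is to add them in explicitly:
reg_loss = tf.losses.get_regularization_loss()
# total_loss = data_loss + reg_loss
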
Example #2
    def init_layer(self, name_suffix, dims):
        wname = 'w' + name_suffix
        w = tf.get_variable(
            wname,
            initializer=tf.random_normal(dims),
            regularizer=l1_l2_regularizer(scale_l1=self.reg_beta,
                                          scale_l2=self.reg_beta),
            dtype=tf.float32)
        sw = tf.summary.histogram(wname, w)
        self.summary_weights.append(sw)

        bname = 'b' + name_suffix
        b = tf.get_variable(bname,
                            initializer=tf.random_normal([dims[1]]),
                            dtype=tf.float32)
        sb = tf.summary.histogram(bname, b)
        self.summary_weights.append(sb)

        self.transform_params[wname] = w
        self.transform_params[bname] = b

        wext = tf.placeholder(tf.float32, dims, name=wname + '_ext')
        bext = tf.placeholder(tf.float32, [dims[1]], name=bname + '_ext')

        w_transform_ops = w.assign(w * (1 - self.transform_lr) +
                                   wext * self.transform_lr)
        self.transform_ops.append(w_transform_ops)

        b_transform_ops = b.assign(b * (1 - self.transform_lr) +
                                   bext * self.transform_lr)
        self.transform_ops.append(b_transform_ops)

        return w, b
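
The assign ops above nudge each variable toward an externally fed value by a factor transform_lr; a self-contained sketch of that soft-update pattern (the shape, names, and the 0.1 factor are illustrative assumptions, not taken from the class):

import tensorflow as tf

transform_lr = 0.1
w = tf.get_variable('w_soft_demo', initializer=tf.random_normal([4, 3]))
w_ext = tf.placeholder(tf.float32, [4, 3], name='w_soft_demo_ext')
# Move w a fraction transform_lr of the way toward the fed-in external weights.
soft_update = w.assign(w * (1 - transform_lr) + w_ext * transform_lr)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(soft_update, feed_dict={w_ext: [[0.0] * 3] * 4})
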
Example #3
    def add_linear_output_layer(self, last_hidden_layer, ground_truth, corpus_tag, task_tag, loss_weight=1):
        # returns loss op
        with tf.variable_scope("output_layer_%s" % task_tag) as layer_scope:
            last_out = fully_connected(last_hidden_layer, 1, activation_fn=tf.identity,
                                       weights_regularizer=l1_l2_regularizer(self.l1_reg, self.l2_reg),
                                       scope=layer_scope)
            self.predictions = last_out

        with tf.name_scope("%s_loss_%s" % (corpus_tag, task_tag)):
            loss = loss_weight * tf.reduce_mean(tf.squared_difference(last_out, ground_truth))
            utils.variable_summaries(loss, "loss", corpus_tag)
            tf.add_to_collection(tf.GraphKeys.LOSSES, loss)

        with tf.name_scope('%s_accuracy_%s' % (corpus_tag, task_tag)):
            accuracy, _ = streaming_mean_relative_error(last_out, ground_truth, ground_truth,
                                                        name="acc_%s" % corpus_tag,
                                                        updates_collections=tf.GraphKeys.UPDATE_OPS)
            accuracy = 1 - accuracy
            utils.variable_summaries(accuracy, "accuracy", corpus_tag)

            updates_op = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            self.calculate_accuracy_op = control_flow_ops.with_dependencies(updates_op, accuracy)
Example #4
    def init_neurons(self, input_layer, wname, wnum, bias_name=None):
        ishape = input_layer.get_shape()[1].value
        dims = [ishape, wnum]

        w = tf.get_variable(
            wname,
            initializer=tf.random_normal(dims),
            regularizer=l1_l2_regularizer(scale_l1=self.reg_beta,
                                          scale_l2=self.reg_beta),
            dtype=tf.float32)
        sw = tf.summary.histogram(wname, w)
        self.summary_weights.append(sw)

        self.transform_params[wname] = w
        wext = tf.placeholder(tf.float32, dims, name=wname + '_ext')
        w_transform_ops = w.assign(w * (1 - self.transform_lr) +
                                   wext * self.transform_lr)
        self.transform_ops.append(w_transform_ops)

        h = tf.matmul(input_layer, w)

        if bias_name:
            b = tf.get_variable(bias_name,
                                initializer=tf.random_normal([wnum]),
                                dtype=tf.float32)
            sb = tf.summary.histogram(bias_name, b)
            self.summary_weights.append(sb)

            self.transform_params[bias_name] = b

            bext = tf.placeholder(tf.float32, [wnum], name=bias_name + '_ext')

            b_transform_ops = b.assign(b * (1 - self.transform_lr) +
                                       bext * self.transform_lr)
            self.transform_ops.append(b_transform_ops)

            h = tf.add(h, b)

        return h
Example #5
    def make_hidden_FN_layers(self, input_layer):
        previous_out = input_layer

        with tf.variable_scope("hidden_layers"):
            for i in range(1, self.num_layers + 1):
                with tf.variable_scope("layer%d" % i) as layer_scope:
                    if self.is_residual and i > 1:
                        previous_out = tf.add(previous_out, tf.ones_like(previous_out))
                    previous_out = fully_connected(previous_out,
                                                   self.num_hidden_units, activation_fn=tf.nn.relu,
                                                   normalizer_fn=batch_norm,
                                                   normalizer_params={"scale": i == self.num_layers,
                                                                      "is_training": self.is_training,
                                                                      "decay": 0.9},
                                                   weights_regularizer=l1_l2_regularizer(self.l1_reg, self.l2_reg),
                                                   scope=layer_scope)

                    # if i == self.num_layers:
                    if i % 2 == 0:
                        previous_out = tf.nn.dropout(previous_out, self.keep_prob)

        last_hidden_layer = previous_out
        return last_hidden_layer
Example #6
    def add_classification_output_layer(self, last_hidden_layer, gt_labels, num_classes, corpus_tag, task_tag,
                                        loss_weight=1):
        # returns loss op
        with tf.variable_scope("output_layer_%s" % task_tag) as layer_scope:
            last_out = fully_connected(last_hidden_layer, num_classes, activation_fn=tf.identity,
                                       weights_regularizer=l1_l2_regularizer(self.l1_reg, self.l2_reg),
                                       scope=layer_scope)
            self.predictions = tf.nn.softmax(last_out)

        with tf.name_scope("%s_loss_%s" % (corpus_tag, task_tag)):
            loss = loss_weight * tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(logits=last_out, labels=gt_labels))
            utils.variable_summaries(loss, "loss", corpus_tag)
            tf.add_to_collection(tf.GraphKeys.LOSSES, loss)

        with tf.name_scope('%s_accuracy_%s' % (corpus_tag, task_tag)):
            # correct_prediction = tf.equal(tf.argmax(last_out, 1), gt_labels)
            # accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) * 100
            accuracy, _ = streaming_accuracy(tf.argmax(last_out, 1), gt_labels, name="acc_%s" % corpus_tag,
                                             updates_collections=tf.GraphKeys.UPDATE_OPS)

            utils.variable_summaries(accuracy, "accuracy", corpus_tag)

            updates_op = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            self.calculate_accuracy_op = control_flow_ops.with_dependencies(updates_op, accuracy)
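
Both this example and Example #3 rely on the tf.contrib.metrics streaming metrics, which return a (value, update_op) pair; a minimal sketch of that pattern with illustrative placeholder tensors (tf.control_dependencies plus tf.identity is used here as an equivalent of control_flow_ops.with_dependencies):

import tensorflow as tf
from tensorflow.contrib.metrics import streaming_accuracy

preds = tf.placeholder(tf.int64, [None])
labels = tf.placeholder(tf.int64, [None])
# The first return value reads the running accuracy; the update op is routed
# into the UPDATE_OPS collection instead of being used directly.
acc, _ = streaming_accuracy(preds, labels,
                            updates_collections=tf.GraphKeys.UPDATE_OPS)
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    acc_with_update = tf.identity(acc)  # evaluating this also refreshes the totals
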
Example #7
    def fit(self, x, y):
        """
        Fit a ConvDeconv2D model on data

        Arguments
        ---------
        x : np.ndarray
            array with 3 dimensions (nb_samples, height, width) or 
            array with 4 dimensions (nb_samples, height, width, channels)

        y : np.ndarray
            array with 3 dimensions (nb_samples, height, width) or 
            array with 4 dimensions (nb_samples, height, width, channels)
        """
        tf.reset_default_graph()
        x_orig, y_orig, y_onehot = process_inputs_2D(x, y, self.NB_CLASSES)

        in_shape = [None] + list(x_orig.shape[1:])
        orig_out_shape = list(y_orig.shape[1:])
        soft_out_shape = [None] + list(y_onehot.shape[1:])

        ### CONSTRUCTION PHASE ###
        X = tf.placeholder(tf.float32, shape=in_shape, name='X')
        y = tf.placeholder(tf.int32, shape=soft_out_shape, name='y')

        # CONV LAYERS #
        with tf.variable_scope('conv_layers'):
            with framework.arg_scope([layers.conv2d],
                                    weights_initializer=layers.xavier_initializer(),
                                    weights_regularizer=layers.l1_l2_regularizer(\
                                        scale_l1=self.L1_PENALTY,scale_l2=self.L2_PENALTY),
                                    activation_fn=tf.nn.relu,
                                    padding='SAME'):
                for idx, c in enumerate(self.CONV_LAYERS):
                    if idx == 0:
                        # connect to input tensor
                        conv = layers.conv2d(X, c[0], (c[1], c[1]), stride=1)
                    else:
                        # connect to previous conv layer
                        conv = layers.conv2d(conv,
                                             c[0], (c[1], c[1]),
                                             stride=1)

        # DECONV LAYERS #
        with tf.variable_scope('deconv_layers'):
            with framework.arg_scope([layers.conv2d_transpose],
                                    weights_initializer=layers.xavier_initializer(),
                                    activation_fn=tf.nn.relu,
                                    weights_regularizer=layers.l1_l2_regularizer(\
                                        scale_l1=self.L1_PENALTY,scale_l2=self.L2_PENALTY),
                                    padding='SAME'):
                for idx, c in enumerate(self.CONV_LAYERS[::-1]):
                    if idx < len(self.CONV_LAYERS) - 1:
                        # not last layer
                        conv = layers.conv2d_transpose(conv,
                                                       c[0], (c[1], c[1]),
                                                       stride=1)
                    else:
                        # last layer
                        conv = layers.conv2d_transpose(
                            conv,
                            orig_out_shape[-1] * self.NB_CLASSES, (c[1], c[1]),
                            stride=1)

        # SOFTMAX RESHAPE LAYER #
        with tf.variable_scope('softmax_layer'):
            soft_shape = [
                tf.shape(conv)[0],
                np.prod(orig_out_shape), self.NB_CLASSES
            ]
            softmax_reshape = tf.reshape(conv, soft_shape)

        # LOSS #
        with tf.name_scope('loss'):
            loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(
                    labels=tf.cast(y, tf.float32), logits=softmax_reshape))

        # OPTIMIZER #
        with tf.variable_scope('train'):
            optimizer = tf.train.AdamOptimizer(learning_rate=self.LEARN_RATE)
            train_op = optimizer.minimize(loss)

        # EVALUATORS #

        with tf.name_scope('eval'):
            prob_map = tf.nn.softmax(softmax_reshape)
            soft_flat = tf.reshape(softmax_reshape,
                                   [-1, self.NB_CLASSES])  # [pixels, classes] logits
            y_flat = tf.reshape(tf.argmax(y, 2), [-1])  # per-pixel class indices
            correct = tf.nn.in_top_k(soft_flat, y_flat, 1)
            accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

        ### EXECUTION PHASE ###

        # PRE-EXECUTION VARIABLES #
        if self.SAVE_PATH is not None:
            saver = tf.train.Saver()
        best_test_loss = 1e9
        init = tf.global_variables_initializer()

        # TRAINING ROUTINE #
        with tf.Session() as sess:

            if self.RESTORE_PATH is not None:
                print('Restoring Model')
                saver.restore(sess, self.RESTORE_PATH)
            else:
                print('Initializing Model')
                sess.run(init)

            for epoch in range(self.NB_EPOCH):
                for b_idx in range(int(x_orig.shape[0] / self.BATCH_SIZE)):
                    xbatch = x_orig[b_idx * self.BATCH_SIZE:(b_idx + 1) *
                                    self.BATCH_SIZE]
                    ybatch = y_onehot[b_idx * self.BATCH_SIZE:(b_idx + 1) *
                                      self.BATCH_SIZE]
                    # run train op
                    sess.run(train_op, feed_dict={X: xbatch, y: ybatch})
                # evaluate on the last training batch (no separate test split here)
                test_acc = sess.run(accuracy, feed_dict={X: xbatch, y: ybatch})
                test_loss = sess.run(loss, feed_dict={X: xbatch, y: ybatch})
                print('Epoch : %i , Test Loss : %.04f, Test Acc: %.04f' %
                      (epoch, test_loss, test_acc))

                # save model after each epoch if test loss is the best
                if self.SAVE_PATH is not None and test_loss < best_test_loss:
                    best_test_loss = test_loss
                    saver.save(sess, self.SAVE_PATH, write_meta_graph=False)
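
The softmax_layer / loss pair above views the conv output as a stack of per-element K-way classification problems; a standalone sketch of that reshape with assumed shapes (8x8 map, 3 classes, one output channel):

import tensorflow as tf

NB_CLASSES = 3  # assumed class count
conv_out = tf.placeholder(tf.float32, [None, 8, 8, 1 * NB_CLASSES])
# View the [batch, H, W, C*K] feature map as [batch, H*W*C, K] logits.
logits = tf.reshape(conv_out, [tf.shape(conv_out)[0], 8 * 8 * 1, NB_CLASSES])
onehot = tf.placeholder(tf.float32, [None, 8 * 8 * 1, NB_CLASSES])
# Cross-entropy is taken over the last (class) dimension for every element.
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=onehot, logits=logits))
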
Example #8
def buildGraph(input_placeholder_s1, input_placeholder_s2, labels_placeholder,
               mask_placeholder_s1, mask_placeholder_s2, dropout_placeholder,
               embeddings_matrix):
    params = tf.Variable(embeddings_matrix)
    tensor_s1 = tf.nn.embedding_lookup(params, input_placeholder_s1)
    tensor_s2 = tf.nn.embedding_lookup(params, input_placeholder_s2)
    embeddings_s1 = tf.reshape(tensor_s1, [-1, max_length, embed_size])
    embeddings_s2 = tf.reshape(tensor_s2, [-1, max_length, embed_size])
    #print embeddings_s1.shape
    #print tf.boolean_mask(embeddings_s1, mask_placeholder_s1, axis=1).shape
    #embeddings = tf.concat([tf.reduce_mean(tf.boolean_mask(embeddings_s1, mask_placeholder_s1), axis=1), tf.reduce_mean(tf.boolean_mask(embeddings_s2, mask_placeholder_s2), axis=1)], 0)
    #print embeddings.shape

    dropout_rate = dropout_placeholder

    preds = []
    cell1 = LSTMCell(embed_size, hidden_size)
    cell2 = LSTMCell2(embed_size, hidden_size)

    c = tf.zeros([tf.shape(embeddings_s1)[0], hidden_size])
    h = tf.zeros([tf.shape(embeddings_s2)[0], hidden_size])
    initial_state = tf.contrib.rnn.LSTMStateTuple(c, h)
    l1 = tf.reduce_sum(tf.cast(mask_placeholder_s1, tf.int32), axis=1)
    outputs1, state1 = tf.nn.dynamic_rnn(cell1,
                                         embeddings_s1,
                                         dtype=tf.float32,
                                         initial_state=initial_state,
                                         sequence_length=l1)
    h = tf.zeros([tf.shape(embeddings_s2)[0], hidden_size])
    initial_state = tf.contrib.rnn.LSTMStateTuple(state1.c, h)
    l2 = tf.reduce_sum(tf.cast(mask_placeholder_s2, tf.int32), axis=1)
    outputs2, state2 = tf.nn.dynamic_rnn(cell2,
                                         embeddings_s2,
                                         dtype=tf.float32,
                                         initial_state=initial_state,
                                         sequence_length=l2)

    # TODO: check preds, apply the bitmask to the losses, and make sure the correct function is optimized
    func = xavier_weight_init()
    U = tf.Variable(func([hidden_size, n_classes]))
    b1 = tf.Variable(tf.zeros([1, n_classes]))
    h_drop = tf.nn.dropout(state2.h, keep_prob=1 - dropout_rate)
    pred = tf.matmul(h_drop, U) + b1
    tf.add_to_collection('ops_to_restore', pred)
    #pred = tf.add(tf.matmul(h_drop, U), b1, name="pred")

    loss = tf.nn.softmax_cross_entropy_with_logits(labels=labels_placeholder,
                                                   logits=pred)
    loss = tf.reduce_mean(loss)
    regularizer = l1_l2_regularizer(l1_reg, l2_reg)
    reg_loss = apply_regularization(regularizer, tf.trainable_variables())
    loss += reg_loss
    #y = labels_placeholder
    #loss = tf.nn.l2_loss(y-preds)
    #loss = tf.reduce_mean(loss)

    optimizer = tf.train.AdamOptimizer(learning_rate=lr)
    #train_op = optimizer.minimize(loss)

    #optimizer = tf.train.GradientDescentOptimizer(learning_rate=lr)
    gradients = optimizer.compute_gradients(loss)
    grads = [x[0] for x in gradients]
    grads, global_norm = tf.clip_by_global_norm(grads, max_grad_norm)
    gradients = [(grads[i], gradients[i][1]) for i in range(len(grads))]
    train_op = optimizer.apply_gradients(gradients)
    return pred, loss, train_op
Example #9
def nn(x,
       reuse=True,
       nchstart=32,
       act_fn=tf.nn.leaky_relu,
       TRAIN_FLAG=True,
       REG=False):
    """
    Takes as input the (processed) measurements and 
    estimates a projection of the original image.

    Params
    ------
    x: batch_size, img_size, img_size, nch
    reuse: reuse variables flag
    nchstart: number of output channels in the first convolutional layer
    act_fn: activation function
    REG: Flag to add regularization loss
    TRAIN_FLAG: 'is_training' flag for batch_norm

    Returns
    -------
    out: [batch_size, img_size*img_size] vectors on which the projection will be applied
    reg_loss: scalar, regularization loss in the middle layer, 0 if REG is False

    """
    nchannels = nchstart
    normalizer_params = {'is_training': TRAIN_FLAG}

    reg = tcl.l1_l2_regularizer(scale_l1=1e-4, scale_l2=1e-4)
    reg_loss = 0

    params = {
        'kernel_size': 3,
        'activation_fn': act_fn,
        'normalizer_fn': tcl.batch_norm,
        'normalizer_params': normalizer_params
    }

    with tf.variable_scope('projector', reuse=reuse):
        """Downsampling layers"""

        # Block 1
        out1_1 = tcl.conv2d(x, num_outputs=nchannels, **params)
        out1_2 = tcl.conv2d(out1_1, num_outputs=nchannels, **params)
        out_mp1 = tcl.max_pool2d(out1_2, kernel_size=[2, 2], stride=2)

        # Block 2
        out2_1 = tcl.conv2d(out_mp1, num_outputs=2 * nchannels, **params)
        out2_2 = tcl.conv2d(out2_1, num_outputs=2 * nchannels, **params)
        out_mp2 = tcl.max_pool2d(out2_2, kernel_size=[2, 2], stride=2)

        # Block 3
        out3_1 = tcl.conv2d(out_mp2, num_outputs=4 * nchannels, **params)
        out3_2 = tcl.conv2d(out3_1, num_outputs=4 * nchannels, **params)
        out_mp3 = tcl.max_pool2d(out3_2, kernel_size=[2, 2], stride=2)

        # Block 4
        out4_1 = tcl.conv2d(out_mp3, num_outputs=8 * nchannels, **params)
        out4_2 = tcl.conv2d(out4_1, num_outputs=8 * nchannels, **params)
        out_mp4 = tcl.max_pool2d(out4_2, kernel_size=[2, 2], stride=2)

        # Block 5
        out5_1 = tcl.conv2d(out_mp4, num_outputs=16 * nchannels, **params)
        out5_2 = tcl.conv2d(out5_1, num_outputs=16 * nchannels, **params)

        # regularization
        if REG:
            reg_loss = reg(tcl.flatten(out5_2))
        """Upsampling layers"""

        # Block 1
        up_out1_1 = tf.keras.layers.UpSampling2D((2, 2))(out5_2)
        up_out1_1 = tf.concat([out4_2, up_out1_1], axis=3, name='skip_1')
        up_out1_1 = tcl.conv2d(up_out1_1, num_outputs=8 * nchannels, **params)
        up_out1_2 = tcl.conv2d(up_out1_1, num_outputs=8 * nchannels, **params)

        # Block 2
        up_out2_1 = tf.keras.layers.UpSampling2D((2, 2))(up_out1_2)
        up_out2_1 = tf.concat([out3_2, up_out2_1], axis=3, name='skip_2')
        up_out2_1 = tcl.conv2d(up_out2_1, num_outputs=4 * nchannels, **params)
        up_out2_2 = tcl.conv2d(up_out2_1, num_outputs=4 * nchannels, **params)

        # Block 3
        up_out3_1 = tf.keras.layers.UpSampling2D((2, 2))(up_out2_2)
        up_out3_1 = tf.concat([out2_2, up_out3_1], axis=3, name='skip_3')
        up_out3_1 = tcl.conv2d(up_out3_1, num_outputs=2 * nchannels, **params)
        up_out3_2 = tcl.conv2d(up_out3_1, num_outputs=2 * nchannels, **params)

        # Block 4
        up_out4_1 = tf.keras.layers.UpSampling2D((2, 2))(up_out3_2)
        up_out4_1 = tf.concat([out1_2, up_out4_1], axis=3, name='skip_4')
        up_out4_1 = tcl.conv2d(up_out4_1, num_outputs=nchannels, **params)
        up_out4_2 = tcl.conv2d(up_out4_1, num_outputs=nchannels, **params)

        # Block 5
        up_out5_1 = tcl.conv2d(up_out4_2, num_outputs=1, **params)

        out = tf.contrib.layers.flatten(up_out5_1)

    return out, reg_loss
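
A hedged usage sketch for nn() above; the 128x128 single-channel input is an assumption, and the snippet's tcl alias for tf.contrib.layers is taken to be imported in the surrounding module:

import tensorflow as tf

x_in = tf.placeholder(tf.float32, [None, 128, 128, 1], name='x_in')
# Four 2x2 max-pools followed by four 2x upsamplings recover the input
# resolution, so `out` has shape [batch_size, 128 * 128].
out, reg_loss = nn(x_in, reuse=False, nchstart=32, TRAIN_FLAG=True, REG=True)
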
Example #10
def model_function(features, targets, mode):

    # input layer
    # Reshape features to 4-D tensor (55000x28x28x1)
    # MNIST images are 28x28 pixels
    # -1 lets TensorFlow infer the batch size (55000 images here)
    # The trailing 1 is the number of channels: 1 for grayscale, 3 for color images
    input_layer = tf.reshape(features, [-1, 28, 28, 1])

    # Computes 32 features using a 5x5 filter
    # Padding is added to preserve width
    # Input Tensor Shape: [batch_size,28,28,1]
    # Output Tensor Shape: [batch_size,28,28,32]
    conv1 = layers.conv2d(
        inputs=input_layer,
        num_outputs=32,
        kernel_size=[5, 5],
        stride=1,
        padding="SAME",  # pad so the feature map keeps the same spatial size as the input
        activation_fn=tf.nn.relu)

    # Pooling layer 1
    # Max pooling with a 2x2 filter and stride 2
    # Input shape: [batch_size,28,28,32]
    # Output shape: [batch_size,14,14,32]
    pool1 = layers.max_pool2d(inputs=conv1, kernel_size=[2, 2], stride=2)

    # Convolution layer 2
    # Input: 14 x 14 x 32 (32 channels here)
    # Output: 14 x 14 x 64 (64 filters, each spanning all 32 input channels)
    conv2 = layers.conv2d(
        inputs=pool1,
        num_outputs=64,
        kernel_size=[5, 5],
        stride=1,
        padding="SAME",  # pad so the feature map keeps the same spatial size as the input
        activation_fn=tf.nn.relu)

    # Pooling layer 2
    # Input: 14 x14 x 64
    # Output: 7 x 7 x 64
    pool2 = layers.max_pool2d(inputs=conv2, kernel_size=[2, 2], stride=2)

    # Flatten the pool2 to feed to the 1st layer of fully connected layers
    # Input size: [batch_size,7,7,64]
    # Output size: [batch_size, 7x7x64]
    pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])

    # Fully connected layers with 100 and 20 neurons
    # Input shape: [batch_size, 7*7*64]
    # Output shape: [batch_size, 20]
    fclayers = layers.stack(
        pool2_flat,
        layers.fully_connected, [100, 20],
        activation_fn=tf.nn.relu,
        weights_regularizer=layers.l1_l2_regularizer(1.0, 2.0),
        weights_initializer=layers.xavier_initializer(uniform=True, seed=100))

    outputs = layers.fully_connected(
        inputs=fclayers,
        num_outputs=10,  # 10 perceptrons in output layer for 10 numbers (0 to 9)
        activation_fn=None
    )  # activation_fn=None because softmax is applied inside the softmax_cross_entropy loss

    # Compute the softmax cross-entropy loss between the logits and the targets
    loss = losses.softmax_cross_entropy(outputs, targets)

    optimizer = layers.optimize_loss(
        loss=loss,
        global_step=tf.contrib.framework.get_global_step(),
        learning_rate=0.1,
        optimizer="SGD")

    # The predicted digit is the output unit with the highest softmax probability
    # Return both the class probabilities and the predicted labels
    probs = tf.nn.softmax(outputs)
    return {'probs': probs, 'labels': tf.argmax(probs, 1)}, loss, optimizer
Example #11
def buildGraph(input_placeholder_s1, input_placeholder_s2, labels_placeholder,
               mask_placeholder_s1, mask_placeholder_s2, dropout_placeholder,
               embeddings_matrix):
    params = tf.Variable(embeddings_matrix)
    tensor_s1 = tf.nn.embedding_lookup(params, input_placeholder_s1)
    tensor_s2 = tf.nn.embedding_lookup(params, input_placeholder_s2)
    embeddings_s1 = tf.reshape(tensor_s1, [-1, max_length, embed_size])
    embeddings_s2 = tf.reshape(tensor_s2, [-1, max_length, embed_size])
    #print embeddings_s1.shape
    #print tf.boolean_mask(embeddings_s1, mask_placeholder_s1, axis=1).shape
    #embeddings = tf.concat([tf.reduce_mean(tf.boolean_mask(embeddings_s1, mask_placeholder_s1), axis=1), tf.reduce_mean(tf.boolean_mask(embeddings_s2, mask_placeholder_s2), axis=1)], 0)
    #print embeddings.shape

    dropout_rate = dropout_placeholder

    preds = []
    cell1 = LSTMCell(embed_size, hidden_size)
    cell2 = LSTMCell2(embed_size, hidden_size)

    c = tf.zeros([tf.shape(embeddings_s1)[0], hidden_size])
    h = tf.zeros([tf.shape(embeddings_s2)[0], hidden_size])
    initial_state = tf.contrib.rnn.LSTMStateTuple(c, h)
    l1 = tf.reduce_sum(tf.cast(mask_placeholder_s1, tf.int32), axis=1)
    outputs1, state1 = tf.nn.dynamic_rnn(cell1,
                                         embeddings_s1,
                                         dtype=tf.float32,
                                         initial_state=initial_state,
                                         sequence_length=l1)
    h = tf.zeros([tf.shape(embeddings_s2)[0], hidden_size])
    initial_state = tf.contrib.rnn.LSTMStateTuple(state1.c, h)
    l2 = tf.reduce_sum(tf.cast(mask_placeholder_s2, tf.int32), axis=1)
    outputs2, state2 = tf.nn.dynamic_rnn(cell2,
                                         embeddings_s2,
                                         dtype=tf.float32,
                                         initial_state=initial_state,
                                         sequence_length=l2)

    func = xavier_weight_init()

    # Implementation of attention on the final hidden layer
    Y = tf.transpose(outputs1, perm=[0, 2, 1])
    W_y = tf.Variable(func([hidden_size, hidden_size]))
    W_h = tf.Variable(func([hidden_size, hidden_size]))
    e_l = tf.constant(1.0, shape=[1, max_length])
    WY = tf.tensordot(W_y, Y, axes=[[0], [1]])
    WY = tf.transpose(WY, perm=[1, 0, 2])
    h_n = tf.reshape(state2.h, shape=[-1, hidden_size, 1])
    Whe = tf.tensordot(h_n, e_l, axes=[[2], [0]])
    Whe = tf.tensordot(W_h, Whe, axes=[[0], [1]])
    Whe = tf.transpose(Whe, perm=[1, 0, 2])
    M = tf.tanh(WY + Whe)
    w_alpha = tf.Variable(func([1, hidden_size]))
    alpha = tf.nn.softmax(tf.tensordot(w_alpha, M, axes=[[1], [1]]))
    alpha = tf.transpose(alpha, perm=[1, 2, 0])
    alpha = tf.reshape(alpha, shape=[-1, max_length, 1])
    #alpha_entries = tf.unstack(alpha, axis = 0, num=[tf.shape(embeddings_s1)[0]])
    #Y_entries = tf.unstack(Y, axis=0, num=[tf.shape(embeddings_s1)[0]])
    #r = tf.stack([tf.matmul(Y_entries[i], alpha_entries[i]) for i in len(alpha.shape[0])], axis=0)

    #print Y.shape, alpha.shape
    #r = tf.tensordot(Y, alpha, axes=[[2], [1]])
    #r = tf.reduce_mean(r, axis=2)
    #r = r[:, :, 0, :]
    #r = tf.diag_part(r)
    r = tf.matmul(Y, alpha)
    r = tf.reshape(r, shape=[-1, hidden_size])
    #r = Y * alpha
    #print r.shape
    #r = tf.matmul(Y, tf.transpose(alpha, perm=[0, 2, 1]))

    U = tf.Variable(func([hidden_size, n_classes]))
    b1 = tf.Variable(tf.zeros([1, n_classes]))
    W_p = tf.Variable(func([hidden_size, hidden_size]))
    W_x = tf.Variable(func([hidden_size, hidden_size]))
    #print r.shape, state2.h.shape
    hstar = tf.tanh(tf.matmul(r, W_p) + tf.matmul(state2.h, W_x))
    #hstar = tf.tanh(tf.matmul(state2.h, W_x))
    h_drop = tf.nn.dropout(hstar, keep_prob=1 - dropout_rate)
    pred = tf.matmul(h_drop, U) + b1
    #pred = tf.add(tf.matmul(h_drop, U), b1, name="pred")

    loss = tf.nn.softmax_cross_entropy_with_logits(labels=labels_placeholder,
                                                   logits=pred)
    loss = tf.reduce_mean(loss)
    regularizer = l1_l2_regularizer(l1_reg, l2_reg)
    reg_loss = apply_regularization(regularizer, tf.trainable_variables())
    loss += reg_loss
    #y = labels_placeholder
    #loss = tf.nn.l2_loss(y-preds)
    #loss = tf.reduce_mean(loss)

    optimizer = tf.train.AdamOptimizer(learning_rate=lr)
    #train_op = optimizer.minimize(loss)

    #optimizer = tf.train.GradientDescentOptimizer(learning_rate=lr)
    gradients = optimizer.compute_gradients(loss)
    grads = [x[0] for x in gradients]
    grads, global_norm = tf.clip_by_global_norm(grads, max_grad_norm)
    gradients = [(grads[i], gradients[i][1]) for i in range(len(grads))]
    train_op = optimizer.apply_gradients(gradients)
    return pred, loss, train_op
Example #12
# -*- coding:utf-8 -*-
import tensorflow as tf
import new_eval2 as new_eval
from tensorflow.contrib import layers

regularizer = layers.l1_l2_regularizer(scale_l1=1e-6, scale_l2=1e-6)


def S_matri(x1, x2):
    normalized_q = tf.nn.l2_normalize(x1, dim=2)
    normalized_a = tf.nn.l2_normalize(x2, dim=2)
    matri = tf.matmul(normalized_q, tf.transpose(normalized_a, perm=[0, 2, 1]))
    return matri


class GRU_first(object):
    def __init__(self, input, n_output, n_skip, batch_size):
        self.xt_ini = input
        self.batch_size = batch_size
        self.time_step = int(self.xt_ini.get_shape()[1])
        self.n_input = int(self.xt_ini.get_shape()[2])
        self.n_output = n_output
        with tf.variable_scope("gru_q_a"):
            self.skip_Wr = tf.get_variable(shape=[self.n_input, self.n_output],
                                           name="skip_Wr",
                                           regularizer=regularizer)
            self.skip_Ur = tf.get_variable(
                shape=[self.n_output, self.n_output],
                name="skip_Ur",
                regularizer=regularizer)
            self.skip_br = tf.get_variable(name='skip_br',
Example #13
def build_resnet(repetitions=(2, 2, 2, 2),
                 include_top=True,
                 input_tensor=None,
                 input_shape=None,
                 classes=1000,
                 block_type='usual',
                 l1_regular=0.01,
                 l2_regular=0.01):
    """
    TODO
    """

    # Determine proper input shape
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=224,
                                      min_size=197,
                                      data_format='channels_last',
                                      require_flatten=include_top)

    if input_tensor is None:
        img_input = Input(shape=input_shape, name='data')
    else:
        if not K.is_keras_tensor(input_tensor):
            img_input = Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    # get parameters for model layers
    no_scale_bn_params = get_bn_params(scale=False)
    bn_params = get_bn_params()
    conv_params = get_conv_params()
    init_filters = 64

    if block_type == 'basic':
        conv_block = basic_conv_block
        identity_block = basic_identity_block
    else:
        conv_block = usual_conv_block
        identity_block = usual_identity_block
    regular = l1_l2_regularizer(scale_l1=l1_regular, scale_l2=l2_regular)
    # resnet bottom
    x = BatchNormalization(name='bn_data', **no_scale_bn_params)(img_input)
    x = ZeroPadding2D(padding=(3, 3))(x)
    x = Conv2D(init_filters, (7, 7),
               strides=(2, 2),
               kernel_regularizer=regular,
               name='conv0',
               **conv_params)(x)
    x = BatchNormalization(name='bn0', **bn_params)(x)
    x = Activation('relu', name='relu0')(x)
    x = ZeroPadding2D(padding=(1, 1))(x)
    x = MaxPooling2D((3, 3), strides=(2, 2), padding='valid',
                     name='pooling0')(x)

    # resnet body
    for stage, rep in enumerate(repetitions):
        for block in range(rep):

            filters = init_filters * (2**stage)

            # first block of first stage without strides because we have maxpooling before
            if block == 0 and stage == 0:
                x = conv_block(filters,
                               stage,
                               block,
                               strides=(1, 1),
                               l1_regular=l1_regular,
                               l2_regular=l2_regular)(x)

            elif block == 0:
                x = conv_block(filters,
                               stage,
                               block,
                               strides=(2, 2),
                               l1_regular=l1_regular,
                               l2_regular=l2_regular)(x)

            else:
                x = identity_block(filters,
                                   stage,
                                   block,
                                   l1_regular=l1_regular,
                                   l2_regular=l2_regular)(x)

    x = BatchNormalization(name='bn1', **bn_params)(x)
    x = Activation('relu', name='relu1')(x)

    # resnet top
    if include_top:
        x = GlobalAveragePooling2D(name='pool1')(x)
        x = Dense(classes, name='fc1')(x)
        x = Activation('softmax', name='softmax')(x)

    # Ensure that the model takes into account any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = Model(inputs, x)

    return model
Example #14
import tensorflow as tf
from tensorflow.contrib.layers import l1_regularizer, l2_regularizer, l1_l2_regularizer

REGULARISATORS = {
    'none': lambda arg: tf.constant(0.0),
    'l1': l1_regularizer(1.0),
    'l2': l2_regularizer(1.0),
    'l1_l2': l1_l2_regularizer(1.0, 1.0)
}
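
Each entry above maps a weight tensor to a scalar penalty with a regularizer-style call signature; a short sketch of applying one during a sweep (the variable and loop below are hypothetical, not from this file):

w = tf.get_variable('w_sweep_demo', shape=[32, 16])
for name, reg_fn in REGULARISATORS.items():
    penalty = reg_fn(w)  # scalar regularisation term for this configuration
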

NBS_EPOCHS = [5, 10, 20, 50, 100, 200]

BATCH_SIZES = [1, 16, 32, 64, 128, 512]

ARCHITECTURES = [
    [128] * 0,
    [128] * 1,
    # [128] * 2,
    # [128] * 3,
    # [128] * 4,
]

OPTIMISERS = [
    tf.train.AdamOptimizer(learning_rate=1e-3),
    tf.train.GradientDescentOptimizer(learning_rate=1e-3),
    tf.train.AdadeltaOptimizer(learning_rate=1e-3),
    tf.train.RMSPropOptimizer(learning_rate=1e-3)
]