Example #1
    def _get_mean(self, _input):
        """
    """
        dirs = self.directives
        num_layers = dirs['num_layers']
        num_nodes = dirs['num_nodes']
        activation = dirs['activation']
        net_grow_rate = dirs['net_grow_rate']

        output_dim = self._oslot_to_shape[0][-1]
        with tf.variable_scope(self.name + '_mean', reuse=tf.AUTO_REUSE):
            # Define the Means
            hid_layer = fully_connected(
                _input,
                num_nodes,
                activation_fn=activation,
                biases_initializer=tf.random_normal_initializer(
                    stddev=1 / np.sqrt(num_nodes)))
            for _ in range(num_layers - 1):
                num_nodes = int(num_nodes * net_grow_rate)
                hid_layer = fully_connected(
                    hid_layer,
                    num_nodes,
                    activation_fn=activation,
                    biases_initializer=tf.random_normal_initializer(
                        stddev=1 / np.sqrt(num_nodes)))
            mean = fully_connected(hid_layer, output_dim, activation_fn=None)

        return mean, hid_layer
Example #2
def embedding_model(im_feats,
                    sent_feats,
                    train_phase,
                    im_labels,
                    fc_dim=2048,
                    embed_dim=512):
    """
        Build two-branch embedding networks.
        fc_dim: the output dimension of the first fc layer.
        embed_dim: the output dimension of the second fc layer, i.e.
                   embedding space dimension.
    """
    # Image branch.
    im_fc1 = add_fc(im_feats, fc_dim, train_phase, 'im_embed_1')
    im_fc2 = fully_connected(im_fc1,
                             embed_dim,
                             activation_fn=None,
                             scope='im_embed_2')
    i_embed = tf.nn.l2_normalize(im_fc2, 1, epsilon=1e-10)
    # Text branch.
    sent_fc1 = add_fc(sent_feats, fc_dim, train_phase, 'sent_embed_1')
    sent_fc2 = fully_connected(sent_fc1,
                               embed_dim,
                               activation_fn=None,
                               scope='sent_embed_2')
    s_embed = tf.nn.l2_normalize(sent_fc2, 1, epsilon=1e-10)
    return i_embed, s_embed
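
A minimal usage sketch for the two-branch model above. Since both outputs are L2-normalized, image-sentence similarity reduces to a dot product. This assumes TF 1.x with add_fc and fully_connected in scope; the placeholder shapes (4096-d image features, 6000-d sentence features) and the similarity computation are illustrative assumptions, not part of the original example.

import tensorflow as tf

im_feats = tf.placeholder(tf.float32, [None, 4096])    # e.g. CNN image features (assumed dim)
sent_feats = tf.placeholder(tf.float32, [None, 6000])  # e.g. text features (assumed dim)
train_phase = tf.placeholder(tf.bool, [])
im_labels = tf.placeholder(tf.float32, [None, None])   # unused inside this particular body

i_embed, s_embed = embedding_model(im_feats, sent_feats, train_phase, im_labels)
# Cosine similarity between every image and every sentence in the batch:
sim = tf.matmul(i_embed, s_embed, transpose_b=True)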
Example #3
def get_network(img_emb, sent_emb):
    im_fc1 = add_fc(img_emb, 2048, True, 'im_embed_1')
    im_fc2 = fully_connected(im_fc1, 512, activation_fn=None, scope='im_embed_2')
    i_embed = tf.nn.l2_normalize(im_fc2, 1, epsilon=1e-10)
    sent_fc1 = add_fc(sent_emb, 2048, True, 'sent_embed_1')
    sent_fc2 = fully_connected(sent_fc1, 512, activation_fn=None, scope='sent_embed_2')
    s_embed = tf.nn.l2_normalize(sent_fc2, 1, epsilon=1e-10)
    return i_embed, s_embed
Example #4
  def _build_out_default(self, Z):
    """
    """
    ydim  = self.ydim
#     with tf.variable_scope('DetAE', reuse=tf.AUTO_REUSE):
    full_out_1 = fully_connected(Z, 64)
    full_out_2 = fully_connected(full_out_1, 128)
    Yprime = fully_connected(full_out_2, ydim)
    return Yprime
Example #5
  def _generate_out(self, Z):
    """Decode the latent code Z back to the output space via a small MLP."""
    ydim = self.ydim
    full_out_1 = fully_connected(Z, 64)
    full_out_2 = fully_connected(full_out_1, 128)
    Yprime = fully_connected(full_out_2, ydim)

    return Yprime
Example #6
def embedding_model(vd_feats,
                    sent_feats,
                    train_phase,
                    vd_labels,
                    fc_dim=2048,
                    embed_dim=512):
    """
        Build two-branch embedding networks.
        fc_dim: the output dimension of the first fc layer.
        embed_dim: the output dimension of the second fc layer, i.e.
                   embedding space dimension.
    """
    # Video branch.
    dim_hidden = 1000
    dim_video = 4096
    n_video_lstm_step = 80
    batch_size = vd_feats.shape[0]
    lstm1 = tf.contrib.rnn.BasicLSTMCell(dim_hidden, state_is_tuple=False)
    lstm2 = tf.contrib.rnn.BasicLSTMCell(dim_hidden, state_is_tuple=False)
    encode_video_W = tf.Variable(tf.random_uniform([dim_video, dim_hidden],
                                                   -0.1, 0.1),
                                 name='encode_video_W')
    encode_video_b = tf.Variable(tf.zeros([dim_hidden]), name='encode_video_b')
    video_flat = tf.reshape(vd_feats, [-1, dim_video])
    video_emb = tf.nn.xw_plus_b(video_flat, encode_video_W, encode_video_b)
    video_emb = tf.reshape(video_emb,
                           [batch_size, n_video_lstm_step, dim_hidden])

    state1 = tf.zeros([batch_size, lstm1.state_size])
    state2 = tf.zeros([batch_size, lstm2.state_size])
    padding = tf.zeros([batch_size, dim_hidden])
    with tf.variable_scope(tf.get_variable_scope()) as scope:
        for i in range(0, n_video_lstm_step):
            if i > 0:
                tf.get_variable_scope().reuse_variables()
            with tf.variable_scope("LSTM1"):
                output1, state1 = lstm1(video_emb[:, i, :], state1)
            with tf.variable_scope("LSTM2"):
                output2, state2 = lstm2(tf.concat([padding, output1], 1),
                                        state2)

    im_fc1 = add_fc(tf.concat([state1, state2], 1), fc_dim, train_phase,
                    'vd_embed_1')
    im_fc2 = fully_connected(im_fc1,
                             embed_dim,
                             activation_fn=None,
                             scope='vd_embed_2')
    v_embed = tf.nn.l2_normalize(im_fc2, 1, epsilon=1e-10)
    # Text branch.
    sent_fc1 = add_fc(sent_feats, fc_dim, train_phase, 'sent_embed_1')
    sent_fc2 = fully_connected(sent_fc1,
                               embed_dim,
                               activation_fn=None,
                               scope='sent_embed_2')
    s_embed = tf.nn.l2_normalize(sent_fc2, 1, epsilon=1e-10)
    return v_embed, s_embed
Example #7
    def _linear(self, input_tensor, output_nums, l2_reg, activation_fn=None):
        if l2_reg <= 0:
            return layers.fully_connected(
                input_tensor, output_nums, activation_fn=activation_fn,
                weights_initializer=layers.xavier_initializer(),
                biases_initializer=layers.xavier_initializer())
        else:
            return layers.fully_connected(
                input_tensor, output_nums, activation_fn=activation_fn,
                weights_initializer=layers.xavier_initializer(),
                biases_initializer=layers.xavier_initializer(),
                weights_regularizer=layers.l2_regularizer(l2_reg),
                biases_regularizer=layers.l2_regularizer(l2_reg))
Example #8
def encode_phrases(args, phrase_plh, train_phase_plh, num_phrases_plh, phrase_feature_dim, phrase_denom_plh, vecs):
    final_embed = args.dim_embed
    embed_dim = final_embed * 4
    phrase_plh = tf.reshape(phrase_plh, [-1, num_phrases_plh, phrase_feature_dim])
    # sometimes finetuning word embedding helps (with l2 reg), but often doesn't
    # seem to make a big difference
    word_embeddings = tf.get_variable('word_embeddings', vecs.shape, initializer=tf.constant_initializer(vecs), trainable = args.embedding_ft)
    embedded_words = tf.nn.embedding_lookup(word_embeddings, phrase_plh)

    # if you do finetune
    embed_l2reg = tf.zeros(1)
    if args.embedding_ft:
        embed_l2reg = tf.nn.l2_loss(word_embeddings - vecs)

    eps = 1e-10
    if args.language_model == 'gru':
        phrase_plh = tf.reshape(phrase_plh, [-1, phrase_feature_dim])
        source_sequence_length = tf.reduce_sum(tf.cast(phrase_plh > 0, tf.int32), 1)
        embedded_words = tf.reshape(embedded_words, [-1, phrase_feature_dim, vecs.shape[1]])
        encoder_cell = tf.nn.rnn_cell.GRUCell(final_embed)
        encoder_outputs, encoder_state = tf.nn.dynamic_rnn(
            encoder_cell, embedded_words, dtype=encoder_cell.dtype,
            sequence_length=source_sequence_length)
        final_outputs = extract_axis_1(encoder_outputs, source_sequence_length-1)
        phrase_input = tf.reshape(final_outputs, [-1, num_phrases_plh, final_embed])

        outputs = fully_connected(phrase_input, embed_dim, activation_fn=None,
                                  weights_regularizer=tf.contrib.layers.l2_regularizer(0.005),
                                  scope='phrase_encoder')
        phrase_embed = tf.nn.l2_normalize(outputs, 2, epsilon=eps)
    else:
        num_words = tf.reduce_sum(tf.to_float(phrase_plh > 0), 2, keep_dims=True) + eps
        phrase_input = tf.nn.l2_normalize(tf.reduce_sum(embedded_words, 2) / num_words, 2)
        if args.language_model == 'attend':
            context_vector = tf.tile(tf.expand_dims(phrase_input, 2), (1, 1, phrase_feature_dim, 1))
            attention_inputs = tf.concat((context_vector, embedded_words), 3)
            attention_weights = fully_connected(attention_inputs, 1,
                                                weights_regularizer=l2_regularizer(0.0005),
                                                scope='self_attend')
            attention_weights = tf.nn.softmax(tf.squeeze(attention_weights))
            phrase_input = tf.nn.l2_normalize(tf.reduce_sum(embedded_words * tf.expand_dims(attention_weights, 3), 2), 2)
            phrase_input = tf.reshape(phrase_input, [-1, num_phrases_plh, vecs.shape[1]])

        if args.cca_parameters:
            parameters = pickle.load(open(args.cca_parameters, 'rb'))
            phrase_embed = setup_initialize_fc_layers(args, phrase_input, parameters, 'lang', train_phase_plh, norm_axis=2)
        else:
            phrase_embed = embedding_branch(phrase_input, embed_dim, train_phase_plh, 'phrase', norm_axis=2)

    concept_weights = embedding_branch(phrase_input, embed_dim, train_phase_plh, 'concept_weight',
                                       do_l2norm=False, outdim=args.num_embeddings)
    concept_loss = tf.reduce_sum(tf.norm(concept_weights, axis=2, ord=1)) / phrase_denom_plh
    concept_weights = tf.nn.softmax(concept_weights)
    return phrase_embed, concept_weights, concept_loss, embed_l2reg
Example #9
  def _build_default(self):
    """
    """
    Y = self.Y
    zdim = self.zdim
    with tf.variable_scope('DetAE', reuse=tf.AUTO_REUSE):
      full_in_1 = fully_connected(Y, 128)
      full_in_2 = fully_connected(full_in_1, 64)
      Z = fully_connected(full_in_2, zdim)

      Yprime = self._build_out_default(Z)    
    return Yprime, Z
Example #10
  def _define_default(self):
    """
    """
    Y = self.Y
    zdim = self.zdim
    
    full_in_1 = fully_connected(Y, 128)
    full_in_2 = fully_connected(full_in_1, 64)
    Z = fully_connected(full_in_2, zdim)

    Yprime = self._generate_out(Z)    
    return Yprime, Z
Example #11
def define_graph(input_image, batch_size):
    print("start defining the graph...")

    # Convolutional layer 1
    conv2d_layer_1 = convolution2d(
        input_image,
        num_outputs=32,            # number of filters
        kernel_size=(5, 5),        # filter height and width
        activation_fn=tf.nn.relu,
        weights_initializer=tf.random_normal_initializer,
        stride=[2, 2],
        trainable=True)

    pool_layer_1 = tf.nn.max_pool(conv2d_layer_1, ksize=[1, 2, 2, 1],
                                  strides=[1, 2, 2, 1],
                                  padding='SAME')

    # Convolutional layer 2
    conv2d_layer_2 = convolution2d(
        pool_layer_1,
        num_outputs=64,
        kernel_size=(5, 5),
        activation_fn=tf.nn.relu,
        weights_initializer=tf.random_normal_initializer,
        stride=(1, 1),
        trainable=True)

    pool_layer_2 = tf.nn.max_pool(conv2d_layer_2, ksize=[1, 2, 2, 1],
                                  strides=[1, 2, 2, 1],
                                  padding='SAME')

    flattened_layer_2 = tf.reshape(pool_layer_2, [batch_size, -1])

    # weights_initializer also accepts a callable; here a lambda returns a truncated
    # normal with the given stddev. The hard-coded [38912, 512] shape assumes a
    # fixed input resolution for the flattened pooling output.
    hidden_layer_3 = fully_connected(
        flattened_layer_2,
        512,
        weights_initializer=lambda i, dtype, partition_info: tf.truncated_normal([38912, 512], stddev=0.1),
        activation_fn=tf.nn.relu)

    # Apply dropout to some neurons to reduce their importance in the model
    hidden_layer_3 = tf.nn.dropout(hidden_layer_3, 0.1)

    # The output is a fully connected layer over the 120 dog breeds available in training
    final_fully_connected = fully_connected(
        hidden_layer_3,
        120,  # 120 dog breeds
        weights_initializer=lambda i, dtype, partition_info: tf.truncated_normal([512, 120], stddev=0.1))
    print("graph is ready")
    return final_fully_connected
Example #12
def generator(z, y):
    '''
    args:
        z: random vector
        y: condition (label) vector
    returns:
        net: generator network
    '''
    zf = layers.fully_connected(z, 1000, scope='fcz')
    yf = layers.fully_connected(y, 200, scope='fcy')
    net = tf.concat([zf, yf], 1)
    net = layers.fully_connected(net, 1200, activation_fn=tf.nn.relu, scope='fc1')
    net = layers.fully_connected(net, 1200, activation_fn=tf.nn.relu, scope='fc2')
    net = layers.fully_connected(net, WIDTH*WIDTH, scope='fc3')
    return net
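
A usage sketch for this conditional generator. WIDTH is defined elsewhere in the original source; the 28x28 value, the 100-d latent, and the 10 classes below are assumptions for illustration.

import tensorflow as tf
from tensorflow.contrib import layers

WIDTH = 28  # assumption; the original defines WIDTH elsewhere

z = tf.random_normal([64, 100])        # batch of 64 latent vectors (assumed dim)
y = tf.one_hot(tf.fill([64], 3), 10)   # condition every sample on class 3
fake = generator(z, y)                 # -> [64, WIDTH*WIDTH] flattened images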
Example #13
def pose_net(tgt_image, src_image_stack, is_training=True):

    inputs = tf.concat([tgt_image, src_image_stack], axis=3)
    B, H, W, C = src_image_stack.get_shape().as_list()
    num_source = int(C // 3)
    with tf.variable_scope('pose_net') as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        with slim.arg_scope(
            [slim.conv2d, slim.conv2d_transpose, fully_connected],
                normalizer_fn=None,
                weights_regularizer=slim.l2_regularizer(0.05),
                activation_fn=tf.nn.relu,
                outputs_collections=end_points_collection):
            # cnv1 to cnv5b are shared between pose and explainability prediction
            cnv1 = slim.conv2d(inputs, 16, [7, 7], stride=2, scope='cnv1')
            cnv2 = slim.conv2d(cnv1, 32, [5, 5], stride=2, scope='cnv2')
            cnv3 = slim.conv2d(cnv2, 64, [3, 3], stride=2, scope='cnv3')
            cnv4 = slim.conv2d(cnv3, 128, [3, 3], stride=2, scope='cnv4')
            cnv5 = slim.conv2d(cnv4, 256, [3, 3], stride=2, scope='cnv5')
            # Pose specific layers
            with tf.variable_scope('pose'):
                cnv6 = slim.conv2d(cnv5, 256, [3, 3], stride=2, scope='cnv6')
                cnv7 = slim.conv2d(cnv6, 512, [3, 3], stride=2, scope='cnv7')

                flat_1 = tf.reshape(cnv7, (B, -1))

                dense_1 = fully_connected(flat_1,
                                          512,
                                          activation_fn=tf.nn.relu)

                pose_pred_xyz = fully_connected(dense_1,
                                                3 * num_source,
                                                activation_fn=None)

                pose_pred_quat = fully_connected(dense_1,
                                                 3 * num_source,
                                                 activation_fn=None)

                pose_avg = tf.concat((pose_pred_xyz, pose_pred_quat), 1)

                pose_avg = 0.1 * pose_avg

                pose_final = tf.reshape(pose_avg, [-1, num_source, 6])

            end_points = utils.convert_collection_to_dict(
                end_points_collection)

            return pose_final, end_points
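
A shape-level usage sketch, assuming slim, utils, and fully_connected from the original file are in scope; the 128x416 resolution, batch of 4, and two source views are illustrative assumptions.

import tensorflow as tf

tgt_image = tf.placeholder(tf.float32, [4, 128, 416, 3])        # target frame (assumed size)
src_image_stack = tf.placeholder(tf.float32, [4, 128, 416, 6])  # two source frames stacked on channels
pose_final, end_points = pose_net(tgt_image, src_image_stack)
# pose_final: [4, 2, 6] -- a 6-DoF pose (xyz + rotation) per source view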
Example #14
    def setup_lstm(self, encoder_cell, embedded_word_ids, tokens,
                   source_sequence_length, reuse, suffix):
        universal_embedding, avg_words = universal_embedding_layer(
            embedded_word_ids, tokens, self.embed_dim, suffix)
        if self.args.separate_lang_branch:
            reuse = None
            suffix = '_' + suffix
        else:
            suffix = ''

        encoder_outputs, encoder_state = tf.nn.dynamic_rnn(
            encoder_cell,
            universal_embedding,
            dtype=tf.float32,
            sequence_length=source_sequence_length,
            scope='rnn' + suffix)

        final_outputs = extract_axis_1(encoder_outputs,
                                       source_sequence_length - 1)
        outputs = fully_connected(
            final_outputs,
            self.embed_dim,
            activation_fn=None,
            weights_regularizer=tf.contrib.layers.l2_regularizer(0.005),
            scope='phrase_encoder' + suffix,
            reuse=reuse)

        sent_embed = tf.nn.l2_normalize(outputs, 1, epsilon=1e-10)
        return sent_embed, avg_words
Example #15
    def __init__(self, lr, st_size, act_size):
        # Current state placeholder
        self.current_state_in = tf.placeholder(shape=[1], dtype=tf.int32)
        # Create a one-hot vector
        current_state_OneHot = tf.one_hot(self.current_state_in, st_size)

        # Output of the fully connected layer
        output = fully_connected(current_state_OneHot, act_size,
                                 activation_fn=tf.nn.sigmoid,
                                 biases_initializer=None,
                                 weights_initializer=tf.ones_initializer())

        self.output = tf.reshape(output, [-1])
        self.chosen_action = tf.argmax(self.output, 0)

        # Placeholders to store rewards and actions
        self.reward_holder = tf.placeholder(shape=[1], dtype=tf.float32)
        self.action_holder = tf.placeholder(shape=[1], dtype=tf.int32)
        self.responsible_weight = tf.slice(self.output, self.action_holder, [1])

        # Policy gradient calculation
        self.loss = -(tf.log(self.responsible_weight) * self.reward_holder)

        # Define and use a gradient descent optimizer
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=lr)
        # Update the neural network agent's values
        self.update = optimizer.minimize(self.loss)

# -------------------- EOC -----------------------
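
A minimal training-loop sketch for the agent above. The surrounding class name (Agent here), the toy environment, and the epsilon-greedy exploration are assumptions for illustration, not part of the original class.

import numpy as np
import tensorflow as tf

agent = Agent(lr=0.001, st_size=3, act_size=4)   # assumes the class above is named Agent

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for episode in range(1000):
        state = np.random.randint(0, 3)              # toy environment picks a state
        if np.random.rand() < 0.1:                   # epsilon-greedy exploration
            action = np.random.randint(0, 4)
        else:
            action = sess.run(agent.chosen_action,
                              feed_dict={agent.current_state_in: [state]})
        reward = 1.0 if action == state else -1.0    # toy reward: act to match the state
        sess.run(agent.update,
                 feed_dict={agent.current_state_in: [state],
                            agent.reward_holder: [reward],
                            agent.action_holder: [action]})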
Example #16
def add_fc(inputs, outdim, train_phase, scope_in):
    fc = fully_connected(inputs, outdim, activation_fn=None, scope=scope_in + '/fc')
    fc_bnorm = tf.layers.batch_normalization(fc, momentum=0.1, epsilon=1e-5,
                                             training=train_phase, name=scope_in + '/bnorm')
    fc_relu = tf.nn.relu(fc_bnorm, name=scope_in + '/relu')
    fc_out = tf.layers.dropout(fc_relu, rate=0.1, seed=0, training=train_phase, name=scope_in + '/dropout')
    return fc_out
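
This helper is the fc -> batch norm -> ReLU -> dropout block that the embedding examples above build on. A tiny usage sketch (the input shape is an illustrative assumption):

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 4096])
is_train = tf.placeholder(tf.bool, [])
h = add_fc(x, 256, is_train, 'example_fc')   # -> [None, 256]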
Example #17
def setup_initialize_fc_layers(args,
                               feats,
                               parameters,
                               scope_in,
                               train_phase,
                               norm_axis=2):
    for i, params in enumerate(parameters):
        scaling = params['scaling']
        outdim = len(scaling)
        cca_mean = params[scope_in + '_mean']
        cca_proj = params[scope_in + '_proj']
        weights_init = tf.constant_initializer(cca_proj, dtype=tf.float32)
        weight_reg = weight_l2_regularizer(cca_proj, args.cca_weight_reg)
        if (i + 1) < len(parameters):
            activation_fn = tf.nn.relu
        else:
            activation_fn = None

        feats = fully_connected(
            feats - cca_mean,
            outdim,
            activation_fn=activation_fn,
            weights_initializer=weights_init,
            weights_regularizer=weight_reg,
            #trainable=False,
            scope=scope_in + '_embed_' + str(i)) * scaling

    feats = tf.nn.l2_normalize(feats, norm_axis, epsilon=1e-10)
    return feats
Example #18
def embedding_branch(x,
                     embed_dim,
                     train_phase_plh,
                     scope_in,
                     do_l2norm=True,
                     outdim=None,
                     norm_axis=1):
    """Applies a pair of fully connected layers to the input tensor.

    Arguments:
    x -- input_tensor
    embed_dim -- dimension of the input to the second fully connected layer
    train_phase_plh -- indicator whether model is in training mode
    scope_in -- scope prefix for the desired layers
    do_l2norm -- indicates if the output should be l2 normalized
    outdim -- dimension of the output embedding; if None, outdim = embed_dim
    norm_axis -- axis along which the output is l2 normalized
    """
    embed_fc1 = add_fc(x, embed_dim, train_phase_plh, scope_in + '_embed_1')
    if outdim is None:
        outdim = embed_dim

    l2_reg = tf.contrib.layers.l2_regularizer(0.001)
    embed_fc2 = fully_connected(embed_fc1,
                                outdim,
                                activation_fn=None,
                                weights_regularizer=l2_reg,
                                scope=scope_in + '_embed_2')
    if do_l2norm:
        embed_fc2 = tf.nn.l2_normalize(embed_fc2, norm_axis)

    return embed_fc2
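
A short usage sketch of embedding_branch; the 512-d input and the concrete output dimensions are illustrative assumptions.

import tensorflow as tf

feats = tf.placeholder(tf.float32, [None, 512])
train_phase_plh = tf.placeholder(tf.bool, [])

# A 256-d L2-normalized embedding of the input features.
embed = embedding_branch(feats, 256, train_phase_plh, 'example')

# Unnormalized logits with a different output dimension (as used for the
# concept weights in Example #8).
concept_w = embedding_branch(feats, 256, train_phase_plh, 'concept_weight',
                             do_l2norm=False, outdim=4)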
Example #19
def embedding_model(im_feats,
                    sent_feats,
                    train_phase,
                    im_labels,
                    fc_dim=1024,
                    embed_dim=1024):
    """
        Build two-branch embedding networks.
        fc_dim: the output dimension of the first fc layer.
        embed_dim: the output dimension of the second fc layer, i.e.
                   embedding space dimension.
    """

    # Image branch.
    # layer_1 = tf.add(tf.matmul(im_feats, wf['h1']), bf['b1'])
    # layer_2 = tf.add(tf.matmul(layer_1, wf['h2']), bf['b2'])
    # layer_3 = tf.add(tf.matmul(layer_2, wf['h3']), bf['b3'])

    im_fc1 = add_fc(im_feats, fc_dim, train_phase, 'im_embed_1')

    im_fc2 = fully_connected(im_fc1,
                             embed_dim,
                             activation_fn=None,
                             scope='im_embed_2')
    fc_bnorm = tf.layers.batch_normalization(im_fc2,
                                             momentum=0.1,
                                             epsilon=1e-5,
                                             training=train_phase)
    # im_fc2 = tf.layers.dense(im_fc1, embed_dim, activation=tf.nn.tanh)
    i_embed = tf.nn.l2_normalize(fc_bnorm, 1, epsilon=1e-10)
    # Voice branch.
    # layer_1 = tf.add(tf.matmul(sent_feats, wv['h1']), bv['b1'])
    # layer_2 = tf.add(tf.matmul(layer_1, wv['h2']), bv['b2'])
    # layer_3 = tf.add(tf.matmul(layer_2, wv['h3']), bv['b3'])

    sent_fc1 = add_fc(sent_feats, fc_dim, train_phase, 'sent_embed_1')
    sent_fc2 = fully_connected(sent_fc1,
                               embed_dim,
                               activation_fn=None,
                               scope='sent_embed_2')
    fc_bnorm_sent = tf.layers.batch_normalization(sent_fc2,
                                                  momentum=0.1,
                                                  epsilon=1e-5,
                                                  training=train_phase)
    # sent_fc2 = tf.layers.dense(sent_fc1, embed_dim, activation=None)
    s_embed = tf.nn.l2_normalize(fc_bnorm_sent, 1, epsilon=1e-10)
    return i_embed, s_embed
Example #20
    def _get_scale_tril(self, _input, hid_layer=None):
        """
    """
        dirs = self.directives
        num_layers = dirs['num_layers']
        num_nodes = dirs['num_nodes']
        activation = dirs['activation']
        net_grow_rate = dirs['net_grow_rate']

        output_dim = self._oslot_to_shape[0][-1]
        with tf.variable_scope(self.name + '_scale', reuse=tf.AUTO_REUSE):
            if dirs['share_params']:
                output_chol = fully_connected(hid_layer,
                                              output_dim**2,
                                              activation_fn=None)
            else:
                print("_input:", _input)
                hid_layer = fully_connected(
                    _input,
                    num_nodes,
                    activation_fn=activation,
                    biases_initializer=tf.random_normal_initializer(
                        stddev=1 / np.sqrt(num_nodes)))
                for _ in range(num_layers - 1):
                    num_nodes = int(num_nodes * net_grow_rate)
                    hid_layer = fully_connected(
                        hid_layer,
                        num_nodes,
                        activation_fn=activation,
                        biases_initializer=tf.random_normal_initializer(
                            stddev=1 / np.sqrt(num_nodes)))
                output_chol = fully_connected(
                    hid_layer,
                    output_dim**2,
                    activation_fn=None,
                    weights_initializer=tf.random_normal_initializer(
                        stddev=1e-4),
                    biases_initializer=tf.random_normal_initializer(
                        stddev=1 / np.sqrt(output_dim**2)))

    #           normalizer_fn=lambda x : x/tf.sqrt(x**2),
            output_chol = tf.reshape(
                output_chol,
                #                              shape=[self.batch_size, output_dim, output_dim])
                shape=[-1, output_dim, output_dim])
        return output_chol
Example #21
    def setup_img_model(self, im_feats, train_phase):
        im_fc1 = add_fc(im_feats, self.fc_dim, train_phase, 'im_embed_1')
        im_fc2 = fully_connected(im_fc1,
                                 self.embed_dim,
                                 activation_fn=None,
                                 scope='im_embed_2')
        i_embed = tf.nn.l2_normalize(im_fc2, 1, epsilon=1e-10)
        return i_embed
Example #22
def discriminator(x, y):
    '''
    args:
        x: data vector
        y: condition (label) vector
    returns:
        net: discriminator network
    '''
    xf = layers.fully_connected(x, 1000, scope='fcx')
    yf = layers.fully_connected(y, 200, scope='fcy')
    net = tf.concat([xf, yf], 1)
    net = layers.dropout(net, 0.2, scope='do1')
    net = layers.fully_connected(net, 1200, activation_fn=tf.nn.sigmoid, scope='fc1')
    net = layers.dropout(net, 0.5, scope='do2')
    net = layers.fully_connected(net, 1200, activation_fn=tf.nn.sigmoid, scope='fc2')
    net = compare_to_minibatch(net)
    # No activation function because it is folded into the cost function used later.
    net = layers.fully_connected(net, 1, scope='fc3', activation_fn=None)
    return net
Example #23
def embedding_model(im_feats, sent_feats, train_phase, im_labels,
                    fc_dim = 1024, embed_dim = 512):
    """
        Build two-branch embedding networks.
        fc_dim: the output dimension of the first fc layer.
        embed_dim: the output dimension of the second fc layer, i.e.
                   embedding space dimension.
    """
    # Image branch.
    #is_training = tf.placeholder(dtype=tf.bool, shape=())
    im_fc1 = add_fc(im_feats, fc_dim, train_phase, 'im_embed_1')
    #im_fc2=add_fc(im_fc1,512,train_phase,'im_embed_2')
    #im_fc3=add_fc(im_fc2,256,train_phase,'im_embed_3')
    im_fc2 = fully_connected(im_fc1, embed_dim, activation_fn=None,
                             scope='im_embed_2')
    i_embed = tf.nn.l2_normalize(im_fc2, 1, epsilon=1e-10)

    #im_fc3 = fully_connected(i_embed, 30, activation_fn=None,
    #                         scope = 'im_embed_3')
    #attr1 = tf.nn.sigmoid(im_fc3,name = 'attr_rec1')
    # Text branch.
    #print("type",sent_feats)
    sent_f = tf.to_int32(sent_feats, name='ToInt32')
    sent_f = tf.one_hot(sent_f, 2)
    sent_fc0 = add_fc(sent_f, 4, train_phase, 'sent_embed_0')
    #print("sent_one_hot", sent_f)
    #sent_fc0 = add_fc(sent_f, 10, train_phase, 'sent_embed_0')
    sent_fc0 = tf.layers.flatten(sent_fc0)
    #print("sent_fc_0", sent_fc0)
    #sent_fc1 = add_fc(sent_fc0, 128, train_phase, 'sent_embed_1')
    #print("sent_fc1", sent_fc1)
    sent_fc2 = add_fc(sent_fc0, 256, train_phase, 'sent_embed_2')
    #sent_fc3 = add_fc(sent_fc2, 512, train_phase, 'sent_embed_3')
    #sent_fc4 = add_fc(sent_fc2, 1024, train_phase, 'sent_embed_4')
    sent_fc3 = fully_connected(sent_fc2, embed_dim, activation_fn=None,
                               scope='sent_embed_3')
    s_embed = tf.nn.l2_normalize(sent_fc3, 1, epsilon=1e-10)
    #sent_fc3=im_fc3 = fully_connected(s_embed, 30, activation_fn=None,
    #                         scope = 'sent_embed_3')
    #attr2 = tf.nn.sigmoid(sent_fc3,name = 'attr_rec2')
    #attr2 = fully_connected(sent_fc2,30,activation_fn=None,scope = 'attr_rec2')
    return i_embed, s_embed
Example #24
    def decoder(self, x, keep_prob):
        d_fc1 = fully_connected(x, 4096, activation_fn=None)
        d_fc1 = tf.nn.dropout(d_fc1, keep_prob)
        #print d_fc1
        d_unflat1 = tf.reshape(d_fc1, [-1, 16, 16, 16])
        #print d_unflat1
        d_conv1 = convolution2d_transpose(d_unflat1, 32, [5, 5], [2, 2], padding='SAME')
        #print d_conv1
        d_conv2 = convolution2d_transpose(d_conv1, 3, [5, 5], [2, 2], padding='SAME')
        #print d_conv2
        return d_conv2
Example #25
    def encoder(self, x, keep_prob):
        e_conv1 = convolution2d(x, 32, [5, 5], [2, 2], padding='SAME')
        #print e_conv1
        e_conv2 = convolution2d(e_conv1, 16, [5, 5], [2, 2], padding='SAME')
        #print e_conv2
        e_flat1 = flatten(e_conv2)
        #print e_flat1
        #e_flat1 = tf.nn.dropout(e_flat1, keep_prob)
        e_fc1 = fully_connected(e_flat1, self.params['final_layer'], activation_fn=None)
        e_fc1 = tf.nn.dropout(e_fc1, keep_prob)
        return e_fc1
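
Taken together, the decoder in Example #24 inverts this encoder. A shape walk-through for a hypothetical 64x64x3 input with final_layer = 256 (both values are assumptions):

encoder: 64x64x3 --conv(32, stride 2)--> 32x32x32 --conv(16, stride 2)--> 16x16x16
         --flatten--> 4096 --fc--> 256
decoder: 256 --fc--> 4096 --reshape--> 16x16x16 --deconv(32, stride 2)--> 32x32x32
         --deconv(3, stride 2)--> 64x64x3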
Example #26
def embedding_model(feats, train_phase, scope_name,
                    fc_dim=n_inputs, embed_dim=n_hidden):
    """
        Build one branch of the two-branch embedding network.
        fc_dim: the output dimension of the first fc layer.
        embed_dim: the output dimension of the second fc layer, i.e.
                   embedding space dimension.
    """
    # Each branch.
    fc1 = add_fc(feats, fc_dim, train_phase, scope_name)
    fc2 = fully_connected(fc1, embed_dim, activation_fn=None,
                          scope=scope_name + '_2')
    embed = tf.nn.l2_normalize(fc2, 1, epsilon=1e-10)
    return embed
Example #27
def setup_img_model(im_feats, train_phase, args, fc_dim=2048, embed_dim=512):
    if args.init_filename:
        parameters = pickle.load(open(args.init_filename, 'rb'))
        i_embed = setup_initialize_fc_layers(im_feats, parameters, 'vis',
                                             train_phase, args)
    else:
        im_fc1 = add_fc(im_feats, fc_dim, train_phase, 'im_embed_1')
        im_fc2 = fully_connected(
            im_fc1,
            embed_dim,
            activation_fn=None,
            weights_regularizer=tf.contrib.layers.l2_regularizer(0.0005),
            scope='im_embed_2')
        i_embed = tf.nn.l2_normalize(im_fc2, 1, epsilon=1e-10)

    return i_embed
Example #28
def compare_to_minibatch(input, num_kernels=5, kernel_dim=3):
    '''
    Take the output of an intermediate layer of the discriminator
    and compare individual samples within the minibatch.
    '''
    # Multiply the discriminator layer by a 3D tensor to produce a matrix
    x = layers.fully_connected(input, num_kernels * kernel_dim)
    activation = tf.reshape(x, (-1, num_kernels, kernel_dim))

    # Compute the L1-distance between rows of the matrix
    diffs = tf.expand_dims(activation, 3) - \
        tf.expand_dims(tf.transpose(activation, [1, 2, 0]), 0)
    abs_diffs = tf.reduce_sum(tf.abs(diffs), 2)

    # Apply a negative exponential
    minibatch_features = tf.reduce_sum(tf.exp(-abs_diffs), 2)
    return tf.concat([input, minibatch_features], 1)
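
A shape-level sketch of what this adds to the discriminator; the batch size and feature width are illustrative assumptions.

import tensorflow as tf
from tensorflow.contrib import layers

h = tf.random_normal([64, 1200])   # a discriminator hidden layer, batch of 64
out = compare_to_minibatch(h)      # -> [64, 1205]: 5 minibatch features appended per sample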
Example #29
def universal_embedding_layer(embedded_word_ids,
                              tokens,
                              embed_dim,
                              suffix,
                              trainable=True):
    universal_embedding = fully_connected(
        embedded_word_ids,
        embed_dim,
        activation_fn=None,
        weights_regularizer=tf.contrib.layers.l2_regularizer(0.005),
        trainable=trainable,
        scope='mule_' + suffix)

    num_words = tf.reduce_sum(tf.to_float(tokens > 0), 1,
                              keep_dims=True) + 1e-10
    avg_words = tf.nn.l2_normalize(
        tf.reduce_sum(universal_embedding, 1) / num_words, 1)
    return universal_embedding, avg_words
Example #30
    def get_phrase_scores(self, phrase_embed, region_embed, concept_weights):
        elementwise_prod = tf.expand_dims(phrase_embed, 2) * tf.expand_dims(
            region_embed, 1)
        joint_embed_1 = add_fc(elementwise_prod, self.embed_dim,
                               self.train_phase, 'joint_embed_1')
        joint_embed_2 = concept_layer(joint_embed_1, self.final_embed,
                                      self.train_phase, 1, concept_weights)
        for concept_id in range(2, self.args.num_embeddings + 1):
            joint_embed_2 += concept_layer(joint_embed_1, self.final_embed,
                                           self.train_phase, concept_id,
                                           concept_weights)

        joint_embed_3 = fully_connected(
            joint_embed_2,
            1,
            activation_fn=None,
            weights_regularizer=l2_regularizer(0.005),
            scope='joint_embed_3')
        joint_embed_3 = tf.squeeze(joint_embed_3, [3])
        region_prob = 1. / (1. + tf.exp(-joint_embed_3))  # elementwise sigmoid
        return region_prob, joint_embed_3